mirror of
https://github.com/github/codeql.git
synced 2025-12-24 12:46:34 +01:00
Merge branch 'main' into amammad-python-bombs
This commit is contained in:
@@ -3,3 +3,4 @@ groups: [python, test, consistency-queries]
|
||||
dependencies:
|
||||
codeql/python-all: ${workspace}
|
||||
extractor: python
|
||||
warnOnImplicitThis: true
|
||||
|
||||
@@ -4,3 +4,4 @@ groups:
|
||||
- examples
|
||||
dependencies:
|
||||
codeql/python-all: ${workspace}
|
||||
warnOnImplicitThis: true
|
||||
|
||||
15
python/ql/lib/BUILD.bazel
Normal file
15
python/ql/lib/BUILD.bazel
Normal file
@@ -0,0 +1,15 @@
|
||||
load("@rules_pkg//:mappings.bzl", "pkg_files")
|
||||
|
||||
package(default_visibility = ["//python:__pkg__"])
|
||||
|
||||
pkg_files(
|
||||
name = "dbscheme",
|
||||
srcs = ["semmlecode.python.dbscheme"],
|
||||
prefix = "python",
|
||||
)
|
||||
|
||||
pkg_files(
|
||||
name = "dbscheme-stats",
|
||||
srcs = ["semmlecode.python.dbscheme.stats"],
|
||||
prefix = "python",
|
||||
)
|
||||
@@ -1,3 +1,111 @@
|
||||
## 0.11.3
|
||||
|
||||
### Minor Analysis Improvements
|
||||
|
||||
* Added basic flow for attributes defined on classes, when the attribute lookup is on a direct reference to that class (so not instance, cls parameter, or self parameter). Example: class definition `class Foo: my_tuples = (dangerous, safe)` and usage `SINK(Foo.my_tuples[0])`.
|
||||
|
||||
## 0.11.2
|
||||
|
||||
### Minor Analysis Improvements
|
||||
|
||||
* Added support for functions decorated with `contextlib.contextmanager`.
|
||||
* Namespace packages in the form of regular packages with missing `__init__.py`-files are now allowed. This enables the analysis to resolve modules and functions inside such packages.
|
||||
|
||||
## 0.11.1
|
||||
|
||||
### Minor Analysis Improvements
|
||||
|
||||
* Added better support for API graphs when encountering `from ... import *`. For example in the code `from foo import *; Bar()`, we will now find a result for `API::moduleImport("foo").getMember("Bar").getACall()`
|
||||
* Deleted the deprecated `isBarrierGuard` predicate from the dataflow library and its uses, use `isBarrier` and the `BarrierGuard` module instead.
|
||||
* Deleted the deprecated `getAUse`, `getAnImmediateUse`, `getARhs`, and `getAValueReachingRhs` predicates from the `API::Node` class.
|
||||
* Deleted the deprecated `fullyQualifiedToAPIGraphPath` class from `SubclassFinder.qll`, use `fullyQualifiedToApiGraphPath` instead.
|
||||
* Deleted the deprecated `Paths.qll` file.
|
||||
* Deleted the deprecated `semmle.python.security.performance` folder, use `semmle.python.security.regexp` instead.
|
||||
* Deleted the deprecated `semmle.python.security.strings` and `semmle.python.web` folders.
|
||||
* Improved modeling of decoding through pickle related functions (which can lead to code execution), resulting in additional sinks for the _Deserializing untrusted input_ query (`py/unsafe-deserialization`). Added support for `pandas.read_pickle`, `numpy.load` and `joblib.load`.
|
||||
|
||||
## 0.11.0
|
||||
|
||||
### Minor Analysis Improvements
|
||||
|
||||
* Django Rest Framework better handles custom `ModelViewSet` classes functions
|
||||
* Regular expression fragments residing inside implicitly concatenated strings now have better location information.
|
||||
|
||||
### Bug Fixes
|
||||
|
||||
* Subterms of regular expressions encoded as single-line string literals now have better source-location information.
|
||||
|
||||
## 0.10.5
|
||||
|
||||
No user-facing changes.
|
||||
|
||||
## 0.10.4
|
||||
|
||||
### Minor Analysis Improvements
|
||||
|
||||
* Regular expressions containing multiple parse mode flags are now interpretted correctly. For example `"(?is)abc.*"` with both the `i` and `s` flags.
|
||||
* Added `shlex.quote` as a sanitizer for the `py/shell-command-constructed-from-input` query.
|
||||
|
||||
## 0.10.3
|
||||
|
||||
### Minor Analysis Improvements
|
||||
|
||||
* Support analyzing packages (folders with python code) that do not have `__init__.py` files, although this is technically required, we see real world projects that don't have this.
|
||||
* Added modeling of AWS Lambda handlers that can be identified with `AWS::Serverless::Function` in YAML files, where the event parameter is modeled as a remote-flow-source.
|
||||
* Improvements of the `aiohttp` models including remote-flow-sources from type annotations, new path manipulation, and SSRF sinks.
|
||||
|
||||
### Bug Fixes
|
||||
|
||||
* Fixed the computation of locations for imports with aliases in jump-to-definition.
|
||||
|
||||
## 0.10.2
|
||||
|
||||
No user-facing changes.
|
||||
|
||||
## 0.10.1
|
||||
|
||||
### New Features
|
||||
|
||||
* The `DataFlow::StateConfigSig` signature module has gained default implementations for `isBarrier/2` and `isAdditionalFlowStep/4`.
|
||||
Hence it is no longer needed to provide `none()` implementations of these predicates if they are not needed.
|
||||
|
||||
### Minor Analysis Improvements
|
||||
|
||||
* Data flow configurations can now include a predicate `neverSkip(Node node)`
|
||||
in order to ensure inclusion of certain nodes in the path explanations. The
|
||||
predicate defaults to the end-points of the additional flow steps provided in
|
||||
the configuration, which means that such steps now always are visible by
|
||||
default in path explanations.
|
||||
* Add support for Models as Data for Reflected XSS query
|
||||
* Parameters with a default value are now considered a `DefinitionNode`. This improvement was motivated by allowing type-tracking and API graphs to follow flow from such a default value to a use by a captured variable.
|
||||
|
||||
## 0.10.0
|
||||
|
||||
### New Features
|
||||
|
||||
* It is now possible to specify flow summaries in the format "MyPkg;Member[list_map];Argument[1].ListElement;Argument[0].Parameter[0];value"
|
||||
|
||||
### Minor Analysis Improvements
|
||||
|
||||
* Deleted many models that used the old dataflow library, the new models can be found in the `python/ql/lib/semmle/python/frameworks` folder.
|
||||
* More precise modeling of several container functions (such as `sorted`, `reversed`) and methods (such as `set.add`, `list.append`).
|
||||
* Added modeling of taint flow through the template argument of `flask.render_template_string` and `flask.stream_template_string`.
|
||||
* Deleted many deprecated predicates and classes with uppercase `API`, `HTTP`, `XSS`, `SQL`, etc. in their names. Use the PascalCased versions instead.
|
||||
* Deleted the deprecated `getName()` predicate from the `Container` class, use `getAbsolutePath()` instead.
|
||||
* Deleted many deprecated module names that started with a lowercase letter, use the versions that start with an uppercase letter instead.
|
||||
* Deleted many deprecated predicates in `PointsTo.qll`.
|
||||
* Deleted many deprecated files from the `semmle.python.security` package.
|
||||
* Deleted the deprecated `BottleRoutePointToExtension` class from `Extensions.qll`.
|
||||
* Type tracking is now aware of flow summaries. This leads to a richer API graph, and may lead to more results in some queries.
|
||||
|
||||
## 0.9.4
|
||||
|
||||
No user-facing changes.
|
||||
|
||||
## 0.9.3
|
||||
|
||||
No user-facing changes.
|
||||
|
||||
## 0.9.2
|
||||
|
||||
### Minor Analysis Improvements
|
||||
|
||||
@@ -492,9 +492,14 @@ class NiceLocationExpr extends Expr {
|
||||
// for `import xxx` or for `import xxx as yyy`.
|
||||
this.(ImportExpr).getLocation().hasLocationInfo(f, bl, bc, el, ec)
|
||||
or
|
||||
/* Show y for `y` in `from xxx import y` */
|
||||
exists(string name |
|
||||
name = this.(ImportMember).getName() and
|
||||
// Show y for `y` in `from xxx import y`
|
||||
// and y for `yyy as y` in `from xxx import yyy as y`.
|
||||
exists(string name, Alias alias |
|
||||
// This alias will always exist, as `from xxx import y`
|
||||
// is expanded to `from xxx imprt y as y`.
|
||||
this = alias.getValue() and
|
||||
name = alias.getAsname().(Name).getId()
|
||||
|
|
||||
this.(ImportMember).getLocation().hasLocationInfo(f, _, _, el, ec) and
|
||||
bl = el and
|
||||
bc = ec - name.length() + 1
|
||||
|
||||
@@ -1,9 +0,0 @@
|
||||
---
|
||||
category: minorAnalysis
|
||||
---
|
||||
* Deleted many deprecated predicates and classes with uppercase `API`, `HTTP`, `XSS`, `SQL`, etc. in their names. Use the PascalCased versions instead.
|
||||
* Deleted the deprecated `getName()` predicate from the `Container` class, use `getAbsolutePath()` instead.
|
||||
* Deleted many deprecated module names that started with a lowercase letter, use the versions that start with an uppercase letter instead.
|
||||
* Deleted many deprecated predicates in `PointsTo.qll`.
|
||||
* Deleted many deprecated files from the `semmle.python.security` package.
|
||||
* Deleted the deprecated `BottleRoutePointToExtension` class from `Extensions.qll`.
|
||||
@@ -1,4 +0,0 @@
|
||||
---
|
||||
category: minorAnalysis
|
||||
---
|
||||
* Added modeling of taint flow through the template argument of `flask.render_template_string` and `flask.stream_template_string`.
|
||||
@@ -1,4 +0,0 @@
|
||||
---
|
||||
category: minorAnalysis
|
||||
---
|
||||
* More precise modelling of several container functions (such as `sorted`, `reversed`) and methods (such as `set.add`, `list.append`).
|
||||
4
python/ql/lib/change-notes/2023-11-08-re-modeling.md
Normal file
4
python/ql/lib/change-notes/2023-11-08-re-modeling.md
Normal file
@@ -0,0 +1,4 @@
|
||||
---
|
||||
category: minorAnalysis
|
||||
---
|
||||
* Added taint-flow modeling for regular expressions with `re` module from the standard library.
|
||||
@@ -0,0 +1,5 @@
|
||||
---
|
||||
category: minorAnalysis
|
||||
---
|
||||
|
||||
- Added support for type parameters in function and class definitions, as well as the new Python 3.12 type alias statement.
|
||||
@@ -0,0 +1,4 @@
|
||||
---
|
||||
category: minorAnalysis
|
||||
---
|
||||
* Added modeling of `*args` and `**kwargs` as routed-parameters in request handlers for django/flask/FastAPI/tornado.
|
||||
@@ -0,0 +1,5 @@
|
||||
---
|
||||
category: minorAnalysis
|
||||
---
|
||||
|
||||
- Added support for tarfile extraction filters as defined in [PEP-706](https://peps.python.org/pep-0706). In particular, calls to `TarFile.extract`, and `TarFile.extractall` are no longer considered to be sinks for the `py/tarslip` query if a sufficiently safe filter is provided.
|
||||
18
python/ql/lib/change-notes/released/0.10.0.md
Normal file
18
python/ql/lib/change-notes/released/0.10.0.md
Normal file
@@ -0,0 +1,18 @@
|
||||
## 0.10.0
|
||||
|
||||
### New Features
|
||||
|
||||
* It is now possible to specify flow summaries in the format "MyPkg;Member[list_map];Argument[1].ListElement;Argument[0].Parameter[0];value"
|
||||
|
||||
### Minor Analysis Improvements
|
||||
|
||||
* Deleted many models that used the old dataflow library, the new models can be found in the `python/ql/lib/semmle/python/frameworks` folder.
|
||||
* More precise modeling of several container functions (such as `sorted`, `reversed`) and methods (such as `set.add`, `list.append`).
|
||||
* Added modeling of taint flow through the template argument of `flask.render_template_string` and `flask.stream_template_string`.
|
||||
* Deleted many deprecated predicates and classes with uppercase `API`, `HTTP`, `XSS`, `SQL`, etc. in their names. Use the PascalCased versions instead.
|
||||
* Deleted the deprecated `getName()` predicate from the `Container` class, use `getAbsolutePath()` instead.
|
||||
* Deleted many deprecated module names that started with a lowercase letter, use the versions that start with an uppercase letter instead.
|
||||
* Deleted many deprecated predicates in `PointsTo.qll`.
|
||||
* Deleted many deprecated files from the `semmle.python.security` package.
|
||||
* Deleted the deprecated `BottleRoutePointToExtension` class from `Extensions.qll`.
|
||||
* Type tracking is now aware of flow summaries. This leads to a richer API graph, and may lead to more results in some queries.
|
||||
16
python/ql/lib/change-notes/released/0.10.1.md
Normal file
16
python/ql/lib/change-notes/released/0.10.1.md
Normal file
@@ -0,0 +1,16 @@
|
||||
## 0.10.1
|
||||
|
||||
### New Features
|
||||
|
||||
* The `DataFlow::StateConfigSig` signature module has gained default implementations for `isBarrier/2` and `isAdditionalFlowStep/4`.
|
||||
Hence it is no longer needed to provide `none()` implementations of these predicates if they are not needed.
|
||||
|
||||
### Minor Analysis Improvements
|
||||
|
||||
* Data flow configurations can now include a predicate `neverSkip(Node node)`
|
||||
in order to ensure inclusion of certain nodes in the path explanations. The
|
||||
predicate defaults to the end-points of the additional flow steps provided in
|
||||
the configuration, which means that such steps now always are visible by
|
||||
default in path explanations.
|
||||
* Add support for Models as Data for Reflected XSS query
|
||||
* Parameters with a default value are now considered a `DefinitionNode`. This improvement was motivated by allowing type-tracking and API graphs to follow flow from such a default value to a use by a captured variable.
|
||||
3
python/ql/lib/change-notes/released/0.10.2.md
Normal file
3
python/ql/lib/change-notes/released/0.10.2.md
Normal file
@@ -0,0 +1,3 @@
|
||||
## 0.10.2
|
||||
|
||||
No user-facing changes.
|
||||
11
python/ql/lib/change-notes/released/0.10.3.md
Normal file
11
python/ql/lib/change-notes/released/0.10.3.md
Normal file
@@ -0,0 +1,11 @@
|
||||
## 0.10.3
|
||||
|
||||
### Minor Analysis Improvements
|
||||
|
||||
* Support analyzing packages (folders with python code) that do not have `__init__.py` files, although this is technically required, we see real world projects that don't have this.
|
||||
* Added modeling of AWS Lambda handlers that can be identified with `AWS::Serverless::Function` in YAML files, where the event parameter is modeled as a remote-flow-source.
|
||||
* Improvements of the `aiohttp` models including remote-flow-sources from type annotations, new path manipulation, and SSRF sinks.
|
||||
|
||||
### Bug Fixes
|
||||
|
||||
* Fixed the computation of locations for imports with aliases in jump-to-definition.
|
||||
6
python/ql/lib/change-notes/released/0.10.4.md
Normal file
6
python/ql/lib/change-notes/released/0.10.4.md
Normal file
@@ -0,0 +1,6 @@
|
||||
## 0.10.4
|
||||
|
||||
### Minor Analysis Improvements
|
||||
|
||||
* Regular expressions containing multiple parse mode flags are now interpretted correctly. For example `"(?is)abc.*"` with both the `i` and `s` flags.
|
||||
* Added `shlex.quote` as a sanitizer for the `py/shell-command-constructed-from-input` query.
|
||||
3
python/ql/lib/change-notes/released/0.10.5.md
Normal file
3
python/ql/lib/change-notes/released/0.10.5.md
Normal file
@@ -0,0 +1,3 @@
|
||||
## 0.10.5
|
||||
|
||||
No user-facing changes.
|
||||
10
python/ql/lib/change-notes/released/0.11.0.md
Normal file
10
python/ql/lib/change-notes/released/0.11.0.md
Normal file
@@ -0,0 +1,10 @@
|
||||
## 0.11.0
|
||||
|
||||
### Minor Analysis Improvements
|
||||
|
||||
* Django Rest Framework better handles custom `ModelViewSet` classes functions
|
||||
* Regular expression fragments residing inside implicitly concatenated strings now have better location information.
|
||||
|
||||
### Bug Fixes
|
||||
|
||||
* Subterms of regular expressions encoded as single-line string literals now have better source-location information.
|
||||
12
python/ql/lib/change-notes/released/0.11.1.md
Normal file
12
python/ql/lib/change-notes/released/0.11.1.md
Normal file
@@ -0,0 +1,12 @@
|
||||
## 0.11.1
|
||||
|
||||
### Minor Analysis Improvements
|
||||
|
||||
* Added better support for API graphs when encountering `from ... import *`. For example in the code `from foo import *; Bar()`, we will now find a result for `API::moduleImport("foo").getMember("Bar").getACall()`
|
||||
* Deleted the deprecated `isBarrierGuard` predicate from the dataflow library and its uses, use `isBarrier` and the `BarrierGuard` module instead.
|
||||
* Deleted the deprecated `getAUse`, `getAnImmediateUse`, `getARhs`, and `getAValueReachingRhs` predicates from the `API::Node` class.
|
||||
* Deleted the deprecated `fullyQualifiedToAPIGraphPath` class from `SubclassFinder.qll`, use `fullyQualifiedToApiGraphPath` instead.
|
||||
* Deleted the deprecated `Paths.qll` file.
|
||||
* Deleted the deprecated `semmle.python.security.performance` folder, use `semmle.python.security.regexp` instead.
|
||||
* Deleted the deprecated `semmle.python.security.strings` and `semmle.python.web` folders.
|
||||
* Improved modeling of decoding through pickle related functions (which can lead to code execution), resulting in additional sinks for the _Deserializing untrusted input_ query (`py/unsafe-deserialization`). Added support for `pandas.read_pickle`, `numpy.load` and `joblib.load`.
|
||||
6
python/ql/lib/change-notes/released/0.11.2.md
Normal file
6
python/ql/lib/change-notes/released/0.11.2.md
Normal file
@@ -0,0 +1,6 @@
|
||||
## 0.11.2
|
||||
|
||||
### Minor Analysis Improvements
|
||||
|
||||
* Added support for functions decorated with `contextlib.contextmanager`.
|
||||
* Namespace packages in the form of regular packages with missing `__init__.py`-files are now allowed. This enables the analysis to resolve modules and functions inside such packages.
|
||||
5
python/ql/lib/change-notes/released/0.11.3.md
Normal file
5
python/ql/lib/change-notes/released/0.11.3.md
Normal file
@@ -0,0 +1,5 @@
|
||||
## 0.11.3
|
||||
|
||||
### Minor Analysis Improvements
|
||||
|
||||
* Added basic flow for attributes defined on classes, when the attribute lookup is on a direct reference to that class (so not instance, cls parameter, or self parameter). Example: class definition `class Foo: my_tuples = (dangerous, safe)` and usage `SINK(Foo.my_tuples[0])`.
|
||||
3
python/ql/lib/change-notes/released/0.9.3.md
Normal file
3
python/ql/lib/change-notes/released/0.9.3.md
Normal file
@@ -0,0 +1,3 @@
|
||||
## 0.9.3
|
||||
|
||||
No user-facing changes.
|
||||
3
python/ql/lib/change-notes/released/0.9.4.md
Normal file
3
python/ql/lib/change-notes/released/0.9.4.md
Normal file
@@ -0,0 +1,3 @@
|
||||
## 0.9.4
|
||||
|
||||
No user-facing changes.
|
||||
@@ -1,2 +1,2 @@
|
||||
---
|
||||
lastReleaseVersion: 0.9.2
|
||||
lastReleaseVersion: 0.11.3
|
||||
|
||||
6
python/ql/lib/experimental/cryptography/Concepts.qll
Normal file
6
python/ql/lib/experimental/cryptography/Concepts.qll
Normal file
@@ -0,0 +1,6 @@
|
||||
import experimental.cryptography.CryptoArtifact
|
||||
import experimental.cryptography.CryptoAlgorithmNames
|
||||
import semmle.python.ApiGraphs
|
||||
import experimental.cryptography.modules.stdlib.HashlibModule as HashLibModule
|
||||
import experimental.cryptography.modules.stdlib.HmacModule as HMacModule
|
||||
import experimental.cryptography.modules.CryptographyModule as CryptographyModule
|
||||
228
python/ql/lib/experimental/cryptography/CryptoAlgorithmNames.qll
Normal file
228
python/ql/lib/experimental/cryptography/CryptoAlgorithmNames.qll
Normal file
@@ -0,0 +1,228 @@
|
||||
/**
|
||||
* Names of known cryptographic algorithms.
|
||||
* The names are standardized into upper-case, no spaces, dashes or underscores.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Returns a string to represent generally unknown algorithms.
|
||||
* Predicate is to be used to get a consistent string representation
|
||||
* for unknown algorithms.
|
||||
*/
|
||||
string unknownAlgorithm() { result = "UNKNOWN" }
|
||||
|
||||
string getHashType() { result = "HASH" }
|
||||
|
||||
string getSymmetricEncryptionType() { result = "SYMMETRIC_ENCRYPTION" }
|
||||
|
||||
string getAsymmetricEncryptionType() { result = "ASYMMETRIC_ENCRYPTION" }
|
||||
|
||||
string getKeyDerivationType() { result = "KEY_DERIVATION" }
|
||||
|
||||
string getCipherBlockModeType() { result = "BLOCK_MODE" }
|
||||
|
||||
string getSymmetricPaddingType() { result = "SYMMETRIC_PADDING" }
|
||||
|
||||
string getAsymmetricPaddingType() { result = "ASYMMETRIC_PADDING" }
|
||||
|
||||
string getEllipticCurveType() { result = "ELLIPTIC_CURVE" }
|
||||
|
||||
string getSignatureType() { result = "SIGNATURE" }
|
||||
|
||||
string getKeyExchangeType() { result = "KEY_EXCHANGE" }
|
||||
|
||||
predicate isKnownType(string algType) {
|
||||
algType in [
|
||||
getHashType(), getSymmetricEncryptionType(), getAsymmetricEncryptionType(),
|
||||
getKeyDerivationType(), getCipherBlockModeType(), getSymmetricPaddingType(),
|
||||
getAsymmetricPaddingType(), getEllipticCurveType(), getSignatureType(), getKeyExchangeType()
|
||||
]
|
||||
}
|
||||
|
||||
predicate isKnownAlgorithm(string name) { isKnownAlgorithm(name, _) }
|
||||
|
||||
predicate isKnownAlgorithm(string name, string algType) {
|
||||
isHashingAlgorithm(name) and algType = "HASH"
|
||||
or
|
||||
isEncryptionAlgorithm(name, algType) and
|
||||
algType in ["SYMMETRIC_ENCRYPTION", "ASYMMETRIC_ENCRYPTION"]
|
||||
or
|
||||
isKeyDerivationAlgorithm(name) and algType = "KEY_DERIVATION"
|
||||
or
|
||||
isCipherBlockModeAlgorithm(name) and algType = "BLOCK_MODE"
|
||||
or
|
||||
isPaddingAlgorithm(name, algType) and algType in ["SYMMETRIC_PADDING", "ASYMMETRIC_PADDING"]
|
||||
or
|
||||
isEllipticCurveAlgorithm(name) and algType = "ELLIPTIC_CURVE"
|
||||
or
|
||||
isSignatureAlgorithm(name) and algType = "SIGNATURE"
|
||||
or
|
||||
isKeyExchangeAlgorithm(name) and algType = "KEY_EXCHANGE"
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `name` is a known hashing algorithm in the model/library.
|
||||
*/
|
||||
predicate isHashingAlgorithm(string name) {
|
||||
name =
|
||||
[
|
||||
"BLAKE2", "BLAKE2B", "BLAKE2S", "SHA2", "SHA224", "SHA256", "SHA384", "SHA512", "SHA512224",
|
||||
"SHA512256", "SHA3", "SHA3224", "SHA3256", "SHA3384", "SHA3512", "SHAKE128", "SHAKE256",
|
||||
"SM3", "WHIRLPOOL", "POLY1305", "HAVEL128", "MD2", "MD4", "MD5", "PANAMA", "RIPEMD",
|
||||
"RIPEMD128", "RIPEMD256", "RIPEMD160", "RIPEMD320", "SHA0", "SHA1", "SHA", "MGF1", "MGF1SHA1",
|
||||
"MDC2", "SIPHASH"
|
||||
]
|
||||
}
|
||||
|
||||
predicate isEncryptionAlgorithm(string name, string algType) {
|
||||
isAsymmetricEncryptionAlgorithm(name) and algType = "ASYMMETRIC_ENCRYPTION"
|
||||
or
|
||||
isSymmetricEncryptionAlgorithm(name) and algType = "SYMMETRIC_ENCRYPTION"
|
||||
}
|
||||
|
||||
predicate isEncryptionAlgorithm(string name) { isEncryptionAlgorithm(name, _) }
|
||||
|
||||
/**
|
||||
* Holds if `name` corresponds to a known symmetric encryption algorithm.
|
||||
*/
|
||||
predicate isSymmetricEncryptionAlgorithm(string name) {
|
||||
// NOTE: AES is meant to caputure all possible key lengths
|
||||
name =
|
||||
[
|
||||
"AES", "AES128", "AES192", "AES256", "ARIA", "BLOWFISH", "BF", "ECIES", "CAST", "CAST5",
|
||||
"CAMELLIA", "CAMELLIA128", "CAMELLIA192", "CAMELLIA256", "CHACHA", "CHACHA20",
|
||||
"CHACHA20POLY1305", "GOST", "GOSTR34102001", "GOSTR341094", "GOSTR341194", "GOST2814789",
|
||||
"GOSTR341194", "GOST2814789", "GOST28147", "GOSTR341094", "GOST89", "GOST94", "GOST34102012",
|
||||
"GOST34112012", "IDEA", "RABBIT", "SEED", "SM4", "DES", "DESX", "3DES", "TDES", "2DES",
|
||||
"DES3", "TRIPLEDES", "TDEA", "TRIPLEDEA", "ARC2", "RC2", "ARC4", "RC4", "ARCFOUR", "ARC5",
|
||||
"RC5", "MAGMA", "KUZNYECHIK"
|
||||
]
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `name` corresponds to a known key derivation algorithm.
|
||||
*/
|
||||
predicate isKeyDerivationAlgorithm(string name) {
|
||||
name =
|
||||
[
|
||||
"ARGON2", "CONCATKDF", "CONCATKDFHASH", "CONCATKDFHMAC", "KBKDFCMAC", "BCRYPT", "HKDF",
|
||||
"HKDFEXPAND", "KBKDF", "KBKDFHMAC", "PBKDF1", "PBKDF2", "PBKDF2HMAC", "PKCS5", "SCRYPT",
|
||||
"X963KDF", "EVPKDF"
|
||||
]
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `name` corresponds to a known cipher block mode
|
||||
*/
|
||||
predicate isCipherBlockModeAlgorithm(string name) {
|
||||
name = ["CBC", "GCM", "CCM", "CFB", "OFB", "CFB8", "CTR", "OPENPGP", "XTS", "EAX", "SIV", "ECB"]
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `name` corresponds to a known padding algorithm
|
||||
*/
|
||||
predicate isPaddingAlgorithm(string name, string algType) {
|
||||
isSymmetricPaddingAlgorithm(name) and algType = "SYMMETRIC_PADDING"
|
||||
or
|
||||
isAsymmetricPaddingAlgorithm(name) and algType = "ASYMMETRIC_PADDING"
|
||||
}
|
||||
|
||||
/**
|
||||
* holds if `name` corresponds to a known symmetric padding algorithm
|
||||
*/
|
||||
predicate isSymmetricPaddingAlgorithm(string name) { name = ["PKCS7", "ANSIX923"] }
|
||||
|
||||
/**
|
||||
* Holds if `name` corresponds to a known asymmetric padding algorithm
|
||||
*/
|
||||
predicate isAsymmetricPaddingAlgorithm(string name) { name = ["OAEP", "PKCS1V15", "PSS", "KEM"] }
|
||||
|
||||
predicate isBrainpoolCurve(string curveName, int keySize) {
|
||||
// ALL BRAINPOOL CURVES
|
||||
keySize in [160, 192, 224, 256, 320, 384, 512] and
|
||||
(
|
||||
curveName = "BRAINPOOLP" + keySize.toString() + "R1"
|
||||
or
|
||||
curveName = "BRAINPOOLP" + keySize.toString() + "T1"
|
||||
)
|
||||
}
|
||||
|
||||
predicate isSecCurve(string curveName, int keySize) {
|
||||
// ALL SEC CURVES
|
||||
keySize in [112, 113, 128, 131, 160, 163, 192, 193, 224, 233, 239, 256, 283, 384, 409, 521, 571] and
|
||||
exists(string suff | suff in ["R1", "R2", "K1"] |
|
||||
curveName = "SECT" + keySize.toString() + suff or
|
||||
curveName = "SECP" + keySize.toString() + suff
|
||||
)
|
||||
}
|
||||
|
||||
predicate isC2Curve(string curveName, int keySize) {
|
||||
// ALL C2 CURVES
|
||||
keySize in [163, 176, 191, 208, 239, 272, 304, 359, 368, 431] and
|
||||
exists(string pre, string suff |
|
||||
pre in ["PNB", "ONB", "TNB"] and suff in ["V1", "V2", "V3", "V4", "V5", "W1", "R1"]
|
||||
|
|
||||
curveName = "C2" + pre + keySize.toString() + suff
|
||||
)
|
||||
}
|
||||
|
||||
predicate isPrimeCurve(string curveName, int keySize) {
|
||||
// ALL PRIME CURVES
|
||||
keySize in [192, 239, 256] and
|
||||
exists(string suff | suff in ["V1", "V2", "V3"] | curveName = "PRIME" + keySize.toString() + suff)
|
||||
}
|
||||
|
||||
predicate isEllipticCurveAlgorithm(string curveName) { isEllipticCurveAlgorithm(curveName, _) }
|
||||
|
||||
/**
|
||||
* Holds if `name` corresponds to a known elliptic curve.
|
||||
*/
|
||||
predicate isEllipticCurveAlgorithm(string curveName, int keySize) {
|
||||
isSecCurve(curveName, keySize)
|
||||
or
|
||||
isBrainpoolCurve(curveName, keySize)
|
||||
or
|
||||
isC2Curve(curveName, keySize)
|
||||
or
|
||||
isPrimeCurve(curveName, keySize)
|
||||
or
|
||||
curveName = "ES256" and keySize = 256
|
||||
or
|
||||
curveName = "CURVE25519" and keySize = 255
|
||||
or
|
||||
curveName = "X25519" and keySize = 255
|
||||
or
|
||||
curveName = "ED25519" and keySize = 255
|
||||
or
|
||||
curveName = "CURVE448" and keySize = 448 // TODO: need to check the key size
|
||||
or
|
||||
curveName = "ED448" and keySize = 448
|
||||
or
|
||||
curveName = "X448" and keySize = 448
|
||||
or
|
||||
curveName = "NUMSP256T1" and keySize = 256
|
||||
or
|
||||
curveName = "NUMSP384T1" and keySize = 384
|
||||
or
|
||||
curveName = "NUMSP512T1" and keySize = 512
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `name` corresponds to a known signature algorithm.
|
||||
*/
|
||||
predicate isSignatureAlgorithm(string name) {
|
||||
name =
|
||||
[
|
||||
"DSA", "ECDSA", "EDDSA", "ES256", "ES256K", "ES384", "ES512", "ED25519", "ED448", "ECDSA256",
|
||||
"ECDSA384", "ECDSA512"
|
||||
]
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `name` is a key exchange algorithm.
|
||||
*/
|
||||
predicate isKeyExchangeAlgorithm(string name) { name = ["ECDH", "DIFFIEHELLMAN", "X25519", "X448"] }
|
||||
|
||||
/**
|
||||
* Holds if `name` corresponds to a known asymmetric encryption.
|
||||
*/
|
||||
predicate isAsymmetricEncryptionAlgorithm(string name) { name = ["RSA"] }
|
||||
264
python/ql/lib/experimental/cryptography/CryptoArtifact.qll
Normal file
264
python/ql/lib/experimental/cryptography/CryptoArtifact.qll
Normal file
@@ -0,0 +1,264 @@
|
||||
import python
|
||||
private import semmle.python.ApiGraphs
|
||||
private import experimental.cryptography.CryptoAlgorithmNames
|
||||
private import experimental.cryptography.utils.Utils as Utils
|
||||
|
||||
/*
|
||||
* A cryptographic artifact is a DataFlow::Node associated with some
|
||||
* operation, algorithm, or any other aspect of cryptography.
|
||||
*/
|
||||
|
||||
abstract class CryptographicArtifact extends DataFlow::Node { }
|
||||
|
||||
/**
|
||||
* Associates a symmetric encryption algorithm with a block mode.
|
||||
* The DataFlow::Node representing this association should be the
|
||||
* point where the algorithm and block mode are combined.
|
||||
* This may be at the call to encryption or in the construction
|
||||
* of an object prior to encryption.
|
||||
*/
|
||||
abstract class SymmetricCipher extends CryptographicArtifact {
|
||||
abstract SymmetricEncryptionAlgorithm getEncryptionAlgorithm();
|
||||
|
||||
abstract BlockMode getBlockMode();
|
||||
|
||||
final predicate hasBlockMode() { exists(this.getBlockMode()) }
|
||||
}
|
||||
|
||||
/**
|
||||
* A cryptographic operation is a method call that invokes a cryptographic
|
||||
* algorithm (encrypt/decrypt) or a function in support of a cryptographic algorithm
|
||||
* (key generation).
|
||||
*
|
||||
* Since operations are related to or in support of algorithms, operations must
|
||||
* provide a reference to their associated algorithm. Often operataions themselves
|
||||
* encapsulate algorithms, so operations can also extend CryptographicAlgorithm
|
||||
* and refer to themselves as the target algorithm.
|
||||
*/
|
||||
abstract class CryptographicOperation extends CryptographicArtifact, API::CallNode {
|
||||
bindingset[paramName, ind]
|
||||
final DataFlow::Node getParameterSource(int ind, string paramName) {
|
||||
result = Utils::getUltimateSrcFromApiNode(this.(API::CallNode).getParameter(ind, paramName))
|
||||
}
|
||||
|
||||
final string getAlgorithmName() {
|
||||
if exists(this.getAlgorithm().getName())
|
||||
then result = this.getAlgorithm().getName()
|
||||
else result = unknownAlgorithm()
|
||||
}
|
||||
|
||||
final predicate hasAlgorithm() { exists(this.getAlgorithm()) }
|
||||
|
||||
final predicate isUnknownAlgorithm() {
|
||||
this.getAlgorithmName() = unknownAlgorithm()
|
||||
or
|
||||
not this.hasAlgorithm()
|
||||
}
|
||||
|
||||
// TODO: this might have to be parameterized by a configuration source for
|
||||
// situations where an operation is passed an algorithm
|
||||
abstract CryptographicAlgorithm getAlgorithm();
|
||||
}
|
||||
|
||||
/** A key generation operation for asymmetric keys */
|
||||
abstract class KeyGen extends CryptographicOperation {
|
||||
int getAKeySizeInBits() { result = this.getKeySizeInBits(_) }
|
||||
|
||||
final predicate hasKeySize(DataFlow::Node configSrc) { exists(this.getKeySizeInBits(configSrc)) }
|
||||
|
||||
final predicate hasKeySize() { exists(this.getAKeySizeInBits()) }
|
||||
|
||||
abstract DataFlow::Node getKeyConfigSrc();
|
||||
|
||||
abstract int getKeySizeInBits(DataFlow::Node configSrc);
|
||||
}
|
||||
|
||||
abstract class AsymmetricKeyGen extends KeyGen { }
|
||||
|
||||
abstract class SymmetricKeyGen extends KeyGen { }
|
||||
|
||||
/**
|
||||
* A cryptographic algorithm is a `CryptographicArtifact`
|
||||
* representing a cryptographic algorithm (see `CryptoAlgorithmNames.qll`).
|
||||
* Cryptographic algorithms can be functions referencing common crypto algorithms (e.g., hashlib.md5)
|
||||
* or strings that are used in cryptographic operation configurations (e.g., hashlib.new("md5")).
|
||||
* Cryptogrpahic algorithms may also be operations that wrap or abstract one or
|
||||
* more algorithms (e.g., cyrptography.fernet.Fernet and AES, CBC and PKCS7).
|
||||
*
|
||||
* In principle, this class should model the location where an algorithm enters the program, not
|
||||
* necessarily where it is used.
|
||||
*/
|
||||
abstract class CryptographicAlgorithm extends CryptographicArtifact {
|
||||
abstract string getName();
|
||||
|
||||
// TODO: handle case where name isn't known, not just unknown?
|
||||
/**
|
||||
* Normalizes a raw name into a normalized name as found in `CryptoAlgorithmNames.qll`.
|
||||
* Subclassess should override for more api-specific normalization.
|
||||
* By deafult, converts a raw name to upper-case with no hyphen, underscore, hash, or space.
|
||||
*/
|
||||
bindingset[s]
|
||||
string normalizeName(string s) {
|
||||
exists(string normStr | normStr = s.toUpperCase().regexpReplaceAll("[-_ ]", "") |
|
||||
result = normStr and isKnownAlgorithm(result)
|
||||
or
|
||||
result = unknownAlgorithm() and not isKnownAlgorithm(normStr)
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
// class UnknownAlgorithm extends DataFlow::Node instanceof CryptographicAlgorithm{
|
||||
// UnknownAlgorithm(){
|
||||
// super.getName() = unknownAlgorithm()
|
||||
// }
|
||||
// }
|
||||
abstract class HashAlgorithm extends CryptographicAlgorithm {
|
||||
final string getHashName() {
|
||||
if exists(string n | n = this.getName() and isHashingAlgorithm(n))
|
||||
then isHashingAlgorithm(result) and result = this.getName()
|
||||
else result = unknownAlgorithm()
|
||||
}
|
||||
}
|
||||
|
||||
abstract class KeyDerivationAlgorithm extends CryptographicAlgorithm {
|
||||
final string getKDFName() {
|
||||
if exists(string n | n = this.getName() and isKeyDerivationAlgorithm(n))
|
||||
then isKeyDerivationAlgorithm(result) and result = this.getName()
|
||||
else result = unknownAlgorithm()
|
||||
}
|
||||
}
|
||||
|
||||
abstract class KeyDerivationOperation extends CryptographicOperation {
|
||||
DataFlow::Node getIterationSizeSrc() { none() }
|
||||
|
||||
DataFlow::Node getSaltConfigSrc() { none() }
|
||||
|
||||
DataFlow::Node getHashConfigSrc() { none() }
|
||||
|
||||
// TODO: get encryption algorithm for CBC-based KDF?
|
||||
DataFlow::Node getDerivedKeySizeSrc() { none() }
|
||||
|
||||
DataFlow::Node getModeSrc() { none() }
|
||||
|
||||
// TODO: add more to cover all the parameters of most KDF operations? Perhaps subclass for each type?
|
||||
abstract predicate requiresIteration();
|
||||
|
||||
abstract predicate requiresSalt();
|
||||
|
||||
abstract predicate requiresHash();
|
||||
|
||||
//abstract predicate requiresKeySize(); // Going to assume all requires a size
|
||||
abstract predicate requiresMode();
|
||||
}
|
||||
|
||||
/**
|
||||
* A parent class to represent any algorithm for which
|
||||
* asymmetric cryptography is involved.
|
||||
* Intended to be distinct from AsymmetricEncryptionAlgorithm
|
||||
* which is intended only for asymmetric algorithms that specifically encrypt.
|
||||
*/
|
||||
abstract class AsymmetricAlgorithm extends CryptographicAlgorithm { }
|
||||
|
||||
abstract class EncryptionAlgorithm extends CryptographicAlgorithm {
|
||||
final predicate isAsymmetric() { this instanceof AsymmetricEncryptionAlgorithm }
|
||||
|
||||
final predicate isSymmetric() { not this.isAsymmetric() }
|
||||
// NOTE: DO_NOT add getEncryptionName here, we rely on the fact the parent
|
||||
// class does not have this common predicate.
|
||||
}
|
||||
|
||||
/**
|
||||
* Algorithms directly or indirectly related to asymmetric encryption,
|
||||
* e.g., RSA, DSA, but also RSA padding algorithms
|
||||
*/
|
||||
abstract class AsymmetricEncryptionAlgorithm extends AsymmetricAlgorithm, EncryptionAlgorithm {
|
||||
final string getEncryptionName() {
|
||||
if exists(string n | n = this.getName() and isAsymmetricEncryptionAlgorithm(n))
|
||||
then isAsymmetricEncryptionAlgorithm(result) and result = this.getName()
|
||||
else result = unknownAlgorithm()
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Algorithms directly or indirectly related to symmetric encryption,
|
||||
* e.g., AES, DES, but also block modes and padding
|
||||
*/
|
||||
abstract class SymmetricEncryptionAlgorithm extends EncryptionAlgorithm {
|
||||
final string getEncryptionName() {
|
||||
if exists(string n | n = this.getName() and isSymmetricEncryptionAlgorithm(n))
|
||||
then isSymmetricEncryptionAlgorithm(result) and result = this.getName()
|
||||
else result = unknownAlgorithm()
|
||||
}
|
||||
// TODO: add a stream cipher predicate?
|
||||
}
|
||||
|
||||
// Used only to categorize all padding into a single object,
|
||||
// DO_NOT add predicates here. Only for categorization purposes.
|
||||
abstract class PaddingAlgorithm extends CryptographicAlgorithm { }
|
||||
|
||||
abstract class SymmetricPadding extends PaddingAlgorithm {
|
||||
final string getPaddingName() {
|
||||
if exists(string n | n = this.getName() and isSymmetricPaddingAlgorithm(n))
|
||||
then isSymmetricPaddingAlgorithm(result) and result = this.getName()
|
||||
else result = unknownAlgorithm()
|
||||
}
|
||||
}
|
||||
|
||||
abstract class AsymmetricPadding extends PaddingAlgorithm {
|
||||
final string getPaddingName() {
|
||||
if exists(string n | n = this.getName() and isAsymmetricPaddingAlgorithm(n))
|
||||
then isAsymmetricPaddingAlgorithm(result) and result = this.getName()
|
||||
else result = unknownAlgorithm()
|
||||
}
|
||||
}
|
||||
|
||||
abstract class EllipticCurveAlgorithm extends AsymmetricAlgorithm {
|
||||
final string getCurveName() {
|
||||
if exists(string n | n = this.getName() and isEllipticCurveAlgorithm(n))
|
||||
then isEllipticCurveAlgorithm(result) and result = this.getName()
|
||||
else result = unknownAlgorithm()
|
||||
}
|
||||
|
||||
final int getCurveBitSize() { isEllipticCurveAlgorithm(this.getCurveName(), result) }
|
||||
}
|
||||
|
||||
abstract class BlockMode extends CryptographicAlgorithm {
|
||||
final string getBlockModeName() {
|
||||
if exists(string n | n = this.getName() and isCipherBlockModeAlgorithm(n))
|
||||
then isCipherBlockModeAlgorithm(result) and result = this.getName()
|
||||
else result = unknownAlgorithm()
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the source of the IV configuration.
|
||||
*/
|
||||
abstract DataFlow::Node getIVorNonce();
|
||||
|
||||
final predicate hasIVorNonce() { exists(this.getIVorNonce()) }
|
||||
}
|
||||
|
||||
abstract class KeyWrapOperation extends CryptographicOperation { }
|
||||
|
||||
abstract class AuthenticatedEncryptionAlgorithm extends SymmetricEncryptionAlgorithm {
|
||||
final string getAuthticatedEncryptionName() {
|
||||
if exists(string n | n = this.getName() and isSymmetricEncryptionAlgorithm(n))
|
||||
then isSymmetricEncryptionAlgorithm(result) and result = this.getName()
|
||||
else result = unknownAlgorithm()
|
||||
}
|
||||
}
|
||||
|
||||
abstract class KeyExchangeAlgorithm extends AsymmetricAlgorithm {
|
||||
final string getKeyExchangeName() {
|
||||
if exists(string n | n = this.getName() and isKeyExchangeAlgorithm(n))
|
||||
then isKeyExchangeAlgorithm(result) and result = this.getName()
|
||||
else result = unknownAlgorithm()
|
||||
}
|
||||
}
|
||||
|
||||
abstract class SigningAlgorithm extends AsymmetricAlgorithm {
|
||||
final string getSigningName() {
|
||||
if exists(string n | n = this.getName() and isSignatureAlgorithm(n))
|
||||
then isSignatureAlgorithm(result) and result = this.getName()
|
||||
else result = unknownAlgorithm()
|
||||
}
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,237 @@
|
||||
import python
|
||||
import semmle.python.ApiGraphs
|
||||
import experimental.cryptography.CryptoArtifact
|
||||
private import experimental.cryptography.utils.Utils as Utils
|
||||
private import experimental.cryptography.CryptoAlgorithmNames
|
||||
|
||||
/**
|
||||
* `hashlib` is a ptyhon standard library module for hashing algorithms.
|
||||
* https://docs.python.org/3/library/hashlib.html
|
||||
* This is an abstract class to reference all hashlib artifacts.
|
||||
*/
|
||||
// -----------------------------------------------
|
||||
// Hash Artifacts
|
||||
// -----------------------------------------------
|
||||
module Hashes {
|
||||
/**
|
||||
* Represents a hash algorithm used by `hashlib.new`, where the hash algorithm is a string in the first argument.
|
||||
*/
|
||||
class HashlibNewHashAlgorithm extends HashAlgorithm {
|
||||
HashlibNewHashAlgorithm() {
|
||||
this =
|
||||
Utils::getUltimateSrcFromApiNode(API::moduleImport("hashlib")
|
||||
.getMember("new")
|
||||
.getACall()
|
||||
.getParameter(0, "name"))
|
||||
}
|
||||
|
||||
override string getName() {
|
||||
result = super.normalizeName(this.asExpr().(StrConst).getText())
|
||||
or
|
||||
// if not a known/static string, assume from an outside source and the algorithm is UNKNOWN
|
||||
not this.asExpr() instanceof StrConst and result = unknownAlgorithm()
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Identifies hashlib.pbdkf2_hmac calls, identifying the hash algorithm used
|
||||
* in the hmac (matching kdf is handled separately by `HashlibPbkdf2HMACArtifact`).
|
||||
*
|
||||
* https://docs.python.org/3/library/hashlib.html#hashlib.pbkdf2_hmac
|
||||
*/
|
||||
class HashlibPbkdf2HMACHashAlgorithm extends HashAlgorithm {
|
||||
HashlibPbkdf2HMACHashAlgorithm() {
|
||||
this =
|
||||
Utils::getUltimateSrcFromApiNode(API::moduleImport("hashlib")
|
||||
.getMember("pbkdf2_hmac")
|
||||
.getACall()
|
||||
.getParameter(0, "hash_name"))
|
||||
}
|
||||
|
||||
override string getName() {
|
||||
result = super.normalizeName(this.asExpr().(StrConst).getText())
|
||||
or
|
||||
// if not a known/static string, assume from an outside source and the algorithm is UNKNOWN
|
||||
not this.asExpr() instanceof StrConst and result = unknownAlgorithm()
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a call to `hashlib.file_digest` where the hash algorithm is the first argument in `digest`
|
||||
* `nameSrc` is the source of the first argument.
|
||||
*
|
||||
* https://docs.python.org/3/library/hashlib.html#hashlib.file_digest
|
||||
*
|
||||
* NOTE: the digest argument can be, in addition to a string,
|
||||
* a callable that returns a hash object or a hash constructor.
|
||||
* These cases are not considered here since they would be detected separately.
|
||||
* Specifically, other non-string cases are themselves considered sources for alerts, e.g.,
|
||||
* references to hashlib.sha512 is found by `HashlibMemberAlgorithm`.
|
||||
* The only exception is if the source is not a string constant or HashlibMemberAlgorithm.
|
||||
* In these cases, the algorithm is considered 'UNKNOWN'.
|
||||
*/
|
||||
class HashlibFileDigestAlgorithm extends HashAlgorithm {
|
||||
HashlibFileDigestAlgorithm() {
|
||||
this =
|
||||
Utils::getUltimateSrcFromApiNode(API::moduleImport("hashlib")
|
||||
.getMember("file_digest")
|
||||
.getACall()
|
||||
.getParameter(1, "digest")) and
|
||||
// Ignore sources that are hash constructors, allow `HashlibMemberAlgorithm` to detect these
|
||||
this != hashlibMemberHashAlgorithm(_) and
|
||||
// Ignore sources that are HMAC objects, to be handled by HmacModule
|
||||
this != API::moduleImport("hmac").getMember("new").getACall() and
|
||||
this != API::moduleImport("hmac").getMember("HMAC").getACall()
|
||||
}
|
||||
|
||||
override string getName() {
|
||||
// Name is a string constant or consider the name unknown
|
||||
// NOTE: we are excluding hmac.new and hmac.HMAC constructor calls so we are expecting
|
||||
// a string or an outside configuration only
|
||||
result = super.normalizeName(this.asExpr().(StrConst).getText())
|
||||
or
|
||||
not this.asExpr() instanceof StrConst and
|
||||
result = unknownAlgorithm()
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a member access of hashlib that is an algorithm invocation.
|
||||
* `hashName` is the name of the hash algorithm.
|
||||
*
|
||||
* Note: oringally a variant of this predicate was in codeql/github/main
|
||||
* to a predicate to avoid a bad join order.
|
||||
*/
|
||||
// Copying use of nomagic from similar predicate in codeql/main
|
||||
pragma[nomagic]
|
||||
DataFlow::Node hashlibMemberHashAlgorithm(string hashName) {
|
||||
result = API::moduleImport("hashlib").getMember(hashName).asSource() and
|
||||
// Don't matches known non-hash members
|
||||
not hashName in [
|
||||
"new", "pbkdf2_hmac", "algorithms_available", "algorithms_guaranteed", "file_digest"
|
||||
] and
|
||||
// Don't match things like __file__
|
||||
not hashName.regexpMatch("_.*")
|
||||
}
|
||||
|
||||
/**
|
||||
* Identifies hashing algorithm members (i.e., functions) of the `hashlib` module,
|
||||
* e.g., `hashlib.sha512`.
|
||||
*/
|
||||
class HashlibMemberAlgorithm extends HashAlgorithm {
|
||||
HashlibMemberAlgorithm() { this = hashlibMemberHashAlgorithm(_) }
|
||||
|
||||
override string getName() {
|
||||
exists(string rawName |
|
||||
result = super.normalizeName(rawName) and this = hashlibMemberHashAlgorithm(rawName)
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// -----------------------------------------------
|
||||
// Key Derivation Functions
|
||||
// -----------------------------------------------
|
||||
module KDF {
|
||||
// NOTE: Only finds the params of `pbkdf2_hmac` that are non-optional
|
||||
// dk_len is optional, i.e., can be None, and if addressed in this predicate
|
||||
// would result in an unsatisfiable predicate.
|
||||
predicate hashlibPBDKF2HMACKDFRequiredParams(
|
||||
HashlibPbkdf2HMACOperation kdf, API::Node hashParam, API::Node saltParam,
|
||||
API::Node iterationParam
|
||||
) {
|
||||
kdf.getParameter(0, "hash_name") = hashParam and
|
||||
kdf.getParameter(2, "salt") = saltParam and
|
||||
kdf.getParameter(3, "iterations") = iterationParam
|
||||
}
|
||||
|
||||
predicate hashlibPBDKF2HMACKDFOptionalParams(HashlibPbkdf2HMACOperation kdf, API::Node keylenParam) {
|
||||
kdf.getParameter(4, "dklen") = keylenParam
|
||||
}
|
||||
|
||||
/**
|
||||
* Identifies kery derivation function hashlib.pbdkf2_hmac accesses.
|
||||
* https://docs.python.org/3/library/hashlib.html#hashlib.pbkdf2_hmac
|
||||
*/
|
||||
class HashlibPbkdf2HMACOperation extends KeyDerivationAlgorithm, KeyDerivationOperation {
|
||||
HashlibPbkdf2HMACOperation() {
|
||||
this = API::moduleImport("hashlib").getMember("pbkdf2_hmac").getACall()
|
||||
}
|
||||
|
||||
override string getName() { result = super.normalizeName("pbkdf2_hmac") }
|
||||
|
||||
override DataFlow::Node getIterationSizeSrc() {
|
||||
exists(API::Node it | hashlibPBDKF2HMACKDFRequiredParams(this, _, _, it) |
|
||||
result = Utils::getUltimateSrcFromApiNode(it)
|
||||
)
|
||||
}
|
||||
|
||||
override DataFlow::Node getSaltConfigSrc() {
|
||||
exists(API::Node s | hashlibPBDKF2HMACKDFRequiredParams(this, _, s, _) |
|
||||
result = Utils::getUltimateSrcFromApiNode(s)
|
||||
)
|
||||
}
|
||||
|
||||
override DataFlow::Node getHashConfigSrc() {
|
||||
exists(API::Node h | hashlibPBDKF2HMACKDFRequiredParams(this, h, _, _) |
|
||||
result = Utils::getUltimateSrcFromApiNode(h)
|
||||
)
|
||||
}
|
||||
|
||||
override DataFlow::Node getDerivedKeySizeSrc() {
|
||||
exists(API::Node dk | hashlibPBDKF2HMACKDFOptionalParams(this, dk) |
|
||||
result = Utils::getUltimateSrcFromApiNode(dk)
|
||||
)
|
||||
}
|
||||
|
||||
// TODO: if DK is none, then the length is based on the hash type, if hash length not known, must call this unknown
|
||||
// The issue is the src is what we model not the size
|
||||
// For now, we are not modeling this and are relying on the fact that the accepted hashes are of accepted length.
|
||||
// I.e., any query looking at length will ignore cases where it is unknown
|
||||
override KeyDerivationAlgorithm getAlgorithm() { result = this }
|
||||
|
||||
override predicate requiresHash() { any() }
|
||||
|
||||
override predicate requiresMode() { none() }
|
||||
|
||||
override predicate requiresSalt() { any() }
|
||||
|
||||
override predicate requiresIteration() { any() }
|
||||
}
|
||||
|
||||
// TODO: better modeling of scrypt
|
||||
/**
|
||||
* Identifies key derivation fucntion hashlib.scrypt accesses.
|
||||
*/
|
||||
class HashlibScryptAlgorithm extends KeyDerivationAlgorithm, KeyDerivationOperation {
|
||||
HashlibScryptAlgorithm() { this = API::moduleImport("hashlib").getMember("scrypt").getACall() }
|
||||
|
||||
override string getName() { result = super.normalizeName("scrypt") }
|
||||
|
||||
override DataFlow::Node getIterationSizeSrc() { none() }
|
||||
|
||||
override DataFlow::Node getSaltConfigSrc() {
|
||||
// TODO: need to address getting salt from params, unsure how this works in CodeQL
|
||||
// since the signature is defined as hashlib.scrypt(password, *, salt, n, r, p, maxmem=0, dklen=64)
|
||||
// What position is 'salt' then such that we can reliably extract it?
|
||||
none()
|
||||
}
|
||||
|
||||
override DataFlow::Node getHashConfigSrc() { none() }
|
||||
|
||||
override DataFlow::Node getDerivedKeySizeSrc() {
|
||||
//TODO: see comment for getSaltConfigSrc above
|
||||
none()
|
||||
}
|
||||
|
||||
override KeyDerivationAlgorithm getAlgorithm() { result = this }
|
||||
|
||||
override predicate requiresHash() { none() }
|
||||
|
||||
override predicate requiresMode() { none() }
|
||||
|
||||
override predicate requiresSalt() { any() }
|
||||
|
||||
override predicate requiresIteration() { none() }
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,71 @@
|
||||
import python
|
||||
import semmle.python.ApiGraphs
|
||||
import experimental.cryptography.CryptoArtifact
|
||||
private import experimental.cryptography.utils.Utils as Utils
|
||||
private import experimental.cryptography.CryptoAlgorithmNames
|
||||
private import experimental.cryptography.modules.stdlib.HashlibModule as HashlibModule
|
||||
|
||||
/**
|
||||
* `hmac` is a ptyhon standard library module for key-based hashing algorithms.
|
||||
* https://docs.python.org/3/library/hmac.html
|
||||
*/
|
||||
// -----------------------------------------------
|
||||
// Hash Artifacts
|
||||
// -----------------------------------------------
|
||||
module Hashes {
|
||||
class GenericHmacHashCall extends API::CallNode {
|
||||
GenericHmacHashCall() {
|
||||
this = API::moduleImport("hmac").getMember("new").getACall() or
|
||||
this = API::moduleImport("hmac").getMember("HMAC").getACall() or
|
||||
this = API::moduleImport("hmac").getMember("digest").getACall()
|
||||
}
|
||||
}
|
||||
|
||||
DataFlow::Node getDigestModParamSrc(GenericHmacHashCall call) {
|
||||
result = Utils::getUltimateSrcFromApiNode(call.(API::CallNode).getParameter(2, "digestmod"))
|
||||
}
|
||||
|
||||
/**
|
||||
* This class captures the common behavior for all HMAC operations:
|
||||
* hmac.HMAC https://docs.python.org/3/library/hmac.html#hmac.HMAC
|
||||
* hmac.new https://docs.python.org/3/library/hmac.html#hmac.new
|
||||
* hmac.digest https://docs.python.org/3/library/hmac.html#hmac.digest
|
||||
* These operations commonly set the algorithm as a string in the third argument (`digestmod`)
|
||||
* of the operation itself.
|
||||
*
|
||||
* NOTE: `digestmod` is the digest name, digest constructor or module for the HMAC object to use, however
|
||||
* this class only identifies string names. The other forms are found by CryptopgraphicArtifacts,
|
||||
* modeled in `HmacHMACConsArtifact` and `Hashlib.qll`, specifically through hashlib.new and
|
||||
* direct member accesses (e.g., hashlib.md5).
|
||||
*
|
||||
* Where no `digestmod` exists, the algorithm is assumed to be `md5` per the docs found here:
|
||||
* https://docs.python.org/3/library/hmac.html#hmac.new
|
||||
*
|
||||
* Where `digestmod` exists but is not a string and not a hashlib algorithm, it is assumed
|
||||
* to be `UNKNOWN`. Note this includes cases wheere the digest is provided as a `A module supporting PEP 247.`
|
||||
* Such modules are currently not modeled.
|
||||
*/
|
||||
class HmacGenericAlgorithm extends HashAlgorithm {
|
||||
HmacGenericAlgorithm() {
|
||||
exists(GenericHmacHashCall c |
|
||||
if not exists(getDigestModParamSrc(c)) then this = c else this = getDigestModParamSrc(c)
|
||||
) and
|
||||
// Ignore case where the algorithm is a hashlib algorithm, rely on `HashlibMemberAlgorithm` to catch these cases
|
||||
not this instanceof HashlibModule::Hashes::HashlibMemberAlgorithm
|
||||
// NOTE: the docs say the digest can be `A module supporting PEP 247.`, however, this is not modeled, and will be considered UNKNOWN
|
||||
}
|
||||
|
||||
override string getName() {
|
||||
// when the this is a generic hmac call
|
||||
// it means the algorithm parameter was not identified, assume the default case of 'md5' (per the docs)
|
||||
if this instanceof GenericHmacHashCall
|
||||
then result = super.normalizeName("MD5")
|
||||
else (
|
||||
// Else get the string name, if its a string constant, or UNKNOWN if otherwise
|
||||
result = super.normalizeName(this.asExpr().(StrConst).getText())
|
||||
or
|
||||
not this.asExpr() instanceof StrConst and result = unknownAlgorithm()
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,12 @@
|
||||
/**
|
||||
* Patches all DataFlow::CallCfgNode adding a getTarget predicate to a new
|
||||
* subclass of CallCfgNode
|
||||
*/
|
||||
|
||||
import python
|
||||
private import semmle.python.dataflow.new.internal.TypeTrackerSpecific
|
||||
private import semmle.python.ApiGraphs
|
||||
|
||||
class CallCfgNodeWithTarget extends DataFlow::Node instanceof DataFlow::CallCfgNode {
|
||||
DataFlow::Node getTarget() { returnStep(result, this) }
|
||||
}
|
||||
21
python/ql/lib/experimental/cryptography/utils/Utils.qll
Normal file
21
python/ql/lib/experimental/cryptography/utils/Utils.qll
Normal file
@@ -0,0 +1,21 @@
|
||||
import python
|
||||
private import semmle.python.ApiGraphs
|
||||
private import experimental.cryptography.utils.CallCfgNodeWithTarget
|
||||
|
||||
/**
|
||||
* Gets an ultimate local source (not a source in a library)
|
||||
*/
|
||||
DataFlow::Node getUltimateSrcFromApiNode(API::Node n) {
|
||||
result = n.getAValueReachingSink() and
|
||||
(
|
||||
// the result is a call to a library only
|
||||
result instanceof CallCfgNodeWithTarget and
|
||||
not result.(CallCfgNodeWithTarget).getTarget().asExpr().getEnclosingModule().inSource()
|
||||
or
|
||||
// the result is not a call, and not a function signataure or parameter
|
||||
not result instanceof CallCfgNodeWithTarget and
|
||||
not result instanceof DataFlow::ParameterNode and
|
||||
not result.asExpr() instanceof FunctionExpr and
|
||||
result.asExpr().getEnclosingModule().inSource()
|
||||
)
|
||||
}
|
||||
@@ -1,15 +1,17 @@
|
||||
name: codeql/python-all
|
||||
version: 0.9.3-dev
|
||||
version: 0.11.4-dev
|
||||
groups: python
|
||||
dbscheme: semmlecode.python.dbscheme
|
||||
extractor: python
|
||||
library: true
|
||||
upgrades: upgrades
|
||||
dependencies:
|
||||
codeql/dataflow: ${workspace}
|
||||
codeql/mad: ${workspace}
|
||||
codeql/regex: ${workspace}
|
||||
codeql/tutorial: ${workspace}
|
||||
codeql/util: ${workspace}
|
||||
codeql/yaml: ${workspace}
|
||||
dataExtensions:
|
||||
- semmle/python/frameworks/**/model.yml
|
||||
- semmle/python/frameworks/**/*.model.yml
|
||||
warnOnImplicitThis: true
|
||||
|
||||
@@ -155,18 +155,6 @@ module API {
|
||||
*/
|
||||
DataFlow::LocalSourceNode asSource() { Impl::use(this, result) }
|
||||
|
||||
/** DEPRECATED. This predicate has been renamed to `getAValueReachableFromSource()`. */
|
||||
deprecated DataFlow::Node getAUse() { result = this.getAValueReachableFromSource() }
|
||||
|
||||
/** DEPRECATED. This predicate has been renamed to `asSource()`. */
|
||||
deprecated DataFlow::LocalSourceNode getAnImmediateUse() { result = this.asSource() }
|
||||
|
||||
/** DEPRECATED. This predicate has been renamed to `asSink()`. */
|
||||
deprecated DataFlow::Node getARhs() { result = this.asSink() }
|
||||
|
||||
/** DEPRECATED. This predicate has been renamed to `getAValueReachingSink()`. */
|
||||
deprecated DataFlow::Node getAValueReachingRhs() { result = this.getAValueReachingSink() }
|
||||
|
||||
/**
|
||||
* Gets a call to the function represented by this API component.
|
||||
*/
|
||||
|
||||
@@ -154,6 +154,28 @@ class StringPart extends StringPart_, AstNode {
|
||||
override string toString() { result = StringPart_.super.toString() }
|
||||
|
||||
override Location getLocation() { result = StringPart_.super.getLocation() }
|
||||
|
||||
/**
|
||||
* Holds if the content of string `StringPart` is surrounded by
|
||||
* a prefix (including a quote) of length `prefixLength` and
|
||||
* a quote of length `quoteLength`.
|
||||
*/
|
||||
predicate contextSize(int prefixLength, int quoteLength) {
|
||||
exists(int occurrenceOffset |
|
||||
quoteLength = this.getText().regexpFind("\"{3}|\"{1}|'{3}|'{1}", 0, occurrenceOffset).length() and
|
||||
prefixLength = occurrenceOffset + quoteLength
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the length of the content, that is the text between the prefix and the quote.
|
||||
* See `context` for obtaining the prefix and the quote.
|
||||
*/
|
||||
int getContentLength() {
|
||||
exists(int prefixLength, int quoteLength | this.contextSize(prefixLength, quoteLength) |
|
||||
result = this.getText().length() - prefixLength - quoteLength
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
class StringPartList extends StringPartList_ { }
|
||||
@@ -198,3 +220,56 @@ class StringList extends StringList_ { }
|
||||
|
||||
/** A list of aliases in an import statement */
|
||||
class AliasList extends AliasList_ { }
|
||||
|
||||
/** A generic type parameter, as seen in function, class, and type alias definitions. */
|
||||
class TypeParameter extends TypeParameter_, AstNode {
|
||||
/** Gets a textual representation of this element */
|
||||
override string toString() { result = TypeParameter_.super.toString() }
|
||||
|
||||
override AstNode getAChildNode() { none() }
|
||||
|
||||
override Scope getScope() {
|
||||
exists(Function f | this = f.getATypeParameter() and result = f)
|
||||
or
|
||||
exists(ClassExpr c | this = c.getATypeParameter() and result = c.getInnerScope())
|
||||
or
|
||||
// For `TypeAlias`, this is not quite right. Instead, `TypeAlias`es should define their own scopes, cf. https://docs.python.org/3.12/reference/executionmodel.html#annotation-scopes
|
||||
exists(TypeAlias t | this = t.getATypeParameter() and result = t.getScope())
|
||||
}
|
||||
|
||||
/** Gets the location of this element */
|
||||
override Location getLocation() { result = TypeParameter_.super.getLocation() }
|
||||
}
|
||||
|
||||
/** A list of type parameters */
|
||||
class TypeParameterList extends TypeParameterList_ { }
|
||||
|
||||
/** A parent of a `TypeParameterList`. Internal implementation class. */
|
||||
class TypeParameterListParent extends TypeParameterListParent_ { }
|
||||
|
||||
/** A type alias statement, such as `type T[T1,T2] = T3`. */
|
||||
class TypeAlias extends TypeAlias_, Stmt {
|
||||
/** Gets the name of this type alias. */
|
||||
override Name getName() { result = super.getName() }
|
||||
}
|
||||
|
||||
/** A type variable, with an optional bound, such as `T1` and `T2` in `type T[T1, T2: T3] = T4`. */
|
||||
class TypeVar extends TypeVar_, TypeParameter {
|
||||
override Name getName() { result = super.getName() }
|
||||
|
||||
override Expr getAChildNode() { result in [this.getName(), this.getBound()] }
|
||||
}
|
||||
|
||||
/** A type var tuple parameter, such as `*T1` in `type T[*T1] = T2`. */
|
||||
class TypeVarTuple extends TypeVarTuple_, TypeParameter {
|
||||
override Name getName() { result = super.getName() }
|
||||
|
||||
override Expr getAChildNode() { result = this.getName() }
|
||||
}
|
||||
|
||||
/** A param spec parameter, such as `**T1` in `type T[**T1] = T2`. */
|
||||
class ParamSpec extends ParamSpec_, TypeParameter {
|
||||
override Name getName() { result = super.getName() }
|
||||
|
||||
override Expr getAChildNode() { result = this.getName() }
|
||||
}
|
||||
|
||||
@@ -285,6 +285,15 @@ class ClassExpr_ extends @py_ClassExpr, Expr {
|
||||
/** Gets the class scope of this class definition. */
|
||||
Class getInnerScope() { py_Classes(result, this) }
|
||||
|
||||
/** Gets the type parameters of this class definition. */
|
||||
TypeParameterList getTypeParameters() { py_type_parameter_lists(result, this) }
|
||||
|
||||
/** Gets the nth type parameter of this class definition. */
|
||||
TypeParameter getTypeParameter(int index) { result = this.getTypeParameters().getItem(index) }
|
||||
|
||||
/** Gets a type parameter of this class definition. */
|
||||
TypeParameter getATypeParameter() { result = this.getTypeParameters().getAnItem() }
|
||||
|
||||
override string toString() { result = "ClassExpr" }
|
||||
}
|
||||
|
||||
@@ -554,6 +563,15 @@ class Function_ extends @py_Function {
|
||||
/** Whether the async property of this function is true. */
|
||||
predicate isAsync() { py_bools(this, 6) }
|
||||
|
||||
/** Gets the type parameters of this function. */
|
||||
TypeParameterList getTypeParameters() { py_type_parameter_lists(result, this) }
|
||||
|
||||
/** Gets the nth type parameter of this function. */
|
||||
TypeParameter getTypeParameter(int index) { result = this.getTypeParameters().getItem(index) }
|
||||
|
||||
/** Gets a type parameter of this function. */
|
||||
TypeParameter getATypeParameter() { result = this.getTypeParameters().getAnItem() }
|
||||
|
||||
/** Gets a parent of this function */
|
||||
FunctionParent getParent() { py_Functions(this, result) }
|
||||
|
||||
@@ -1103,6 +1121,14 @@ class Param_ extends @py_Param, ExprContext {
|
||||
override string toString() { result = "Param" }
|
||||
}
|
||||
|
||||
/** INTERNAL: See the class `ParamSpec` for further information. */
|
||||
class ParamSpec_ extends @py_ParamSpec, TypeParameter {
|
||||
/** Gets the name of this parameter spec. */
|
||||
Expr getName() { py_exprs(result, _, this, 1) }
|
||||
|
||||
override string toString() { result = "ParamSpec" }
|
||||
}
|
||||
|
||||
/** INTERNAL: See the class `Pass` for further information. */
|
||||
class Pass_ extends @py_Pass, Stmt {
|
||||
override string toString() { result = "Pass" }
|
||||
@@ -1412,6 +1438,45 @@ class Tuple_ extends @py_Tuple, Expr {
|
||||
override string toString() { result = "Tuple" }
|
||||
}
|
||||
|
||||
/** INTERNAL: See the class `TypeAlias` for further information. */
|
||||
class TypeAlias_ extends @py_TypeAlias, Stmt {
|
||||
/** Gets the name of this type alias. */
|
||||
Expr getName() { py_exprs(result, _, this, 1) }
|
||||
|
||||
/** Gets the type_parameters of this type alias. */
|
||||
TypeParameterList getTypeParameters() { py_type_parameter_lists(result, this) }
|
||||
|
||||
/** Gets the nth type_parameter of this type alias. */
|
||||
TypeParameter getTypeParameter(int index) { result = this.getTypeParameters().getItem(index) }
|
||||
|
||||
/** Gets a type_parameter of this type alias. */
|
||||
TypeParameter getATypeParameter() { result = this.getTypeParameters().getAnItem() }
|
||||
|
||||
/** Gets the value of this type alias. */
|
||||
Expr getValue() { py_exprs(result, _, this, 3) }
|
||||
|
||||
override string toString() { result = "TypeAlias" }
|
||||
}
|
||||
|
||||
/** INTERNAL: See the class `TypeVar` for further information. */
|
||||
class TypeVar_ extends @py_TypeVar, TypeParameter {
|
||||
/** Gets the name of this type variable. */
|
||||
Expr getName() { py_exprs(result, _, this, 1) }
|
||||
|
||||
/** Gets the bound of this type variable. */
|
||||
Expr getBound() { py_exprs(result, _, this, 2) }
|
||||
|
||||
override string toString() { result = "TypeVar" }
|
||||
}
|
||||
|
||||
/** INTERNAL: See the class `TypeVarTuple` for further information. */
|
||||
class TypeVarTuple_ extends @py_TypeVarTuple, TypeParameter {
|
||||
/** Gets the name of this type variable tuple. */
|
||||
Expr getName() { py_exprs(result, _, this, 1) }
|
||||
|
||||
override string toString() { result = "TypeVarTuple" }
|
||||
}
|
||||
|
||||
/** INTERNAL: See the class `UAdd` for further information. */
|
||||
class UAdd_ extends @py_UAdd, Unaryop {
|
||||
override string toString() { result = "UAdd" }
|
||||
@@ -1908,6 +1973,39 @@ class StrParent_ extends @py_str_parent {
|
||||
string toString() { result = "StrParent" }
|
||||
}
|
||||
|
||||
/** INTERNAL: See the class `TypeParameter` for further information. */
|
||||
class TypeParameter_ extends @py_type_parameter {
|
||||
/** Gets the location of this type parameter. */
|
||||
Location getLocation() { py_locations(result, this) }
|
||||
|
||||
/** Gets a parent of this type parameter */
|
||||
TypeParameterList getParent() { py_type_parameters(this, _, result, _) }
|
||||
|
||||
/** Gets a textual representation of this element. */
|
||||
string toString() { result = "TypeParameter" }
|
||||
}
|
||||
|
||||
/** INTERNAL: See the class `TypeParameterList` for further information. */
|
||||
class TypeParameterList_ extends @py_type_parameter_list {
|
||||
/** Gets a parent of this type parameter list */
|
||||
TypeParameterListParent getParent() { py_type_parameter_lists(this, result) }
|
||||
|
||||
/** Gets an item of this type parameter list */
|
||||
TypeParameter getAnItem() { py_type_parameters(result, _, this, _) }
|
||||
|
||||
/** Gets the nth item of this type parameter list */
|
||||
TypeParameter getItem(int index) { py_type_parameters(result, _, this, index) }
|
||||
|
||||
/** Gets a textual representation of this element. */
|
||||
string toString() { result = "TypeParameterList" }
|
||||
}
|
||||
|
||||
/** INTERNAL: See the class `TypeParameterListParent` for further information. */
|
||||
class TypeParameterListParent_ extends @py_type_parameter_list_parent {
|
||||
/** Gets a textual representation of this element. */
|
||||
string toString() { result = "TypeParameterListParent" }
|
||||
}
|
||||
|
||||
/** INTERNAL: See the class `Unaryop` for further information. */
|
||||
class Unaryop_ extends @py_unaryop {
|
||||
/** Gets a parent of this unary operation */
|
||||
|
||||
@@ -378,6 +378,68 @@ module SqlExecution {
|
||||
}
|
||||
}
|
||||
|
||||
/** Provides a class for modeling NoSQL execution APIs. */
|
||||
module NoSqlExecution {
|
||||
/**
|
||||
* A data-flow node that executes NoSQL queries.
|
||||
*
|
||||
* Extend this class to model new APIs. If you want to refine existing API models,
|
||||
* extend `NoSqlExecution` instead.
|
||||
*/
|
||||
abstract class Range extends DataFlow::Node {
|
||||
/** Gets the argument that specifies the NoSQL query to be executed. */
|
||||
abstract DataFlow::Node getQuery();
|
||||
|
||||
/** Holds if this query will unpack/interpret a dictionary */
|
||||
abstract predicate interpretsDict();
|
||||
|
||||
/** Holds if this query can be dangerous when run on a user-controlled string */
|
||||
abstract predicate vulnerableToStrings();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A data-flow node that executes NoSQL queries.
|
||||
*
|
||||
* Extend this class to refine existing API models. If you want to model new APIs,
|
||||
* extend `NoSqlExecution::Range` instead.
|
||||
*/
|
||||
class NoSqlExecution extends DataFlow::Node instanceof NoSqlExecution::Range {
|
||||
/** Gets the argument that specifies the NoSQL query to be executed. */
|
||||
DataFlow::Node getQuery() { result = super.getQuery() }
|
||||
|
||||
/** Holds if this query will unpack/interpret a dictionary */
|
||||
predicate interpretsDict() { super.interpretsDict() }
|
||||
|
||||
/** Holds if this query can be dangerous when run on a user-controlled string */
|
||||
predicate vulnerableToStrings() { super.vulnerableToStrings() }
|
||||
}
|
||||
|
||||
/** Provides classes for modeling NoSql sanitization-related APIs. */
|
||||
module NoSqlSanitizer {
|
||||
/**
|
||||
* A data-flow node that collects functions sanitizing NoSQL queries.
|
||||
*
|
||||
* Extend this class to model new APIs. If you want to refine existing API models,
|
||||
* extend `NoSQLSanitizer` instead.
|
||||
*/
|
||||
abstract class Range extends DataFlow::Node {
|
||||
/** Gets the argument that specifies the NoSql query to be sanitized. */
|
||||
abstract DataFlow::Node getAnInput();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A data-flow node that collects functions sanitizing NoSQL queries.
|
||||
*
|
||||
* Extend this class to model new APIs. If you want to refine existing API models,
|
||||
* extend `NoSQLSanitizer::Range` instead.
|
||||
*/
|
||||
class NoSqlSanitizer extends DataFlow::Node instanceof NoSqlSanitizer::Range {
|
||||
/** Gets the argument that specifies the NoSql query to be sanitized. */
|
||||
DataFlow::Node getAnInput() { result = super.getAnInput() }
|
||||
}
|
||||
|
||||
/**
|
||||
* A data-flow node that executes a regular expression.
|
||||
*
|
||||
@@ -680,6 +742,9 @@ module Escaping {
|
||||
/** Gets the escape-kind for escaping a string so it can safely be included in HTML. */
|
||||
string getHtmlKind() { result = "html" }
|
||||
|
||||
/** Gets the escape-kind for escaping a string so it can safely be included in XML. */
|
||||
string getXmlKind() { result = "xml" }
|
||||
|
||||
/** Gets the escape-kind for escaping a string so it can safely be included in a regular expression. */
|
||||
string getRegexKind() { result = "regex" }
|
||||
|
||||
@@ -710,6 +775,15 @@ class HtmlEscaping extends Escaping {
|
||||
HtmlEscaping() { super.getKind() = Escaping::getHtmlKind() }
|
||||
}
|
||||
|
||||
/**
|
||||
* An escape of a string so it can be safely included in
|
||||
* the body of an XML element, for example, replacing `&` and `<>` in
|
||||
* `<foo>&xxe;<foo>`.
|
||||
*/
|
||||
class XmlEscaping extends Escaping {
|
||||
XmlEscaping() { super.getKind() = Escaping::getXmlKind() }
|
||||
}
|
||||
|
||||
/**
|
||||
* An escape of a string so it can be safely included in
|
||||
* the body of a regex.
|
||||
@@ -854,7 +928,10 @@ module Http {
|
||||
|
||||
override Parameter getARoutedParameter() {
|
||||
result = rs.getARoutedParameter() and
|
||||
result in [this.getArg(_), this.getArgByName(_)]
|
||||
result in [
|
||||
this.getArg(_), this.getArgByName(_), this.getVararg().(Parameter),
|
||||
this.getKwarg().(Parameter)
|
||||
]
|
||||
}
|
||||
|
||||
override string getFramework() { result = rs.getFramework() }
|
||||
|
||||
@@ -1,7 +1,32 @@
|
||||
/** Provides classes for working with files and folders. */
|
||||
|
||||
import python
|
||||
private import codeql.util.FileSystem
|
||||
|
||||
private module Input implements InputSig {
|
||||
abstract class ContainerBase extends @container {
|
||||
abstract string getAbsolutePath();
|
||||
|
||||
ContainerBase getParentContainer() { containerparent(result, this) }
|
||||
|
||||
string toString() { result = this.getAbsolutePath() }
|
||||
}
|
||||
|
||||
class FolderBase extends ContainerBase, @folder {
|
||||
override string getAbsolutePath() { folders(this, result) }
|
||||
}
|
||||
|
||||
class FileBase extends ContainerBase, @file {
|
||||
override string getAbsolutePath() { files(this, result) }
|
||||
}
|
||||
|
||||
predicate hasSourceLocationPrefix = sourceLocationPrefix/1;
|
||||
}
|
||||
|
||||
private module Impl = Make<Input>;
|
||||
|
||||
/** A file */
|
||||
class File extends Container, @file {
|
||||
class File extends Container, Impl::File {
|
||||
/**
|
||||
* Holds if this element is at the specified location.
|
||||
* The location spans column `startcolumn` of line `startline` to
|
||||
@@ -45,11 +70,6 @@ class File extends Container, @file {
|
||||
)
|
||||
}
|
||||
|
||||
override string getAbsolutePath() { files(this, result) }
|
||||
|
||||
/** Gets the URL of this file. */
|
||||
override string getURL() { result = "file://" + this.getAbsolutePath() + ":0:0:0:0" }
|
||||
|
||||
override Container getImportRoot(int n) {
|
||||
/* File stem must be a legal Python identifier */
|
||||
this.getStem().regexpMatch("[^\\d\\W]\\w*") and
|
||||
@@ -108,7 +128,7 @@ private predicate occupied_line(File f, int n) {
|
||||
}
|
||||
|
||||
/** A folder (directory) */
|
||||
class Folder extends Container, @folder {
|
||||
class Folder extends Container, Impl::Folder {
|
||||
/**
|
||||
* Holds if this element is at the specified location.
|
||||
* The location spans column `startcolumn` of line `startline` to
|
||||
@@ -126,11 +146,6 @@ class Folder extends Container, @folder {
|
||||
endcolumn = 0
|
||||
}
|
||||
|
||||
override string getAbsolutePath() { folders(this, result) }
|
||||
|
||||
/** Gets the URL of this folder. */
|
||||
override string getURL() { result = "folder://" + this.getAbsolutePath() }
|
||||
|
||||
override Container getImportRoot(int n) {
|
||||
this.isImportRoot(n) and result = this
|
||||
or
|
||||
@@ -144,34 +159,8 @@ class Folder extends Container, @folder {
|
||||
* A container is an abstract representation of a file system object that can
|
||||
* hold elements of interest.
|
||||
*/
|
||||
abstract class Container extends @container {
|
||||
Container getParent() { containerparent(result, this) }
|
||||
|
||||
/**
|
||||
* Gets a textual representation of the path of this container.
|
||||
*
|
||||
* This is the absolute path of the container.
|
||||
*/
|
||||
string toString() { result = this.getAbsolutePath() }
|
||||
|
||||
/**
|
||||
* Gets the relative path of this file or folder from the root folder of the
|
||||
* analyzed source location. The relative path of the root folder itself is
|
||||
* the empty string.
|
||||
*
|
||||
* This has no result if the container is outside the source root, that is,
|
||||
* if the root folder is not a reflexive, transitive parent of this container.
|
||||
*/
|
||||
string getRelativePath() {
|
||||
exists(string absPath, string pref |
|
||||
absPath = this.getAbsolutePath() and sourceLocationPrefix(pref)
|
||||
|
|
||||
absPath = pref and result = ""
|
||||
or
|
||||
absPath = pref.regexpReplaceAll("/$", "") + "/" + result and
|
||||
not result.matches("/%")
|
||||
)
|
||||
}
|
||||
class Container extends Impl::Container {
|
||||
Container getParent() { result = this.getParentContainer() }
|
||||
|
||||
/** Whether this file or folder is part of the standard library */
|
||||
predicate inStdlib() { this.inStdlib(_, _) }
|
||||
@@ -187,135 +176,13 @@ abstract class Container extends @container {
|
||||
)
|
||||
}
|
||||
|
||||
/* Standard cross-language API */
|
||||
/** Gets a file or sub-folder in this container. */
|
||||
Container getAChildContainer() { containerparent(this, result) }
|
||||
|
||||
/** Gets a file in this container. */
|
||||
File getAFile() { result = this.getAChildContainer() }
|
||||
|
||||
/** Gets a sub-folder in this container. */
|
||||
Folder getAFolder() { result = this.getAChildContainer() }
|
||||
|
||||
/**
|
||||
* Gets the absolute, canonical path of this container, using forward slashes
|
||||
* as path separator.
|
||||
*
|
||||
* The path starts with a _root prefix_ followed by zero or more _path
|
||||
* segments_ separated by forward slashes.
|
||||
*
|
||||
* The root prefix is of one of the following forms:
|
||||
*
|
||||
* 1. A single forward slash `/` (Unix-style)
|
||||
* 2. An upper-case drive letter followed by a colon and a forward slash,
|
||||
* such as `C:/` (Windows-style)
|
||||
* 3. Two forward slashes, a computer name, and then another forward slash,
|
||||
* such as `//FileServer/` (UNC-style)
|
||||
*
|
||||
* Path segments are never empty (that is, absolute paths never contain two
|
||||
* contiguous slashes, except as part of a UNC-style root prefix). Also, path
|
||||
* segments never contain forward slashes, and no path segment is of the
|
||||
* form `.` (one dot) or `..` (two dots).
|
||||
*
|
||||
* Note that an absolute path never ends with a forward slash, except if it is
|
||||
* a bare root prefix, that is, the path has no path segments. A container
|
||||
* whose absolute path has no segments is always a `Folder`, not a `File`.
|
||||
*/
|
||||
abstract string getAbsolutePath();
|
||||
|
||||
/**
|
||||
* Gets the base name of this container including extension, that is, the last
|
||||
* segment of its absolute path, or the empty string if it has no segments.
|
||||
*
|
||||
* Here are some examples of absolute paths and the corresponding base names
|
||||
* (surrounded with quotes to avoid ambiguity):
|
||||
*
|
||||
* <table border="1">
|
||||
* <tr><th>Absolute path</th><th>Base name</th></tr>
|
||||
* <tr><td>"/tmp/tst.py"</td><td>"tst.py"</td></tr>
|
||||
* <tr><td>"C:/Program Files (x86)"</td><td>"Program Files (x86)"</td></tr>
|
||||
* <tr><td>"/"</td><td>""</td></tr>
|
||||
* <tr><td>"C:/"</td><td>""</td></tr>
|
||||
* <tr><td>"D:/"</td><td>""</td></tr>
|
||||
* <tr><td>"//FileServer/"</td><td>""</td></tr>
|
||||
* </table>
|
||||
*/
|
||||
string getBaseName() {
|
||||
result = this.getAbsolutePath().regexpCapture(".*/(([^/]*?)(?:\\.([^.]*))?)", 1)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the extension of this container, that is, the suffix of its base name
|
||||
* after the last dot character, if any.
|
||||
*
|
||||
* In particular,
|
||||
*
|
||||
* - if the name does not include a dot, there is no extension, so this
|
||||
* predicate has no result;
|
||||
* - if the name ends in a dot, the extension is the empty string;
|
||||
* - if the name contains multiple dots, the extension follows the last dot.
|
||||
*
|
||||
* Here are some examples of absolute paths and the corresponding extensions
|
||||
* (surrounded with quotes to avoid ambiguity):
|
||||
*
|
||||
* <table border="1">
|
||||
* <tr><th>Absolute path</th><th>Extension</th></tr>
|
||||
* <tr><td>"/tmp/tst.py"</td><td>"py"</td></tr>
|
||||
* <tr><td>"/tmp/.gitignore"</td><td>"gitignore"</td></tr>
|
||||
* <tr><td>"/bin/bash"</td><td>not defined</td></tr>
|
||||
* <tr><td>"/tmp/tst2."</td><td>""</td></tr>
|
||||
* <tr><td>"/tmp/x.tar.gz"</td><td>"gz"</td></tr>
|
||||
* </table>
|
||||
*/
|
||||
string getExtension() {
|
||||
result = this.getAbsolutePath().regexpCapture(".*/([^/]*?)(\\.([^.]*))?", 3)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the stem of this container, that is, the prefix of its base name up to
|
||||
* (but not including) the last dot character if there is one, or the entire
|
||||
* base name if there is not.
|
||||
*
|
||||
* Here are some examples of absolute paths and the corresponding stems
|
||||
* (surrounded with quotes to avoid ambiguity):
|
||||
*
|
||||
* <table border="1">
|
||||
* <tr><th>Absolute path</th><th>Stem</th></tr>
|
||||
* <tr><td>"/tmp/tst.py"</td><td>"tst"</td></tr>
|
||||
* <tr><td>"/tmp/.gitignore"</td><td>""</td></tr>
|
||||
* <tr><td>"/bin/bash"</td><td>"bash"</td></tr>
|
||||
* <tr><td>"/tmp/tst2."</td><td>"tst2"</td></tr>
|
||||
* <tr><td>"/tmp/x.tar.gz"</td><td>"x.tar"</td></tr>
|
||||
* </table>
|
||||
*/
|
||||
string getStem() {
|
||||
result = this.getAbsolutePath().regexpCapture(".*/([^/]*?)(?:\\.([^.]*))?", 1)
|
||||
}
|
||||
|
||||
File getFile(string baseName) {
|
||||
result = this.getAFile() and
|
||||
result.getBaseName() = baseName
|
||||
}
|
||||
|
||||
Folder getFolder(string baseName) {
|
||||
result = this.getAFolder() and
|
||||
result.getBaseName() = baseName
|
||||
}
|
||||
|
||||
Container getParentContainer() { this = result.getAChildContainer() }
|
||||
override Container getParentContainer() { result = super.getParentContainer() }
|
||||
|
||||
Container getChildContainer(string baseName) {
|
||||
result = this.getAChildContainer() and
|
||||
result.getBaseName() = baseName
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a URL representing the location of this container.
|
||||
*
|
||||
* For more information see [Providing URLs](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/#providing-urls).
|
||||
*/
|
||||
abstract string getURL();
|
||||
|
||||
/** Holds if this folder is on the import path. */
|
||||
predicate isImportRoot() { this.isImportRoot(_) }
|
||||
|
||||
|
||||
@@ -547,6 +547,31 @@ class IfExprNode extends ControlFlowNode {
|
||||
override IfExp getNode() { result = super.getNode() }
|
||||
}
|
||||
|
||||
/** A control flow node corresponding to an assignment expression such as `lhs := rhs`. */
|
||||
class AssignmentExprNode extends ControlFlowNode {
|
||||
AssignmentExprNode() { toAst(this) instanceof AssignExpr }
|
||||
|
||||
/** Gets the flow node corresponding to the left-hand side of the assignment expression */
|
||||
ControlFlowNode getTarget() {
|
||||
exists(AssignExpr a |
|
||||
this.getNode() = a and
|
||||
a.getTarget() = result.getNode() and
|
||||
result.getBasicBlock().dominates(this.getBasicBlock())
|
||||
)
|
||||
}
|
||||
|
||||
/** Gets the flow node corresponding to the right-hand side of the assignment expression */
|
||||
ControlFlowNode getValue() {
|
||||
exists(AssignExpr a |
|
||||
this.getNode() = a and
|
||||
a.getValue() = result.getNode() and
|
||||
result.getBasicBlock().dominates(this.getBasicBlock())
|
||||
)
|
||||
}
|
||||
|
||||
override AssignExpr getNode() { result = super.getNode() }
|
||||
}
|
||||
|
||||
/** A control flow node corresponding to a binary expression, such as `x + y` */
|
||||
class BinaryExprNode extends ControlFlowNode {
|
||||
BinaryExprNode() { toAst(this) instanceof BinaryExpr }
|
||||
@@ -630,6 +655,8 @@ class DefinitionNode extends ControlFlowNode {
|
||||
Stages::AST::ref() and
|
||||
exists(Assign a | a.getATarget().getAFlowNode() = this)
|
||||
or
|
||||
exists(AssignExpr a | a.getTarget().getAFlowNode() = this)
|
||||
or
|
||||
exists(AnnAssign a | a.getTarget().getAFlowNode() = this and exists(a.getValue()))
|
||||
or
|
||||
exists(Alias a | a.getAsname().getAFlowNode() = this)
|
||||
@@ -640,12 +667,23 @@ class DefinitionNode extends ControlFlowNode {
|
||||
exists(Assign a | list_or_tuple_nested_element(a.getATarget()).getAFlowNode() = this)
|
||||
or
|
||||
exists(For for | for.getTarget().getAFlowNode() = this)
|
||||
or
|
||||
exists(Parameter param | this = param.asName().getAFlowNode() and exists(param.getDefault()))
|
||||
}
|
||||
|
||||
/** flow node corresponding to the value assigned for the definition corresponding to this flow node */
|
||||
ControlFlowNode getValue() {
|
||||
result = assigned_value(this.getNode()).getAFlowNode() and
|
||||
(result.getBasicBlock().dominates(this.getBasicBlock()) or result.isImport())
|
||||
(
|
||||
result.getBasicBlock().dominates(this.getBasicBlock())
|
||||
or
|
||||
result.isImport()
|
||||
or
|
||||
// since the default value for a parameter is evaluated in the same basic block as
|
||||
// the function definition, but the parameter belongs to the basic block of the function,
|
||||
// there is no dominance relationship between the two.
|
||||
exists(Parameter param | this = param.asName().getAFlowNode())
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -776,6 +814,9 @@ private AstNode assigned_value(Expr lhs) {
|
||||
/* lhs = result */
|
||||
exists(Assign a | a.getATarget() = lhs and result = a.getValue())
|
||||
or
|
||||
/* lhs := result */
|
||||
exists(AssignExpr a | a.getTarget() = lhs and result = a.getValue())
|
||||
or
|
||||
/* lhs : annotation = result */
|
||||
exists(AnnAssign a | a.getTarget() = lhs and result = a.getValue())
|
||||
or
|
||||
@@ -795,6 +836,8 @@ private AstNode assigned_value(Expr lhs) {
|
||||
or
|
||||
/* for lhs in seq: => `result` is the `for` node, representing the `iter(next(seq))` operation. */
|
||||
result.(For).getTarget() = lhs
|
||||
or
|
||||
exists(Parameter param | lhs = param.asName() and result = param.getDefault())
|
||||
}
|
||||
|
||||
predicate nested_sequence_assign(
|
||||
|
||||
@@ -5,12 +5,18 @@
|
||||
// If you add modeling of a new framework/library, remember to add it to the docs in
|
||||
// `docs/codeql/reusables/supported-frameworks.rst`
|
||||
private import semmle.python.frameworks.Aioch
|
||||
private import semmle.python.frameworks.Aiofile
|
||||
private import semmle.python.frameworks.Aiofiles
|
||||
private import semmle.python.frameworks.Aiohttp
|
||||
private import semmle.python.frameworks.Aiomysql
|
||||
private import semmle.python.frameworks.Aiosqlite
|
||||
private import semmle.python.frameworks.Aiopg
|
||||
private import semmle.python.frameworks.Aiosqlite
|
||||
private import semmle.python.frameworks.Anyio
|
||||
private import semmle.python.frameworks.Asyncpg
|
||||
private import semmle.python.frameworks.Baize
|
||||
private import semmle.python.frameworks.BSon
|
||||
private import semmle.python.frameworks.CassandraDriver
|
||||
private import semmle.python.frameworks.Cherrypy
|
||||
private import semmle.python.frameworks.ClickhouseDriver
|
||||
private import semmle.python.frameworks.Cryptodome
|
||||
private import semmle.python.frameworks.Cryptography
|
||||
@@ -27,6 +33,7 @@ private import semmle.python.frameworks.Httpx
|
||||
private import semmle.python.frameworks.Idna
|
||||
private import semmle.python.frameworks.Invoke
|
||||
private import semmle.python.frameworks.Jmespath
|
||||
private import semmle.python.frameworks.Joblib
|
||||
private import semmle.python.frameworks.Ldap
|
||||
private import semmle.python.frameworks.Ldap3
|
||||
private import semmle.python.frameworks.Libtaxii
|
||||
@@ -36,12 +43,15 @@ private import semmle.python.frameworks.MarkupSafe
|
||||
private import semmle.python.frameworks.Multidict
|
||||
private import semmle.python.frameworks.Mysql
|
||||
private import semmle.python.frameworks.MySQLdb
|
||||
private import semmle.python.frameworks.Numpy
|
||||
private import semmle.python.frameworks.Oracledb
|
||||
private import semmle.python.frameworks.Pandas
|
||||
private import semmle.python.frameworks.Peewee
|
||||
private import semmle.python.frameworks.Phoenixdb
|
||||
private import semmle.python.frameworks.Psycopg2
|
||||
private import semmle.python.frameworks.Pycurl
|
||||
private import semmle.python.frameworks.Pydantic
|
||||
private import semmle.python.frameworks.PyMongo
|
||||
private import semmle.python.frameworks.Pymssql
|
||||
private import semmle.python.frameworks.PyMySQL
|
||||
private import semmle.python.frameworks.Pyodbc
|
||||
@@ -49,11 +59,13 @@ private import semmle.python.frameworks.Requests
|
||||
private import semmle.python.frameworks.RestFramework
|
||||
private import semmle.python.frameworks.Rsa
|
||||
private import semmle.python.frameworks.RuamelYaml
|
||||
private import semmle.python.frameworks.Sanic
|
||||
private import semmle.python.frameworks.ServerLess
|
||||
private import semmle.python.frameworks.Setuptools
|
||||
private import semmle.python.frameworks.Simplejson
|
||||
private import semmle.python.frameworks.SqlAlchemy
|
||||
private import semmle.python.frameworks.Starlette
|
||||
private import semmle.python.frameworks.Stdlib
|
||||
private import semmle.python.frameworks.Setuptools
|
||||
private import semmle.python.frameworks.Toml
|
||||
private import semmle.python.frameworks.Tornado
|
||||
private import semmle.python.frameworks.Twisted
|
||||
|
||||
@@ -179,23 +179,23 @@ private predicate legalDottedName(string name) {
|
||||
bindingset[name]
|
||||
private predicate legalShortName(string name) { name.regexpMatch("(\\p{L}|_)(\\p{L}|\\d|_)*") }
|
||||
|
||||
/**
|
||||
* Holds if `f` is potentially a source package.
|
||||
* Does it have an __init__.py file (or --respect-init=False for Python 2) and is it within the source archive?
|
||||
*/
|
||||
private predicate isPotentialSourcePackage(Folder f) {
|
||||
f.getRelativePath() != "" and
|
||||
isPotentialPackage(f)
|
||||
}
|
||||
|
||||
private predicate isPotentialPackage(Folder f) {
|
||||
exists(f.getFile("__init__.py"))
|
||||
or
|
||||
py_flags_versioned("options.respect_init", "False", _) and major_version() = 2 and exists(f)
|
||||
}
|
||||
|
||||
private string moduleNameFromBase(Container file) {
|
||||
isPotentialPackage(file) and result = file.getBaseName()
|
||||
// We used to also require `isPotentialPackage(f)` to hold in this case,
|
||||
// but we saw modules not getting resolved because their folder did not
|
||||
// contain an `__init__.py` file.
|
||||
//
|
||||
// This makes the folder not be a package but a namespace package instead.
|
||||
// In most cases this is a mistake :| See following links for more details
|
||||
// - https://dev.to/methane/don-t-omit-init-py-3hga
|
||||
// - https://packaging.python.org/en/latest/guides/packaging-namespace-packages/
|
||||
// - https://discuss.python.org/t/init-py-pep-420-and-iter-modules-confusion/9642
|
||||
//
|
||||
// It is possible that we can keep the original requirement on
|
||||
// `isPotentialPackage(f)` here, but relax `isPotentialPackage` itself to allow
|
||||
// for this behavior of missing `__init__.py` files. However, doing so involves
|
||||
// cascading changes (for example to `moduleNameFromFile`), and was a more involved
|
||||
// task than we wanted to take on.
|
||||
result = file.getBaseName()
|
||||
or
|
||||
file instanceof File and result = file.getStem()
|
||||
}
|
||||
@@ -221,31 +221,114 @@ private predicate transitively_imported_from_entry_point(File file) {
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if the folder `f` is a regular Python package,
|
||||
* containing an `__init__.py` file.
|
||||
*/
|
||||
private predicate isRegularPackage(Folder f, string name) {
|
||||
legalShortName(name) and
|
||||
name = f.getStem() and
|
||||
exists(f.getFile("__init__.py"))
|
||||
}
|
||||
|
||||
/** Gets the name of a module imported in package `c`. */
|
||||
private string moduleImportedInPackage(Container c) {
|
||||
legalShortName(result) and
|
||||
// it has to be imported in this folder
|
||||
result =
|
||||
any(ImportExpr i | i.getLocation().getFile().getParent() = c)
|
||||
.getName()
|
||||
// strip everything after the first `.`
|
||||
.regexpReplaceAll("\\..*", "") and
|
||||
result != ""
|
||||
}
|
||||
|
||||
/** Holds if the file `f` could be resolved to a module named `name`. */
|
||||
private predicate isPotentialModuleFile(File file, string name) {
|
||||
legalShortName(name) and
|
||||
name = file.getStem() and
|
||||
file.getExtension() = ["py", "pyc", "so", "pyd"] and
|
||||
// it has to be imported in this folder
|
||||
name = moduleImportedInPackage(file.getParent())
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if the folder `f` is a namespace package named `name`.
|
||||
*
|
||||
* See https://peps.python.org/pep-0420/#specification
|
||||
* for details on namespace packages.
|
||||
*/
|
||||
private predicate isNameSpacePackage(Folder f, string name) {
|
||||
legalShortName(name) and
|
||||
name = f.getStem() and
|
||||
not isRegularPackage(f, name) and
|
||||
// it has to be imported in a file
|
||||
// either in this folder or next to this folder
|
||||
name = moduleImportedInPackage([f, f.getParent()]) and
|
||||
// no sibling regular package
|
||||
// and no sibling module
|
||||
not exists(Folder sibling | sibling.getParent() = f.getParent() |
|
||||
isRegularPackage(sibling.getFolder(name), name)
|
||||
or
|
||||
isPotentialModuleFile(sibling.getAFile(), name)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if the folder `f` is a package (either a regular package
|
||||
* or a namespace package) named `name`.
|
||||
*/
|
||||
private predicate isPackage(Folder f, string name) {
|
||||
isRegularPackage(f, name)
|
||||
or
|
||||
isNameSpacePackage(f, name)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if the file `f` is a module named `name`.
|
||||
*/
|
||||
private predicate isModuleFile(File file, string name) {
|
||||
isPotentialModuleFile(file, name) and
|
||||
not isPackage(file.getParent(), _)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if the folder `f` is a package named `name`
|
||||
* and does reside inside another package.
|
||||
*/
|
||||
private predicate isOutermostPackage(Folder f, string name) {
|
||||
isPackage(f, name) and
|
||||
not isPackage(f.getParent(), _)
|
||||
}
|
||||
|
||||
/** Gets the name of the module that `c` resolves to, if any. */
|
||||
cached
|
||||
string moduleNameFromFile(Container file) {
|
||||
string moduleNameFromFile(Container c) {
|
||||
// package
|
||||
isOutermostPackage(c, result)
|
||||
or
|
||||
// module
|
||||
isModuleFile(c, result)
|
||||
or
|
||||
Stages::AST::ref() and
|
||||
exists(string basename |
|
||||
basename = moduleNameFromBase(file) and
|
||||
basename = moduleNameFromBase(c) and
|
||||
legalShortName(basename)
|
||||
|
|
||||
result = moduleNameFromFile(file.getParent()) + "." + basename
|
||||
// recursive case
|
||||
result = moduleNameFromFile(c.getParent()) + "." + basename
|
||||
or
|
||||
// If `file` is a transitive import of a file that's executed directly, we allow references
|
||||
// to it by its `basename`.
|
||||
transitively_imported_from_entry_point(file) and
|
||||
transitively_imported_from_entry_point(c) and
|
||||
result = basename
|
||||
)
|
||||
or
|
||||
isPotentialSourcePackage(file) and
|
||||
result = file.getStem() and
|
||||
(
|
||||
not isPotentialSourcePackage(file.getParent()) or
|
||||
not legalShortName(file.getParent().getBaseName())
|
||||
)
|
||||
//
|
||||
// standard library
|
||||
result = c.getStem() and c.getParent() = c.getImportRoot()
|
||||
or
|
||||
result = file.getStem() and file.getParent() = file.getImportRoot()
|
||||
or
|
||||
result = file.getStem() and isStubRoot(file.getParent())
|
||||
result = c.getStem() and isStubRoot(c.getParent())
|
||||
}
|
||||
|
||||
private predicate isStubRoot(Folder f) {
|
||||
|
||||
@@ -34,40 +34,3 @@ class StringConstCompareBarrier extends DataFlow::Node {
|
||||
this = DataFlow::BarrierGuard<stringConstCompare/3>::getABarrierNode()
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* DEPRECATED: Use `StringConstCompareBarrier` instead.
|
||||
*
|
||||
* A validation of unknown node by comparing with a constant string value.
|
||||
*/
|
||||
deprecated class StringConstCompare extends DataFlow::BarrierGuard, CompareNode {
|
||||
ControlFlowNode checked_node;
|
||||
boolean safe_branch;
|
||||
|
||||
StringConstCompare() {
|
||||
exists(StrConst str_const, Cmpop op |
|
||||
op = any(Eq eq) and safe_branch = true
|
||||
or
|
||||
op = any(NotEq ne) and safe_branch = false
|
||||
|
|
||||
this.operands(str_const.getAFlowNode(), op, checked_node)
|
||||
or
|
||||
this.operands(checked_node, op, str_const.getAFlowNode())
|
||||
)
|
||||
or
|
||||
exists(IterableNode str_const_iterable, Cmpop op |
|
||||
op = any(In in_) and safe_branch = true
|
||||
or
|
||||
op = any(NotIn ni) and safe_branch = false
|
||||
|
|
||||
forall(ControlFlowNode elem | elem = str_const_iterable.getAnElement() |
|
||||
elem.getNode() instanceof StrConst
|
||||
) and
|
||||
this.operands(checked_node, op, str_const_iterable)
|
||||
)
|
||||
}
|
||||
|
||||
override predicate checks(ControlFlowNode node, boolean branch) {
|
||||
node = checked_node and branch = safe_branch
|
||||
}
|
||||
}
|
||||
|
||||
@@ -22,6 +22,8 @@ private import python
|
||||
* global (inter-procedural) data flow analyses.
|
||||
*/
|
||||
module DataFlow {
|
||||
import internal.DataFlow
|
||||
private import internal.DataFlowImplSpecific
|
||||
private import codeql.dataflow.DataFlow
|
||||
import DataFlowMake<PythonDataFlow>
|
||||
import internal.DataFlowImpl1
|
||||
}
|
||||
|
||||
@@ -90,39 +90,32 @@ abstract class SummarizedCallable extends LibraryCallable, Impl::Public::Summari
|
||||
}
|
||||
|
||||
class RequiredSummaryComponentStack = Impl::Public::RequiredSummaryComponentStack;
|
||||
// // This gives access to getNodeFromPath, which is not constrained to `CallNode`s
|
||||
// // as `resolvedSummaryBase` is.
|
||||
// private import semmle.python.frameworks.data.internal.ApiGraphModels as AGM
|
||||
//
|
||||
// private class SummarizedCallableFromModel extends SummarizedCallable {
|
||||
// string package;
|
||||
// string type;
|
||||
// string path;
|
||||
// SummarizedCallableFromModel() {
|
||||
// ModelOutput::relevantSummaryModel(package, type, path, _, _, _) and
|
||||
// this = package + ";" + type + ";" + path
|
||||
// }
|
||||
// override CallCfgNode getACall() {
|
||||
// exists(API::CallNode base |
|
||||
// ModelOutput::resolvedSummaryBase(package, type, path, base) and
|
||||
// result = base.getACall()
|
||||
// )
|
||||
// }
|
||||
// override ArgumentNode getACallback() {
|
||||
// exists(API::Node base |
|
||||
// base = AGM::getNodeFromPath(package, type, path) and
|
||||
// result = base.getAValueReachableFromSource()
|
||||
// )
|
||||
// }
|
||||
// override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
|
||||
// exists(string kind |
|
||||
// ModelOutput::relevantSummaryModel(package, type, path, input, output, kind)
|
||||
// |
|
||||
// kind = "value" and
|
||||
// preservesValue = true
|
||||
// or
|
||||
// kind = "taint" and
|
||||
// preservesValue = false
|
||||
// )
|
||||
// }
|
||||
// }
|
||||
|
||||
private class SummarizedCallableFromModel extends SummarizedCallable {
|
||||
string type;
|
||||
string path;
|
||||
|
||||
SummarizedCallableFromModel() {
|
||||
ModelOutput::relevantSummaryModel(type, path, _, _, _) and
|
||||
this = type + ";" + path
|
||||
}
|
||||
|
||||
override CallCfgNode getACall() { ModelOutput::resolvedSummaryBase(type, path, result) }
|
||||
|
||||
override ArgumentNode getACallback() {
|
||||
exists(API::Node base |
|
||||
ModelOutput::resolvedSummaryRefBase(type, path, base) and
|
||||
result = base.getAValueReachableFromSource()
|
||||
)
|
||||
}
|
||||
|
||||
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
|
||||
exists(string kind | ModelOutput::relevantSummaryModel(type, path, input, output, kind) |
|
||||
kind = "value" and
|
||||
preservesValue = true
|
||||
or
|
||||
kind = "taint" and
|
||||
preservesValue = false
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -15,6 +15,10 @@ private import python
|
||||
* global (inter-procedural) taint-tracking analyses.
|
||||
*/
|
||||
module TaintTracking {
|
||||
import internal.tainttracking1.TaintTracking
|
||||
import semmle.python.dataflow.new.internal.tainttracking1.TaintTrackingParameter::Public
|
||||
private import semmle.python.dataflow.new.internal.DataFlowImplSpecific
|
||||
private import semmle.python.dataflow.new.internal.TaintTrackingImplSpecific
|
||||
private import codeql.dataflow.TaintTracking
|
||||
import TaintFlowMake<PythonDataFlow, PythonTaintTracking>
|
||||
import internal.tainttracking1.TaintTrackingImpl
|
||||
}
|
||||
|
||||
@@ -1,412 +0,0 @@
|
||||
/**
|
||||
* Provides an implementation of global (interprocedural) data flow. This file
|
||||
* re-exports the local (intraprocedural) data flow analysis from
|
||||
* `DataFlowImplSpecific::Public` and adds a global analysis, mainly exposed
|
||||
* through the `Global` and `GlobalWithState` modules.
|
||||
*/
|
||||
|
||||
private import DataFlowImplCommon
|
||||
private import DataFlowImplSpecific::Private
|
||||
import DataFlowImplSpecific::Public
|
||||
import DataFlowImplCommonPublic
|
||||
private import DataFlowImpl
|
||||
|
||||
/** An input configuration for data flow. */
|
||||
signature module ConfigSig {
|
||||
/**
|
||||
* Holds if `source` is a relevant data flow source.
|
||||
*/
|
||||
predicate isSource(Node source);
|
||||
|
||||
/**
|
||||
* Holds if `sink` is a relevant data flow sink.
|
||||
*/
|
||||
predicate isSink(Node sink);
|
||||
|
||||
/**
|
||||
* Holds if data flow through `node` is prohibited. This completely removes
|
||||
* `node` from the data flow graph.
|
||||
*/
|
||||
default predicate isBarrier(Node node) { none() }
|
||||
|
||||
/** Holds if data flow into `node` is prohibited. */
|
||||
default predicate isBarrierIn(Node node) { none() }
|
||||
|
||||
/** Holds if data flow out of `node` is prohibited. */
|
||||
default predicate isBarrierOut(Node node) { none() }
|
||||
|
||||
/**
|
||||
* Holds if data may flow from `node1` to `node2` in addition to the normal data-flow steps.
|
||||
*/
|
||||
default predicate isAdditionalFlowStep(Node node1, Node node2) { none() }
|
||||
|
||||
/**
|
||||
* Holds if an arbitrary number of implicit read steps of content `c` may be
|
||||
* taken at `node`.
|
||||
*/
|
||||
default predicate allowImplicitRead(Node node, ContentSet c) { none() }
|
||||
|
||||
/**
|
||||
* Gets the virtual dispatch branching limit when calculating field flow.
|
||||
* This can be overridden to a smaller value to improve performance (a
|
||||
* value of 0 disables field flow), or a larger value to get more results.
|
||||
*/
|
||||
default int fieldFlowBranchLimit() { result = 2 }
|
||||
|
||||
/**
|
||||
* Gets a data flow configuration feature to add restrictions to the set of
|
||||
* valid flow paths.
|
||||
*
|
||||
* - `FeatureHasSourceCallContext`:
|
||||
* Assume that sources have some existing call context to disallow
|
||||
* conflicting return-flow directly following the source.
|
||||
* - `FeatureHasSinkCallContext`:
|
||||
* Assume that sinks have some existing call context to disallow
|
||||
* conflicting argument-to-parameter flow directly preceding the sink.
|
||||
* - `FeatureEqualSourceSinkCallContext`:
|
||||
* Implies both of the above and additionally ensures that the entire flow
|
||||
* path preserves the call context.
|
||||
*
|
||||
* These features are generally not relevant for typical end-to-end data flow
|
||||
* queries, but should only be used for constructing paths that need to
|
||||
* somehow be pluggable in another path context.
|
||||
*/
|
||||
default FlowFeature getAFeature() { none() }
|
||||
|
||||
/** Holds if sources should be grouped in the result of `flowPath`. */
|
||||
default predicate sourceGrouping(Node source, string sourceGroup) { none() }
|
||||
|
||||
/** Holds if sinks should be grouped in the result of `flowPath`. */
|
||||
default predicate sinkGrouping(Node sink, string sinkGroup) { none() }
|
||||
|
||||
/**
|
||||
* Holds if hidden nodes should be included in the data flow graph.
|
||||
*
|
||||
* This feature should only be used for debugging or when the data flow graph
|
||||
* is not visualized (as it is in a `path-problem` query).
|
||||
*/
|
||||
default predicate includeHiddenNodes() { none() }
|
||||
}
|
||||
|
||||
/** An input configuration for data flow using flow state. */
|
||||
signature module StateConfigSig {
|
||||
bindingset[this]
|
||||
class FlowState;
|
||||
|
||||
/**
|
||||
* Holds if `source` is a relevant data flow source with the given initial
|
||||
* `state`.
|
||||
*/
|
||||
predicate isSource(Node source, FlowState state);
|
||||
|
||||
/**
|
||||
* Holds if `sink` is a relevant data flow sink accepting `state`.
|
||||
*/
|
||||
predicate isSink(Node sink, FlowState state);
|
||||
|
||||
/**
|
||||
* Holds if data flow through `node` is prohibited. This completely removes
|
||||
* `node` from the data flow graph.
|
||||
*/
|
||||
default predicate isBarrier(Node node) { none() }
|
||||
|
||||
/**
|
||||
* Holds if data flow through `node` is prohibited when the flow state is
|
||||
* `state`.
|
||||
*/
|
||||
predicate isBarrier(Node node, FlowState state);
|
||||
|
||||
/** Holds if data flow into `node` is prohibited. */
|
||||
default predicate isBarrierIn(Node node) { none() }
|
||||
|
||||
/** Holds if data flow out of `node` is prohibited. */
|
||||
default predicate isBarrierOut(Node node) { none() }
|
||||
|
||||
/**
|
||||
* Holds if data may flow from `node1` to `node2` in addition to the normal data-flow steps.
|
||||
*/
|
||||
default predicate isAdditionalFlowStep(Node node1, Node node2) { none() }
|
||||
|
||||
/**
|
||||
* Holds if data may flow from `node1` to `node2` in addition to the normal data-flow steps.
|
||||
* This step is only applicable in `state1` and updates the flow state to `state2`.
|
||||
*/
|
||||
predicate isAdditionalFlowStep(Node node1, FlowState state1, Node node2, FlowState state2);
|
||||
|
||||
/**
|
||||
* Holds if an arbitrary number of implicit read steps of content `c` may be
|
||||
* taken at `node`.
|
||||
*/
|
||||
default predicate allowImplicitRead(Node node, ContentSet c) { none() }
|
||||
|
||||
/**
|
||||
* Gets the virtual dispatch branching limit when calculating field flow.
|
||||
* This can be overridden to a smaller value to improve performance (a
|
||||
* value of 0 disables field flow), or a larger value to get more results.
|
||||
*/
|
||||
default int fieldFlowBranchLimit() { result = 2 }
|
||||
|
||||
/**
|
||||
* Gets a data flow configuration feature to add restrictions to the set of
|
||||
* valid flow paths.
|
||||
*
|
||||
* - `FeatureHasSourceCallContext`:
|
||||
* Assume that sources have some existing call context to disallow
|
||||
* conflicting return-flow directly following the source.
|
||||
* - `FeatureHasSinkCallContext`:
|
||||
* Assume that sinks have some existing call context to disallow
|
||||
* conflicting argument-to-parameter flow directly preceding the sink.
|
||||
* - `FeatureEqualSourceSinkCallContext`:
|
||||
* Implies both of the above and additionally ensures that the entire flow
|
||||
* path preserves the call context.
|
||||
*
|
||||
* These features are generally not relevant for typical end-to-end data flow
|
||||
* queries, but should only be used for constructing paths that need to
|
||||
* somehow be pluggable in another path context.
|
||||
*/
|
||||
default FlowFeature getAFeature() { none() }
|
||||
|
||||
/** Holds if sources should be grouped in the result of `flowPath`. */
|
||||
default predicate sourceGrouping(Node source, string sourceGroup) { none() }
|
||||
|
||||
/** Holds if sinks should be grouped in the result of `flowPath`. */
|
||||
default predicate sinkGrouping(Node sink, string sinkGroup) { none() }
|
||||
|
||||
/**
|
||||
* Holds if hidden nodes should be included in the data flow graph.
|
||||
*
|
||||
* This feature should only be used for debugging or when the data flow graph
|
||||
* is not visualized (as it is in a `path-problem` query).
|
||||
*/
|
||||
default predicate includeHiddenNodes() { none() }
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the exploration limit for `partialFlow` and `partialFlowRev`
|
||||
* measured in approximate number of interprocedural steps.
|
||||
*/
|
||||
signature int explorationLimitSig();
|
||||
|
||||
/**
|
||||
* The output of a global data flow computation.
|
||||
*/
|
||||
signature module GlobalFlowSig {
|
||||
/**
|
||||
* A `Node` augmented with a call context (except for sinks) and an access path.
|
||||
* Only those `PathNode`s that are reachable from a source, and which can reach a sink, are generated.
|
||||
*/
|
||||
class PathNode;
|
||||
|
||||
/**
|
||||
* Holds if data can flow from `source` to `sink`.
|
||||
*
|
||||
* The corresponding paths are generated from the end-points and the graph
|
||||
* included in the module `PathGraph`.
|
||||
*/
|
||||
predicate flowPath(PathNode source, PathNode sink);
|
||||
|
||||
/**
|
||||
* Holds if data can flow from `source` to `sink`.
|
||||
*/
|
||||
predicate flow(Node source, Node sink);
|
||||
|
||||
/**
|
||||
* Holds if data can flow from some source to `sink`.
|
||||
*/
|
||||
predicate flowTo(Node sink);
|
||||
|
||||
/**
|
||||
* Holds if data can flow from some source to `sink`.
|
||||
*/
|
||||
predicate flowToExpr(DataFlowExpr sink);
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs a global data flow computation.
|
||||
*/
|
||||
module Global<ConfigSig Config> implements GlobalFlowSig {
|
||||
private module C implements FullStateConfigSig {
|
||||
import DefaultState<Config>
|
||||
import Config
|
||||
}
|
||||
|
||||
import Impl<C>
|
||||
}
|
||||
|
||||
/** DEPRECATED: Use `Global` instead. */
|
||||
deprecated module Make<ConfigSig Config> implements GlobalFlowSig {
|
||||
import Global<Config>
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs a global data flow computation using flow state.
|
||||
*/
|
||||
module GlobalWithState<StateConfigSig Config> implements GlobalFlowSig {
|
||||
private module C implements FullStateConfigSig {
|
||||
import Config
|
||||
}
|
||||
|
||||
import Impl<C>
|
||||
}
|
||||
|
||||
/** DEPRECATED: Use `GlobalWithState` instead. */
|
||||
deprecated module MakeWithState<StateConfigSig Config> implements GlobalFlowSig {
|
||||
import GlobalWithState<Config>
|
||||
}
|
||||
|
||||
signature class PathNodeSig {
|
||||
/** Gets a textual representation of this element. */
|
||||
string toString();
|
||||
|
||||
/**
|
||||
* Holds if this element is at the specified location.
|
||||
* The location spans column `startcolumn` of line `startline` to
|
||||
* column `endcolumn` of line `endline` in file `filepath`.
|
||||
* For more information, see
|
||||
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
|
||||
*/
|
||||
predicate hasLocationInfo(
|
||||
string filepath, int startline, int startcolumn, int endline, int endcolumn
|
||||
);
|
||||
|
||||
/** Gets the underlying `Node`. */
|
||||
Node getNode();
|
||||
}
|
||||
|
||||
signature module PathGraphSig<PathNodeSig PathNode> {
|
||||
/** Holds if `(a,b)` is an edge in the graph of data flow path explanations. */
|
||||
predicate edges(PathNode a, PathNode b);
|
||||
|
||||
/** Holds if `n` is a node in the graph of data flow path explanations. */
|
||||
predicate nodes(PathNode n, string key, string val);
|
||||
|
||||
/**
|
||||
* Holds if `(arg, par, ret, out)` forms a subpath-tuple, that is, flow through
|
||||
* a subpath between `par` and `ret` with the connecting edges `arg -> par` and
|
||||
* `ret -> out` is summarized as the edge `arg -> out`.
|
||||
*/
|
||||
predicate subpaths(PathNode arg, PathNode par, PathNode ret, PathNode out);
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs a `PathGraph` from two `PathGraph`s by disjoint union.
|
||||
*/
|
||||
module MergePathGraph<
|
||||
PathNodeSig PathNode1, PathNodeSig PathNode2, PathGraphSig<PathNode1> Graph1,
|
||||
PathGraphSig<PathNode2> Graph2>
|
||||
{
|
||||
private newtype TPathNode =
|
||||
TPathNode1(PathNode1 p) or
|
||||
TPathNode2(PathNode2 p)
|
||||
|
||||
/** A node in a graph of path explanations that is formed by disjoint union of the two given graphs. */
|
||||
class PathNode extends TPathNode {
|
||||
/** Gets this as a projection on the first given `PathGraph`. */
|
||||
PathNode1 asPathNode1() { this = TPathNode1(result) }
|
||||
|
||||
/** Gets this as a projection on the second given `PathGraph`. */
|
||||
PathNode2 asPathNode2() { this = TPathNode2(result) }
|
||||
|
||||
/** Gets a textual representation of this element. */
|
||||
string toString() {
|
||||
result = this.asPathNode1().toString() or
|
||||
result = this.asPathNode2().toString()
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if this element is at the specified location.
|
||||
* The location spans column `startcolumn` of line `startline` to
|
||||
* column `endcolumn` of line `endline` in file `filepath`.
|
||||
* For more information, see
|
||||
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
|
||||
*/
|
||||
predicate hasLocationInfo(
|
||||
string filepath, int startline, int startcolumn, int endline, int endcolumn
|
||||
) {
|
||||
this.asPathNode1().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) or
|
||||
this.asPathNode2().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
|
||||
}
|
||||
|
||||
/** Gets the underlying `Node`. */
|
||||
Node getNode() {
|
||||
result = this.asPathNode1().getNode() or
|
||||
result = this.asPathNode2().getNode()
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Provides the query predicates needed to include a graph in a path-problem query.
|
||||
*/
|
||||
module PathGraph implements PathGraphSig<PathNode> {
|
||||
/** Holds if `(a,b)` is an edge in the graph of data flow path explanations. */
|
||||
query predicate edges(PathNode a, PathNode b) {
|
||||
Graph1::edges(a.asPathNode1(), b.asPathNode1()) or
|
||||
Graph2::edges(a.asPathNode2(), b.asPathNode2())
|
||||
}
|
||||
|
||||
/** Holds if `n` is a node in the graph of data flow path explanations. */
|
||||
query predicate nodes(PathNode n, string key, string val) {
|
||||
Graph1::nodes(n.asPathNode1(), key, val) or
|
||||
Graph2::nodes(n.asPathNode2(), key, val)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `(arg, par, ret, out)` forms a subpath-tuple, that is, flow through
|
||||
* a subpath between `par` and `ret` with the connecting edges `arg -> par` and
|
||||
* `ret -> out` is summarized as the edge `arg -> out`.
|
||||
*/
|
||||
query predicate subpaths(PathNode arg, PathNode par, PathNode ret, PathNode out) {
|
||||
Graph1::subpaths(arg.asPathNode1(), par.asPathNode1(), ret.asPathNode1(), out.asPathNode1()) or
|
||||
Graph2::subpaths(arg.asPathNode2(), par.asPathNode2(), ret.asPathNode2(), out.asPathNode2())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs a `PathGraph` from three `PathGraph`s by disjoint union.
|
||||
*/
|
||||
module MergePathGraph3<
|
||||
PathNodeSig PathNode1, PathNodeSig PathNode2, PathNodeSig PathNode3,
|
||||
PathGraphSig<PathNode1> Graph1, PathGraphSig<PathNode2> Graph2, PathGraphSig<PathNode3> Graph3>
|
||||
{
|
||||
private module MergedInner = MergePathGraph<PathNode1, PathNode2, Graph1, Graph2>;
|
||||
|
||||
private module Merged =
|
||||
MergePathGraph<MergedInner::PathNode, PathNode3, MergedInner::PathGraph, Graph3>;
|
||||
|
||||
/** A node in a graph of path explanations that is formed by disjoint union of the three given graphs. */
|
||||
class PathNode instanceof Merged::PathNode {
|
||||
/** Gets this as a projection on the first given `PathGraph`. */
|
||||
PathNode1 asPathNode1() { result = super.asPathNode1().asPathNode1() }
|
||||
|
||||
/** Gets this as a projection on the second given `PathGraph`. */
|
||||
PathNode2 asPathNode2() { result = super.asPathNode1().asPathNode2() }
|
||||
|
||||
/** Gets this as a projection on the third given `PathGraph`. */
|
||||
PathNode3 asPathNode3() { result = super.asPathNode2() }
|
||||
|
||||
/** Gets a textual representation of this element. */
|
||||
string toString() { result = super.toString() }
|
||||
|
||||
/**
|
||||
* Holds if this element is at the specified location.
|
||||
* The location spans column `startcolumn` of line `startline` to
|
||||
* column `endcolumn` of line `endline` in file `filepath`.
|
||||
* For more information, see
|
||||
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
|
||||
*/
|
||||
predicate hasLocationInfo(
|
||||
string filepath, int startline, int startcolumn, int endline, int endcolumn
|
||||
) {
|
||||
super.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
|
||||
}
|
||||
|
||||
/** Gets the underlying `Node`. */
|
||||
Node getNode() { result = super.getNode() }
|
||||
}
|
||||
|
||||
/**
|
||||
* Provides the query predicates needed to include a graph in a path-problem query.
|
||||
*/
|
||||
module PathGraph = Merged::PathGraph;
|
||||
}
|
||||
@@ -240,6 +240,19 @@ predicate hasPropertyDecorator(Function func) {
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if the function `func` has a `contextlib.contextmanager`.
|
||||
*/
|
||||
predicate hasContextmanagerDecorator(Function func) {
|
||||
exists(ControlFlowNode contextmanager |
|
||||
contextmanager.(NameNode).getId() = "contextmanager" and contextmanager.(NameNode).isGlobal()
|
||||
or
|
||||
contextmanager.(AttrNode).getObject("contextmanager").(NameNode).getId() = "contextlib"
|
||||
|
|
||||
func.getADecorator() = contextmanager.getNode()
|
||||
)
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// Callables
|
||||
// =============================================================================
|
||||
@@ -251,6 +264,9 @@ abstract class LibraryCallable extends string {
|
||||
/** Gets a call to this library callable. */
|
||||
abstract CallCfgNode getACall();
|
||||
|
||||
/** Same as `getACall` but without referring to the call graph or API graph. */
|
||||
CallCfgNode getACallSimple() { none() }
|
||||
|
||||
/** Gets a data-flow node, where this library callable is used as a call-back. */
|
||||
abstract ArgumentNode getACallback();
|
||||
}
|
||||
@@ -1224,7 +1240,6 @@ predicate normalCallArg(CallNode call, Node arg, ArgumentPosition apos) {
|
||||
* time the bound method is used, such that the `clear()` call would essentially be
|
||||
* translated into `l.clear()`, and we can still have use-use flow.
|
||||
*/
|
||||
pragma[assume_small_delta]
|
||||
cached
|
||||
predicate getCallArg(CallNode call, Function target, CallType type, Node arg, ArgumentPosition apos) {
|
||||
Stages::DataFlow::ref() and
|
||||
@@ -1337,7 +1352,10 @@ abstract class DataFlowCall extends TDataFlowCall {
|
||||
abstract ControlFlowNode getNode();
|
||||
|
||||
/** Gets the enclosing callable of this call. */
|
||||
abstract DataFlowCallable getEnclosingCallable();
|
||||
DataFlowCallable getEnclosingCallable() { result = getCallableScope(this.getScope()) }
|
||||
|
||||
/** Gets the scope of this node, if any. */
|
||||
abstract Scope getScope();
|
||||
|
||||
/** Gets the location of this dataflow call. */
|
||||
abstract Location getLocation();
|
||||
@@ -1385,7 +1403,7 @@ class NormalCall extends ExtractedDataFlowCall, TNormalCall {
|
||||
|
||||
override ControlFlowNode getNode() { result = call }
|
||||
|
||||
override DataFlowCallable getEnclosingCallable() { result.getScope() = call.getScope() }
|
||||
override Scope getScope() { result = call.getScope() }
|
||||
|
||||
override DataFlowCallable getCallable() { result.(DataFlowFunction).getScope() = target }
|
||||
|
||||
@@ -1435,7 +1453,7 @@ class PotentialLibraryCall extends ExtractedDataFlowCall, TPotentialLibraryCall
|
||||
|
||||
override ControlFlowNode getNode() { result = call }
|
||||
|
||||
override DataFlowCallable getEnclosingCallable() { result.getScope() = call.getScope() }
|
||||
override Scope getScope() { result = call.getScope() }
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -1459,6 +1477,8 @@ class SummaryCall extends DataFlowCall, TSummaryCall {
|
||||
|
||||
override DataFlowCallable getEnclosingCallable() { result.asLibraryCallable() = c }
|
||||
|
||||
override Scope getScope() { none() }
|
||||
|
||||
override DataFlowCallable getCallable() { none() }
|
||||
|
||||
override ArgumentNode getArgument(ArgumentPosition apos) { none() }
|
||||
@@ -1559,7 +1579,8 @@ private class SummaryPostUpdateNode extends FlowSummaryNode, PostUpdateNodeImpl
|
||||
}
|
||||
|
||||
/** Gets a viable run-time target for the call `call`. */
|
||||
DataFlowCallable viableCallable(ExtractedDataFlowCall call) {
|
||||
DataFlowCallable viableCallable(DataFlowCall call) {
|
||||
call instanceof ExtractedDataFlowCall and
|
||||
result = call.getCallable()
|
||||
or
|
||||
// A call to a library callable with a flow summary
|
||||
@@ -1601,6 +1622,24 @@ class ExtractedReturnNode extends ReturnNode, CfgNode {
|
||||
override ReturnKind getKind() { any() }
|
||||
}
|
||||
|
||||
/**
|
||||
* A data flow node that represents the value yielded by a callable with a
|
||||
* `contextlib.contextmanager` decorator. We treat this as a normal return, which makes
|
||||
* things just work when used in a `with` statement -- technically calling the function
|
||||
* directly will give you a `contextlib._GeneratorContextManager` instance, so it's a
|
||||
* slight workaround solution.
|
||||
*
|
||||
* See https://docs.python.org/3/library/contextlib.html#contextlib.contextmanager
|
||||
*/
|
||||
class YieldNodeInContextManagerFunction extends ReturnNode, CfgNode {
|
||||
YieldNodeInContextManagerFunction() {
|
||||
hasContextmanagerDecorator(node.getScope()) and
|
||||
node = any(Yield yield).getValue().getAFlowNode()
|
||||
}
|
||||
|
||||
override ReturnKind getKind() { any() }
|
||||
}
|
||||
|
||||
/** A data-flow node that represents the output of a call. */
|
||||
abstract class OutNode extends Node {
|
||||
/** Gets the underlying call, where this node is a corresponding output of kind `kind`. */
|
||||
@@ -1636,13 +1675,3 @@ private module OutNodes {
|
||||
* `kind`.
|
||||
*/
|
||||
OutNode getAnOutNode(DataFlowCall call, ReturnKind kind) { call = result.getCall(kind) }
|
||||
|
||||
/**
|
||||
* Holds if flow from `call`'s argument `arg` to parameter `p` is permissible.
|
||||
*
|
||||
* This is a temporary hook to support technical debt in the Go language; do not use.
|
||||
*/
|
||||
pragma[inline]
|
||||
predicate golangSpecificParamArgFilter(DataFlowCall call, ParameterNode p, ArgumentNode arg) {
|
||||
any()
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -91,21 +91,6 @@ abstract class Configuration extends string {
|
||||
/** Holds if data flow out of `node` is prohibited. */
|
||||
predicate isBarrierOut(Node node) { none() }
|
||||
|
||||
/**
|
||||
* DEPRECATED: Use `isBarrier` and `BarrierGuard` module instead.
|
||||
*
|
||||
* Holds if data flow through nodes guarded by `guard` is prohibited.
|
||||
*/
|
||||
deprecated predicate isBarrierGuard(BarrierGuard guard) { none() }
|
||||
|
||||
/**
|
||||
* DEPRECATED: Use `isBarrier` and `BarrierGuard` module instead.
|
||||
*
|
||||
* Holds if data flow through nodes guarded by `guard` is prohibited when
|
||||
* the flow state is `state`
|
||||
*/
|
||||
deprecated predicate isBarrierGuard(BarrierGuard guard, FlowState state) { none() }
|
||||
|
||||
/**
|
||||
* Holds if data may flow from `node1` to `node2` in addition to the normal data-flow steps.
|
||||
*/
|
||||
@@ -225,29 +210,6 @@ abstract private class ConfigurationRecursionPrevention extends Configuration {
|
||||
}
|
||||
}
|
||||
|
||||
/** A bridge class to access the deprecated `isBarrierGuard`. */
|
||||
private class BarrierGuardGuardedNodeBridge extends Unit {
|
||||
abstract predicate guardedNode(Node n, Configuration config);
|
||||
|
||||
abstract predicate guardedNode(Node n, FlowState state, Configuration config);
|
||||
}
|
||||
|
||||
private class BarrierGuardGuardedNode extends BarrierGuardGuardedNodeBridge {
|
||||
deprecated override predicate guardedNode(Node n, Configuration config) {
|
||||
exists(BarrierGuard g |
|
||||
config.isBarrierGuard(g) and
|
||||
n = g.getAGuardedNode()
|
||||
)
|
||||
}
|
||||
|
||||
deprecated override predicate guardedNode(Node n, FlowState state, Configuration config) {
|
||||
exists(BarrierGuard g |
|
||||
config.isBarrierGuard(g, state) and
|
||||
n = g.getAGuardedNode()
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
private FlowState relevantState(Configuration config) {
|
||||
config.isSource(_, result) or
|
||||
config.isSink(_, result) or
|
||||
@@ -276,6 +238,8 @@ private module Config implements FullStateConfigSig {
|
||||
getConfig(state).isSource(source) and getState(state) instanceof FlowStateEmpty
|
||||
}
|
||||
|
||||
predicate isSink(Node sink) { none() }
|
||||
|
||||
predicate isSink(Node sink, FlowState state) {
|
||||
getConfig(state).isSink(sink, getState(state))
|
||||
or
|
||||
@@ -286,15 +250,17 @@ private module Config implements FullStateConfigSig {
|
||||
|
||||
predicate isBarrier(Node node, FlowState state) {
|
||||
getConfig(state).isBarrier(node, getState(state)) or
|
||||
getConfig(state).isBarrier(node) or
|
||||
any(BarrierGuardGuardedNodeBridge b).guardedNode(node, getState(state), getConfig(state)) or
|
||||
any(BarrierGuardGuardedNodeBridge b).guardedNode(node, getConfig(state))
|
||||
getConfig(state).isBarrier(node)
|
||||
}
|
||||
|
||||
predicate isBarrierIn(Node node) { any(Configuration config).isBarrierIn(node) }
|
||||
|
||||
predicate isBarrierOut(Node node) { any(Configuration config).isBarrierOut(node) }
|
||||
|
||||
predicate isBarrierIn(Node node, FlowState state) { none() }
|
||||
|
||||
predicate isBarrierOut(Node node, FlowState state) { none() }
|
||||
|
||||
predicate isAdditionalFlowStep(Node node1, Node node2) {
|
||||
singleConfiguration() and
|
||||
any(Configuration config).isAdditionalFlowStep(node1, node2)
|
||||
@@ -313,6 +279,8 @@ private module Config implements FullStateConfigSig {
|
||||
any(Configuration config).allowImplicitRead(node, c)
|
||||
}
|
||||
|
||||
predicate neverSkip(Node node) { none() }
|
||||
|
||||
int fieldFlowBranchLimit() { result = min(any(Configuration config).fieldFlowBranchLimit()) }
|
||||
|
||||
FlowFeature getAFeature() { result = any(Configuration config).getAFeature() }
|
||||
|
||||
@@ -91,21 +91,6 @@ abstract class Configuration extends string {
|
||||
/** Holds if data flow out of `node` is prohibited. */
|
||||
predicate isBarrierOut(Node node) { none() }
|
||||
|
||||
/**
|
||||
* DEPRECATED: Use `isBarrier` and `BarrierGuard` module instead.
|
||||
*
|
||||
* Holds if data flow through nodes guarded by `guard` is prohibited.
|
||||
*/
|
||||
deprecated predicate isBarrierGuard(BarrierGuard guard) { none() }
|
||||
|
||||
/**
|
||||
* DEPRECATED: Use `isBarrier` and `BarrierGuard` module instead.
|
||||
*
|
||||
* Holds if data flow through nodes guarded by `guard` is prohibited when
|
||||
* the flow state is `state`
|
||||
*/
|
||||
deprecated predicate isBarrierGuard(BarrierGuard guard, FlowState state) { none() }
|
||||
|
||||
/**
|
||||
* Holds if data may flow from `node1` to `node2` in addition to the normal data-flow steps.
|
||||
*/
|
||||
@@ -225,29 +210,6 @@ abstract private class ConfigurationRecursionPrevention extends Configuration {
|
||||
}
|
||||
}
|
||||
|
||||
/** A bridge class to access the deprecated `isBarrierGuard`. */
|
||||
private class BarrierGuardGuardedNodeBridge extends Unit {
|
||||
abstract predicate guardedNode(Node n, Configuration config);
|
||||
|
||||
abstract predicate guardedNode(Node n, FlowState state, Configuration config);
|
||||
}
|
||||
|
||||
private class BarrierGuardGuardedNode extends BarrierGuardGuardedNodeBridge {
|
||||
deprecated override predicate guardedNode(Node n, Configuration config) {
|
||||
exists(BarrierGuard g |
|
||||
config.isBarrierGuard(g) and
|
||||
n = g.getAGuardedNode()
|
||||
)
|
||||
}
|
||||
|
||||
deprecated override predicate guardedNode(Node n, FlowState state, Configuration config) {
|
||||
exists(BarrierGuard g |
|
||||
config.isBarrierGuard(g, state) and
|
||||
n = g.getAGuardedNode()
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
private FlowState relevantState(Configuration config) {
|
||||
config.isSource(_, result) or
|
||||
config.isSink(_, result) or
|
||||
@@ -276,6 +238,8 @@ private module Config implements FullStateConfigSig {
|
||||
getConfig(state).isSource(source) and getState(state) instanceof FlowStateEmpty
|
||||
}
|
||||
|
||||
predicate isSink(Node sink) { none() }
|
||||
|
||||
predicate isSink(Node sink, FlowState state) {
|
||||
getConfig(state).isSink(sink, getState(state))
|
||||
or
|
||||
@@ -286,15 +250,17 @@ private module Config implements FullStateConfigSig {
|
||||
|
||||
predicate isBarrier(Node node, FlowState state) {
|
||||
getConfig(state).isBarrier(node, getState(state)) or
|
||||
getConfig(state).isBarrier(node) or
|
||||
any(BarrierGuardGuardedNodeBridge b).guardedNode(node, getState(state), getConfig(state)) or
|
||||
any(BarrierGuardGuardedNodeBridge b).guardedNode(node, getConfig(state))
|
||||
getConfig(state).isBarrier(node)
|
||||
}
|
||||
|
||||
predicate isBarrierIn(Node node) { any(Configuration config).isBarrierIn(node) }
|
||||
|
||||
predicate isBarrierOut(Node node) { any(Configuration config).isBarrierOut(node) }
|
||||
|
||||
predicate isBarrierIn(Node node, FlowState state) { none() }
|
||||
|
||||
predicate isBarrierOut(Node node, FlowState state) { none() }
|
||||
|
||||
predicate isAdditionalFlowStep(Node node1, Node node2) {
|
||||
singleConfiguration() and
|
||||
any(Configuration config).isAdditionalFlowStep(node1, node2)
|
||||
@@ -313,6 +279,8 @@ private module Config implements FullStateConfigSig {
|
||||
any(Configuration config).allowImplicitRead(node, c)
|
||||
}
|
||||
|
||||
predicate neverSkip(Node node) { none() }
|
||||
|
||||
int fieldFlowBranchLimit() { result = min(any(Configuration config).fieldFlowBranchLimit()) }
|
||||
|
||||
FlowFeature getAFeature() { result = any(Configuration config).getAFeature() }
|
||||
|
||||
@@ -91,21 +91,6 @@ abstract class Configuration extends string {
|
||||
/** Holds if data flow out of `node` is prohibited. */
|
||||
predicate isBarrierOut(Node node) { none() }
|
||||
|
||||
/**
|
||||
* DEPRECATED: Use `isBarrier` and `BarrierGuard` module instead.
|
||||
*
|
||||
* Holds if data flow through nodes guarded by `guard` is prohibited.
|
||||
*/
|
||||
deprecated predicate isBarrierGuard(BarrierGuard guard) { none() }
|
||||
|
||||
/**
|
||||
* DEPRECATED: Use `isBarrier` and `BarrierGuard` module instead.
|
||||
*
|
||||
* Holds if data flow through nodes guarded by `guard` is prohibited when
|
||||
* the flow state is `state`
|
||||
*/
|
||||
deprecated predicate isBarrierGuard(BarrierGuard guard, FlowState state) { none() }
|
||||
|
||||
/**
|
||||
* Holds if data may flow from `node1` to `node2` in addition to the normal data-flow steps.
|
||||
*/
|
||||
@@ -225,29 +210,6 @@ abstract private class ConfigurationRecursionPrevention extends Configuration {
|
||||
}
|
||||
}
|
||||
|
||||
/** A bridge class to access the deprecated `isBarrierGuard`. */
|
||||
private class BarrierGuardGuardedNodeBridge extends Unit {
|
||||
abstract predicate guardedNode(Node n, Configuration config);
|
||||
|
||||
abstract predicate guardedNode(Node n, FlowState state, Configuration config);
|
||||
}
|
||||
|
||||
private class BarrierGuardGuardedNode extends BarrierGuardGuardedNodeBridge {
|
||||
deprecated override predicate guardedNode(Node n, Configuration config) {
|
||||
exists(BarrierGuard g |
|
||||
config.isBarrierGuard(g) and
|
||||
n = g.getAGuardedNode()
|
||||
)
|
||||
}
|
||||
|
||||
deprecated override predicate guardedNode(Node n, FlowState state, Configuration config) {
|
||||
exists(BarrierGuard g |
|
||||
config.isBarrierGuard(g, state) and
|
||||
n = g.getAGuardedNode()
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
private FlowState relevantState(Configuration config) {
|
||||
config.isSource(_, result) or
|
||||
config.isSink(_, result) or
|
||||
@@ -276,6 +238,8 @@ private module Config implements FullStateConfigSig {
|
||||
getConfig(state).isSource(source) and getState(state) instanceof FlowStateEmpty
|
||||
}
|
||||
|
||||
predicate isSink(Node sink) { none() }
|
||||
|
||||
predicate isSink(Node sink, FlowState state) {
|
||||
getConfig(state).isSink(sink, getState(state))
|
||||
or
|
||||
@@ -286,15 +250,17 @@ private module Config implements FullStateConfigSig {
|
||||
|
||||
predicate isBarrier(Node node, FlowState state) {
|
||||
getConfig(state).isBarrier(node, getState(state)) or
|
||||
getConfig(state).isBarrier(node) or
|
||||
any(BarrierGuardGuardedNodeBridge b).guardedNode(node, getState(state), getConfig(state)) or
|
||||
any(BarrierGuardGuardedNodeBridge b).guardedNode(node, getConfig(state))
|
||||
getConfig(state).isBarrier(node)
|
||||
}
|
||||
|
||||
predicate isBarrierIn(Node node) { any(Configuration config).isBarrierIn(node) }
|
||||
|
||||
predicate isBarrierOut(Node node) { any(Configuration config).isBarrierOut(node) }
|
||||
|
||||
predicate isBarrierIn(Node node, FlowState state) { none() }
|
||||
|
||||
predicate isBarrierOut(Node node, FlowState state) { none() }
|
||||
|
||||
predicate isAdditionalFlowStep(Node node1, Node node2) {
|
||||
singleConfiguration() and
|
||||
any(Configuration config).isAdditionalFlowStep(node1, node2)
|
||||
@@ -313,6 +279,8 @@ private module Config implements FullStateConfigSig {
|
||||
any(Configuration config).allowImplicitRead(node, c)
|
||||
}
|
||||
|
||||
predicate neverSkip(Node node) { none() }
|
||||
|
||||
int fieldFlowBranchLimit() { result = min(any(Configuration config).fieldFlowBranchLimit()) }
|
||||
|
||||
FlowFeature getAFeature() { result = any(Configuration config).getAFeature() }
|
||||
|
||||
@@ -91,21 +91,6 @@ abstract class Configuration extends string {
|
||||
/** Holds if data flow out of `node` is prohibited. */
|
||||
predicate isBarrierOut(Node node) { none() }
|
||||
|
||||
/**
|
||||
* DEPRECATED: Use `isBarrier` and `BarrierGuard` module instead.
|
||||
*
|
||||
* Holds if data flow through nodes guarded by `guard` is prohibited.
|
||||
*/
|
||||
deprecated predicate isBarrierGuard(BarrierGuard guard) { none() }
|
||||
|
||||
/**
|
||||
* DEPRECATED: Use `isBarrier` and `BarrierGuard` module instead.
|
||||
*
|
||||
* Holds if data flow through nodes guarded by `guard` is prohibited when
|
||||
* the flow state is `state`
|
||||
*/
|
||||
deprecated predicate isBarrierGuard(BarrierGuard guard, FlowState state) { none() }
|
||||
|
||||
/**
|
||||
* Holds if data may flow from `node1` to `node2` in addition to the normal data-flow steps.
|
||||
*/
|
||||
@@ -225,29 +210,6 @@ abstract private class ConfigurationRecursionPrevention extends Configuration {
|
||||
}
|
||||
}
|
||||
|
||||
/** A bridge class to access the deprecated `isBarrierGuard`. */
|
||||
private class BarrierGuardGuardedNodeBridge extends Unit {
|
||||
abstract predicate guardedNode(Node n, Configuration config);
|
||||
|
||||
abstract predicate guardedNode(Node n, FlowState state, Configuration config);
|
||||
}
|
||||
|
||||
private class BarrierGuardGuardedNode extends BarrierGuardGuardedNodeBridge {
|
||||
deprecated override predicate guardedNode(Node n, Configuration config) {
|
||||
exists(BarrierGuard g |
|
||||
config.isBarrierGuard(g) and
|
||||
n = g.getAGuardedNode()
|
||||
)
|
||||
}
|
||||
|
||||
deprecated override predicate guardedNode(Node n, FlowState state, Configuration config) {
|
||||
exists(BarrierGuard g |
|
||||
config.isBarrierGuard(g, state) and
|
||||
n = g.getAGuardedNode()
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
private FlowState relevantState(Configuration config) {
|
||||
config.isSource(_, result) or
|
||||
config.isSink(_, result) or
|
||||
@@ -276,6 +238,8 @@ private module Config implements FullStateConfigSig {
|
||||
getConfig(state).isSource(source) and getState(state) instanceof FlowStateEmpty
|
||||
}
|
||||
|
||||
predicate isSink(Node sink) { none() }
|
||||
|
||||
predicate isSink(Node sink, FlowState state) {
|
||||
getConfig(state).isSink(sink, getState(state))
|
||||
or
|
||||
@@ -286,15 +250,17 @@ private module Config implements FullStateConfigSig {
|
||||
|
||||
predicate isBarrier(Node node, FlowState state) {
|
||||
getConfig(state).isBarrier(node, getState(state)) or
|
||||
getConfig(state).isBarrier(node) or
|
||||
any(BarrierGuardGuardedNodeBridge b).guardedNode(node, getState(state), getConfig(state)) or
|
||||
any(BarrierGuardGuardedNodeBridge b).guardedNode(node, getConfig(state))
|
||||
getConfig(state).isBarrier(node)
|
||||
}
|
||||
|
||||
predicate isBarrierIn(Node node) { any(Configuration config).isBarrierIn(node) }
|
||||
|
||||
predicate isBarrierOut(Node node) { any(Configuration config).isBarrierOut(node) }
|
||||
|
||||
predicate isBarrierIn(Node node, FlowState state) { none() }
|
||||
|
||||
predicate isBarrierOut(Node node, FlowState state) { none() }
|
||||
|
||||
predicate isAdditionalFlowStep(Node node1, Node node2) {
|
||||
singleConfiguration() and
|
||||
any(Configuration config).isAdditionalFlowStep(node1, node2)
|
||||
@@ -313,6 +279,8 @@ private module Config implements FullStateConfigSig {
|
||||
any(Configuration config).allowImplicitRead(node, c)
|
||||
}
|
||||
|
||||
predicate neverSkip(Node node) { none() }
|
||||
|
||||
int fieldFlowBranchLimit() { result = min(any(Configuration config).fieldFlowBranchLimit()) }
|
||||
|
||||
FlowFeature getAFeature() { result = any(Configuration config).getAFeature() }
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -3,297 +3,54 @@
|
||||
* data-flow classes and predicates.
|
||||
*/
|
||||
|
||||
private import DataFlowImplSpecific::Private
|
||||
private import DataFlowImplSpecific::Public
|
||||
private import tainttracking1.TaintTrackingParameter::Private
|
||||
private import tainttracking1.TaintTrackingParameter::Public
|
||||
private import python
|
||||
private import DataFlowImplSpecific
|
||||
private import TaintTrackingImplSpecific
|
||||
private import codeql.dataflow.internal.DataFlowImplConsistency
|
||||
|
||||
module Consistency {
|
||||
private newtype TConsistencyConfiguration = MkConsistencyConfiguration()
|
||||
private module Input implements InputSig<PythonDataFlow> {
|
||||
private import Private
|
||||
private import Public
|
||||
|
||||
/** A class for configuring the consistency queries. */
|
||||
class ConsistencyConfiguration extends TConsistencyConfiguration {
|
||||
string toString() { none() }
|
||||
|
||||
/** Holds if `n` should be excluded from the consistency test `uniqueEnclosingCallable`. */
|
||||
predicate uniqueEnclosingCallableExclude(Node n) { none() }
|
||||
|
||||
/** Holds if `call` should be excluded from the consistency test `uniqueCallEnclosingCallable`. */
|
||||
predicate uniqueCallEnclosingCallableExclude(DataFlowCall call) { none() }
|
||||
|
||||
/** Holds if `n` should be excluded from the consistency test `uniqueNodeLocation`. */
|
||||
predicate uniqueNodeLocationExclude(Node n) { none() }
|
||||
|
||||
/** Holds if `n` should be excluded from the consistency test `missingLocation`. */
|
||||
predicate missingLocationExclude(Node n) { none() }
|
||||
|
||||
/** Holds if `n` should be excluded from the consistency test `postWithInFlow`. */
|
||||
predicate postWithInFlowExclude(Node n) { none() }
|
||||
|
||||
/** Holds if `n` should be excluded from the consistency test `argHasPostUpdate`. */
|
||||
predicate argHasPostUpdateExclude(ArgumentNode n) { none() }
|
||||
|
||||
/** Holds if `n` should be excluded from the consistency test `reverseRead`. */
|
||||
predicate reverseReadExclude(Node n) { none() }
|
||||
|
||||
/** Holds if `n` should be excluded from the consistency test `postHasUniquePre`. */
|
||||
predicate postHasUniquePreExclude(PostUpdateNode n) { none() }
|
||||
|
||||
/** Holds if `n` should be excluded from the consistency test `uniquePostUpdate`. */
|
||||
predicate uniquePostUpdateExclude(Node n) { none() }
|
||||
|
||||
/** Holds if `(call, ctx)` should be excluded from the consistency test `viableImplInCallContextTooLargeExclude`. */
|
||||
predicate viableImplInCallContextTooLargeExclude(
|
||||
DataFlowCall call, DataFlowCall ctx, DataFlowCallable callable
|
||||
) {
|
||||
none()
|
||||
}
|
||||
|
||||
/** Holds if `(c, pos, p)` should be excluded from the consistency test `uniqueParameterNodeAtPosition`. */
|
||||
predicate uniqueParameterNodeAtPositionExclude(DataFlowCallable c, ParameterPosition pos, Node p) {
|
||||
none()
|
||||
}
|
||||
|
||||
/** Holds if `(c, pos, p)` should be excluded from the consistency test `uniqueParameterNodePosition`. */
|
||||
predicate uniqueParameterNodePositionExclude(DataFlowCallable c, ParameterPosition pos, Node p) {
|
||||
none()
|
||||
}
|
||||
|
||||
/** Holds if `n` should be excluded from the consistency test `identityLocalStep`. */
|
||||
predicate identityLocalStepExclude(Node n) { none() }
|
||||
}
|
||||
|
||||
private class RelevantNode extends Node {
|
||||
RelevantNode() {
|
||||
this instanceof ArgumentNode or
|
||||
this instanceof ParameterNode or
|
||||
this instanceof ReturnNode or
|
||||
this = getAnOutNode(_, _) or
|
||||
simpleLocalFlowStep(this, _) or
|
||||
simpleLocalFlowStep(_, this) or
|
||||
jumpStep(this, _) or
|
||||
jumpStep(_, this) or
|
||||
storeStep(this, _, _) or
|
||||
storeStep(_, _, this) or
|
||||
readStep(this, _, _) or
|
||||
readStep(_, _, this) or
|
||||
defaultAdditionalTaintStep(this, _) or
|
||||
defaultAdditionalTaintStep(_, this)
|
||||
}
|
||||
}
|
||||
|
||||
query predicate uniqueEnclosingCallable(Node n, string msg) {
|
||||
exists(int c |
|
||||
n instanceof RelevantNode and
|
||||
c = count(nodeGetEnclosingCallable(n)) and
|
||||
c != 1 and
|
||||
not any(ConsistencyConfiguration conf).uniqueEnclosingCallableExclude(n) and
|
||||
msg = "Node should have one enclosing callable but has " + c + "."
|
||||
)
|
||||
}
|
||||
|
||||
query predicate uniqueCallEnclosingCallable(DataFlowCall call, string msg) {
|
||||
exists(int c |
|
||||
c = count(call.getEnclosingCallable()) and
|
||||
c != 1 and
|
||||
not any(ConsistencyConfiguration conf).uniqueCallEnclosingCallableExclude(call) and
|
||||
msg = "Call should have one enclosing callable but has " + c + "."
|
||||
)
|
||||
}
|
||||
|
||||
query predicate uniqueType(Node n, string msg) {
|
||||
exists(int c |
|
||||
n instanceof RelevantNode and
|
||||
c = count(getNodeType(n)) and
|
||||
c != 1 and
|
||||
msg = "Node should have one type but has " + c + "."
|
||||
)
|
||||
}
|
||||
|
||||
query predicate uniqueNodeLocation(Node n, string msg) {
|
||||
exists(int c |
|
||||
c =
|
||||
count(string filepath, int startline, int startcolumn, int endline, int endcolumn |
|
||||
n.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
|
||||
) and
|
||||
c != 1 and
|
||||
not any(ConsistencyConfiguration conf).uniqueNodeLocationExclude(n) and
|
||||
msg = "Node should have one location but has " + c + "."
|
||||
)
|
||||
}
|
||||
|
||||
query predicate missingLocation(string msg) {
|
||||
exists(int c |
|
||||
c =
|
||||
strictcount(Node n |
|
||||
not n.hasLocationInfo(_, _, _, _, _) and
|
||||
not any(ConsistencyConfiguration conf).missingLocationExclude(n)
|
||||
) and
|
||||
msg = "Nodes without location: " + c
|
||||
)
|
||||
}
|
||||
|
||||
query predicate uniqueNodeToString(Node n, string msg) {
|
||||
exists(int c |
|
||||
c = count(n.toString()) and
|
||||
c != 1 and
|
||||
msg = "Node should have one toString but has " + c + "."
|
||||
)
|
||||
}
|
||||
|
||||
query predicate missingToString(string msg) {
|
||||
exists(int c |
|
||||
c = strictcount(Node n | not exists(n.toString())) and
|
||||
msg = "Nodes without toString: " + c
|
||||
)
|
||||
}
|
||||
|
||||
query predicate parameterCallable(ParameterNode p, string msg) {
|
||||
exists(DataFlowCallable c | isParameterNode(p, c, _) and c != nodeGetEnclosingCallable(p)) and
|
||||
msg = "Callable mismatch for parameter."
|
||||
}
|
||||
|
||||
query predicate localFlowIsLocal(Node n1, Node n2, string msg) {
|
||||
simpleLocalFlowStep(n1, n2) and
|
||||
nodeGetEnclosingCallable(n1) != nodeGetEnclosingCallable(n2) and
|
||||
msg = "Local flow step does not preserve enclosing callable."
|
||||
}
|
||||
|
||||
query predicate readStepIsLocal(Node n1, Node n2, string msg) {
|
||||
readStep(n1, _, n2) and
|
||||
nodeGetEnclosingCallable(n1) != nodeGetEnclosingCallable(n2) and
|
||||
msg = "Read step does not preserve enclosing callable."
|
||||
}
|
||||
|
||||
query predicate storeStepIsLocal(Node n1, Node n2, string msg) {
|
||||
storeStep(n1, _, n2) and
|
||||
nodeGetEnclosingCallable(n1) != nodeGetEnclosingCallable(n2) and
|
||||
msg = "Store step does not preserve enclosing callable."
|
||||
}
|
||||
|
||||
private DataFlowType typeRepr() { result = getNodeType(_) }
|
||||
|
||||
query predicate compatibleTypesReflexive(DataFlowType t, string msg) {
|
||||
t = typeRepr() and
|
||||
not compatibleTypes(t, t) and
|
||||
msg = "Type compatibility predicate is not reflexive."
|
||||
}
|
||||
|
||||
query predicate unreachableNodeCCtx(Node n, DataFlowCall call, string msg) {
|
||||
isUnreachableInCall(n, call) and
|
||||
exists(DataFlowCallable c |
|
||||
c = nodeGetEnclosingCallable(n) and
|
||||
not viableCallable(call) = c
|
||||
) and
|
||||
msg = "Call context for isUnreachableInCall is inconsistent with call graph."
|
||||
}
|
||||
|
||||
query predicate localCallNodes(DataFlowCall call, Node n, string msg) {
|
||||
(
|
||||
n = getAnOutNode(call, _) and
|
||||
msg = "OutNode and call does not share enclosing callable."
|
||||
or
|
||||
n.(ArgumentNode).argumentOf(call, _) and
|
||||
msg = "ArgumentNode and call does not share enclosing callable."
|
||||
) and
|
||||
nodeGetEnclosingCallable(n) != call.getEnclosingCallable()
|
||||
}
|
||||
|
||||
// This predicate helps the compiler forget that in some languages
|
||||
// it is impossible for a result of `getPreUpdateNode` to be an
|
||||
// instance of `PostUpdateNode`.
|
||||
private Node getPre(PostUpdateNode n) {
|
||||
result = n.getPreUpdateNode()
|
||||
predicate argHasPostUpdateExclude(ArgumentNode n) {
|
||||
exists(ArgumentPosition apos | n.argumentOf(_, apos) and apos.isStarArgs(_))
|
||||
or
|
||||
none()
|
||||
exists(ArgumentPosition apos | n.argumentOf(_, apos) and apos.isDictSplat())
|
||||
}
|
||||
|
||||
query predicate postIsNotPre(PostUpdateNode n, string msg) {
|
||||
getPre(n) = n and
|
||||
msg = "PostUpdateNode should not equal its pre-update node."
|
||||
predicate reverseReadExclude(Node n) {
|
||||
// since `self`/`cls` parameters can be marked as implicit argument to `super()`,
|
||||
// they will have PostUpdateNodes. We have a read-step from the synthetic `**kwargs`
|
||||
// parameter, but dataflow-consistency queries should _not_ complain about there not
|
||||
// being a post-update node for the synthetic `**kwargs` parameter.
|
||||
n instanceof SynthDictSplatParameterNode
|
||||
}
|
||||
|
||||
query predicate postHasUniquePre(PostUpdateNode n, string msg) {
|
||||
not any(ConsistencyConfiguration conf).postHasUniquePreExclude(n) and
|
||||
exists(int c |
|
||||
c = count(n.getPreUpdateNode()) and
|
||||
c != 1 and
|
||||
msg = "PostUpdateNode should have one pre-update node but has " + c + "."
|
||||
predicate uniqueParameterNodePositionExclude(DataFlowCallable c, ParameterPosition pos, Node p) {
|
||||
// For normal parameters that can both be passed as positional arguments or keyword
|
||||
// arguments, we currently have parameter positions for both cases..
|
||||
//
|
||||
// TODO: Figure out how bad breaking this consistency check is
|
||||
exists(Function func, Parameter param |
|
||||
c.getScope() = func and
|
||||
p = parameterNode(param) and
|
||||
c.getParameter(pos) = p and
|
||||
param = func.getArg(_) and
|
||||
param = func.getArgByName(_)
|
||||
)
|
||||
}
|
||||
|
||||
query predicate uniquePostUpdate(Node n, string msg) {
|
||||
not any(ConsistencyConfiguration conf).uniquePostUpdateExclude(n) and
|
||||
1 < strictcount(PostUpdateNode post | post.getPreUpdateNode() = n) and
|
||||
msg = "Node has multiple PostUpdateNodes."
|
||||
predicate uniqueCallEnclosingCallableExclude(DataFlowCall call) {
|
||||
not exists(call.getLocation().getFile().getRelativePath())
|
||||
}
|
||||
|
||||
query predicate postIsInSameCallable(PostUpdateNode n, string msg) {
|
||||
nodeGetEnclosingCallable(n) != nodeGetEnclosingCallable(n.getPreUpdateNode()) and
|
||||
msg = "PostUpdateNode does not share callable with its pre-update node."
|
||||
predicate identityLocalStepExclude(Node n) {
|
||||
not exists(n.getLocation().getFile().getRelativePath())
|
||||
}
|
||||
|
||||
private predicate hasPost(Node n) { exists(PostUpdateNode post | post.getPreUpdateNode() = n) }
|
||||
|
||||
query predicate reverseRead(Node n, string msg) {
|
||||
exists(Node n2 | readStep(n, _, n2) and hasPost(n2) and not hasPost(n)) and
|
||||
not any(ConsistencyConfiguration conf).reverseReadExclude(n) and
|
||||
msg = "Origin of readStep is missing a PostUpdateNode."
|
||||
}
|
||||
|
||||
query predicate argHasPostUpdate(ArgumentNode n, string msg) {
|
||||
not hasPost(n) and
|
||||
not any(ConsistencyConfiguration c).argHasPostUpdateExclude(n) and
|
||||
msg = "ArgumentNode is missing PostUpdateNode."
|
||||
}
|
||||
|
||||
// This predicate helps the compiler forget that in some languages
|
||||
// it is impossible for a `PostUpdateNode` to be the target of
|
||||
// `simpleLocalFlowStep`.
|
||||
private predicate isPostUpdateNode(Node n) { n instanceof PostUpdateNode or none() }
|
||||
|
||||
query predicate postWithInFlow(Node n, string msg) {
|
||||
isPostUpdateNode(n) and
|
||||
not clearsContent(n, _) and
|
||||
simpleLocalFlowStep(_, n) and
|
||||
not any(ConsistencyConfiguration c).postWithInFlowExclude(n) and
|
||||
msg = "PostUpdateNode should not be the target of local flow."
|
||||
}
|
||||
|
||||
query predicate viableImplInCallContextTooLarge(
|
||||
DataFlowCall call, DataFlowCall ctx, DataFlowCallable callable
|
||||
) {
|
||||
callable = viableImplInCallContext(call, ctx) and
|
||||
not callable = viableCallable(call) and
|
||||
not any(ConsistencyConfiguration c).viableImplInCallContextTooLargeExclude(call, ctx, callable)
|
||||
}
|
||||
|
||||
query predicate uniqueParameterNodeAtPosition(
|
||||
DataFlowCallable c, ParameterPosition pos, Node p, string msg
|
||||
) {
|
||||
not any(ConsistencyConfiguration conf).uniqueParameterNodeAtPositionExclude(c, pos, p) and
|
||||
isParameterNode(p, c, pos) and
|
||||
not exists(unique(Node p0 | isParameterNode(p0, c, pos))) and
|
||||
msg = "Parameters with overlapping positions."
|
||||
}
|
||||
|
||||
query predicate uniqueParameterNodePosition(
|
||||
DataFlowCallable c, ParameterPosition pos, Node p, string msg
|
||||
) {
|
||||
not any(ConsistencyConfiguration conf).uniqueParameterNodePositionExclude(c, pos, p) and
|
||||
isParameterNode(p, c, pos) and
|
||||
not exists(unique(ParameterPosition pos0 | isParameterNode(p, c, pos0))) and
|
||||
msg = "Parameter node with multiple positions."
|
||||
}
|
||||
|
||||
query predicate uniqueContentApprox(Content c, string msg) {
|
||||
not exists(unique(ContentApprox approx | approx = getContentApprox(c))) and
|
||||
msg = "Non-unique content approximation."
|
||||
}
|
||||
|
||||
query predicate identityLocalStep(Node n, string msg) {
|
||||
simpleLocalFlowStep(n, n) and
|
||||
not any(ConsistencyConfiguration c).identityLocalStepExclude(n) and
|
||||
msg = "Node steps to itself"
|
||||
predicate multipleArgumentCallExclude(ArgumentNode arg, DataFlowCall call) {
|
||||
isArgumentNode(arg, call, _)
|
||||
}
|
||||
}
|
||||
|
||||
module Consistency = MakeConsistency<PythonDataFlow, PythonTaintTracking, Input>;
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
* Provides Python-specific definitions for use in the data flow library.
|
||||
*/
|
||||
|
||||
private import codeql.dataflow.DataFlow
|
||||
// we need to export `Unit` for the DataFlowImpl* files
|
||||
private import python as Python
|
||||
|
||||
@@ -13,3 +14,12 @@ module Public {
|
||||
import DataFlowPublic
|
||||
import DataFlowUtil
|
||||
}
|
||||
|
||||
module PythonDataFlow implements InputSig {
|
||||
import Private
|
||||
import Public
|
||||
|
||||
predicate neverSkipInPathGraph = Private::neverSkipInPathGraph/1;
|
||||
|
||||
Node exprNode(DataFlowExpr e) { result = Public::exprNode(e) }
|
||||
}
|
||||
|
||||
@@ -22,8 +22,8 @@ import DataFlowDispatch
|
||||
DataFlowCallable nodeGetEnclosingCallable(Node n) { result = n.getEnclosingCallable() }
|
||||
|
||||
/** Holds if `p` is a `ParameterNode` of `c` with position `pos`. */
|
||||
predicate isParameterNode(ParameterNodeImpl p, DataFlowCallable c, ParameterPosition pos) {
|
||||
p.isParameterOf(c, pos)
|
||||
predicate isParameterNode(ParameterNode p, DataFlowCallable c, ParameterPosition pos) {
|
||||
p.(ParameterNodeImpl).isParameterOf(c, pos)
|
||||
}
|
||||
|
||||
/** Holds if `arg` is an `ArgumentNode` of `c` with position `pos`. */
|
||||
@@ -279,14 +279,16 @@ class NonSyntheticPostUpdateNode extends PostUpdateNodeImpl, CfgNode {
|
||||
class DataFlowExpr = Expr;
|
||||
|
||||
/**
|
||||
* Flow between ESSA variables.
|
||||
* This includes both local and global variables.
|
||||
* Flow comes from definitions, uses and refinements.
|
||||
* A module to compute local flow.
|
||||
*
|
||||
* Flow will generally go from control flow nodes into essa variables at definitions,
|
||||
* and from there via use-use flow to other control flow nodes.
|
||||
*
|
||||
* Some syntaxtic constructs are handled separately.
|
||||
*/
|
||||
// TODO: Consider constraining `nodeFrom` and `nodeTo` to be in the same scope.
|
||||
// If they have different enclosing callables, we get consistency errors.
|
||||
module EssaFlow {
|
||||
predicate essaFlowStep(Node nodeFrom, Node nodeTo) {
|
||||
module LocalFlow {
|
||||
/** Holds if `nodeFrom` is the control flow node defining the essa variable `nodeTo`. */
|
||||
predicate definitionFlowStep(Node nodeFrom, Node nodeTo) {
|
||||
// Definition
|
||||
// `x = f(42)`
|
||||
// nodeFrom is `f(42)`, cfg node
|
||||
@@ -304,8 +306,12 @@ module EssaFlow {
|
||||
// see `with_flow` in `python/ql/src/semmle/python/dataflow/Implementation.qll`
|
||||
with.getContextExpr() = contextManager.getNode() and
|
||||
with.getOptionalVars() = var.getNode() and
|
||||
not with.isAsync() and
|
||||
contextManager.strictlyDominates(var)
|
||||
// note: we allow this for both `with` and `async with`, since some
|
||||
// implementations do `async def __aenter__(self): return self`, so you can do
|
||||
// both:
|
||||
// * `foo = x.foo(); await foo.async_method(); foo.close()` and
|
||||
// * `async with x.foo() as foo: await foo.async_method()`.
|
||||
)
|
||||
or
|
||||
// Async with var definition
|
||||
@@ -314,6 +320,12 @@ module EssaFlow {
|
||||
// nodeTo is `x`, essa var
|
||||
//
|
||||
// This makes the cfg node the local source of the awaited value.
|
||||
//
|
||||
// We have this step in addition to the step above, to handle cases where the QL
|
||||
// modeling of `f(42)` requires a `.getAwaited()` step (in API graphs) when not
|
||||
// using `async with`, so you can do both:
|
||||
// * `foo = await x.foo(); await foo.async_method(); foo.close()` and
|
||||
// * `async with x.foo() as foo: await foo.async_method()`.
|
||||
exists(With with, ControlFlowNode var |
|
||||
nodeFrom.(CfgNode).getNode() = var and
|
||||
nodeTo.(EssaNode).getVar().getDefinition().(WithDefinition).getDefiningNode() = var and
|
||||
@@ -326,10 +338,37 @@ module EssaFlow {
|
||||
// nodeFrom is `x`, cfgNode
|
||||
// nodeTo is `x`, essa var
|
||||
exists(ParameterDefinition pd |
|
||||
nodeFrom.asCfgNode() = pd.getDefiningNode() and
|
||||
nodeTo.asVar() = pd.getVariable()
|
||||
nodeFrom.(CfgNode).getNode() = pd.getDefiningNode() and
|
||||
nodeTo.(EssaNode).getVar() = pd.getVariable()
|
||||
)
|
||||
}
|
||||
|
||||
predicate expressionFlowStep(Node nodeFrom, Node nodeTo) {
|
||||
// If expressions
|
||||
nodeFrom.asCfgNode() = nodeTo.asCfgNode().(IfExprNode).getAnOperand()
|
||||
or
|
||||
// Assignment expressions
|
||||
nodeFrom.asCfgNode() = nodeTo.asCfgNode().(AssignmentExprNode).getValue()
|
||||
or
|
||||
// boolean inline expressions such as `x or y` or `x and y`
|
||||
nodeFrom.asCfgNode() = nodeTo.asCfgNode().(BoolExprNode).getAnOperand()
|
||||
or
|
||||
// Flow inside an unpacking assignment
|
||||
iterableUnpackingFlowStep(nodeFrom, nodeTo)
|
||||
or
|
||||
// Flow inside a match statement
|
||||
matchFlowStep(nodeFrom, nodeTo)
|
||||
}
|
||||
|
||||
predicate useToNextUse(NameNode nodeFrom, NameNode nodeTo) {
|
||||
AdjacentUses::adjacentUseUse(nodeFrom, nodeTo)
|
||||
}
|
||||
|
||||
predicate defToFirstUse(EssaVariable var, NameNode nodeTo) {
|
||||
AdjacentUses::firstUse(var.getDefinition(), nodeTo)
|
||||
}
|
||||
|
||||
predicate useUseFlowStep(Node nodeFrom, Node nodeTo) {
|
||||
// First use after definition
|
||||
// `y = 42`
|
||||
// `x = f(y)`
|
||||
@@ -343,31 +382,105 @@ module EssaFlow {
|
||||
// nodeFrom is 'y' on first line, cfg node
|
||||
// nodeTo is `y` on second line, cfg node
|
||||
useToNextUse(nodeFrom.asCfgNode(), nodeTo.asCfgNode())
|
||||
or
|
||||
// If expressions
|
||||
nodeFrom.asCfgNode() = nodeTo.asCfgNode().(IfExprNode).getAnOperand()
|
||||
or
|
||||
// boolean inline expressions such as `x or y` or `x and y`
|
||||
nodeFrom.asCfgNode() = nodeTo.asCfgNode().(BoolExprNode).getAnOperand()
|
||||
or
|
||||
// Flow inside an unpacking assignment
|
||||
iterableUnpackingFlowStep(nodeFrom, nodeTo)
|
||||
or
|
||||
matchFlowStep(nodeFrom, nodeTo)
|
||||
}
|
||||
|
||||
predicate useToNextUse(NameNode nodeFrom, NameNode nodeTo) {
|
||||
AdjacentUses::adjacentUseUse(nodeFrom, nodeTo)
|
||||
}
|
||||
|
||||
predicate defToFirstUse(EssaVariable var, NameNode nodeTo) {
|
||||
AdjacentUses::firstUse(var.getDefinition(), nodeTo)
|
||||
predicate localFlowStep(Node nodeFrom, Node nodeTo) {
|
||||
IncludePostUpdateFlow<PhaseDependentFlow<definitionFlowStep/2>::step/2>::step(nodeFrom, nodeTo)
|
||||
or
|
||||
IncludePostUpdateFlow<PhaseDependentFlow<expressionFlowStep/2>::step/2>::step(nodeFrom, nodeTo)
|
||||
or
|
||||
// Blindly applying use-use flow can result in a node that steps to itself, for
|
||||
// example in while-loops. To uphold dataflow consistency checks, we don't want
|
||||
// that. However, we do want to allow `[post] n` to `n` (to handle while loops), so
|
||||
// we should only do the filtering after `IncludePostUpdateFlow` has ben applied.
|
||||
IncludePostUpdateFlow<PhaseDependentFlow<useUseFlowStep/2>::step/2>::step(nodeFrom, nodeTo) and
|
||||
nodeFrom != nodeTo
|
||||
}
|
||||
}
|
||||
|
||||
//--------
|
||||
// Local flow
|
||||
//--------
|
||||
/** A module for transforming step relations. */
|
||||
module StepRelationTransformations {
|
||||
/**
|
||||
* Holds if there is a step from `nodeFrom` to `nodeTo` in
|
||||
* the step relation to be transformed.
|
||||
*
|
||||
* This is the input relation to the transformations.
|
||||
*/
|
||||
signature predicate stepSig(Node nodeFrom, Node nodeTo);
|
||||
|
||||
/**
|
||||
* A module to separate import-time from run-time.
|
||||
*
|
||||
* We really have two local flow relations, one for module initialisation time (or _import time_) and one for runtime.
|
||||
* Consider a read from a global variable `x = foo`. At import time there should be a local flow step from `foo` to `x`,
|
||||
* while at runtime there should be a jump step from the module variable corresponding to `foo` to `x`.
|
||||
*
|
||||
* Similarly, for a write `foo = y`, at import time, there is a local flow step from `y` to `foo` while at runtime there
|
||||
* is a jump step from `y` to the module variable corresponding to `foo`.
|
||||
*
|
||||
* We need a way of distinguishing if we are looking at import time or runtime. We have the following helpful facts:
|
||||
* - All top-level executable statements are import time (and import time only)
|
||||
* - All non-top-level code may be executed at runtime (but could also be executed at import time)
|
||||
*
|
||||
* We could write an analysis to determine which functions are called at import time, but until we have that, we will go
|
||||
* with the heuristic that global variables act according to import time rules at top-level program points and according
|
||||
* to runtime rules everywhere else. This will forego some import time local flow but otherwise be consistent.
|
||||
*/
|
||||
module PhaseDependentFlow<stepSig/2 rawStep> {
|
||||
/**
|
||||
* Holds if `node` is found at the top level of a module.
|
||||
*/
|
||||
pragma[inline]
|
||||
private predicate isTopLevel(Node node) { node.getScope() instanceof Module }
|
||||
|
||||
/** Holds if a step can be taken from `nodeFrom` to `nodeTo` at import time. */
|
||||
predicate importTimeStep(Node nodeFrom, Node nodeTo) {
|
||||
// As a proxy for whether statements can be executed at import time,
|
||||
// we check if they appear at the top level.
|
||||
// This will miss statements inside functions called from the top level.
|
||||
isTopLevel(nodeFrom) and
|
||||
isTopLevel(nodeTo) and
|
||||
rawStep(nodeFrom, nodeTo)
|
||||
}
|
||||
|
||||
/** Holds if a step can be taken from `nodeFrom` to `nodeTo` at runtime. */
|
||||
predicate runtimeStep(Node nodeFrom, Node nodeTo) {
|
||||
// Anything not at the top level can be executed at runtime.
|
||||
not isTopLevel(nodeFrom) and
|
||||
not isTopLevel(nodeTo) and
|
||||
rawStep(nodeFrom, nodeTo)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if a step can be taken from `nodeFrom` to `nodeTo`.
|
||||
*/
|
||||
predicate step(Node nodeFrom, Node nodeTo) {
|
||||
importTimeStep(nodeFrom, nodeTo) or
|
||||
runtimeStep(nodeFrom, nodeTo)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A module to add steps from post-update nodes.
|
||||
* Whenever there is a step from `x` to `y`,
|
||||
* we add a step from `[post] x` to `y`.
|
||||
*/
|
||||
module IncludePostUpdateFlow<stepSig/2 rawStep> {
|
||||
predicate step(Node nodeFrom, Node nodeTo) {
|
||||
// We either have a raw step from `nodeFrom`...
|
||||
rawStep(nodeFrom, nodeTo)
|
||||
or
|
||||
// ...or we have a raw step from a pre-update node of `nodeFrom`
|
||||
rawStep(nodeFrom.(PostUpdateNode).getPreUpdateNode(), nodeTo)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
import StepRelationTransformations
|
||||
|
||||
/**
|
||||
* This is the local flow predicate that is used as a building block in global
|
||||
* data flow.
|
||||
@@ -388,69 +501,17 @@ predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) {
|
||||
* or at runtime when callables in the module are called.
|
||||
*/
|
||||
predicate simpleLocalFlowStepForTypetracking(Node nodeFrom, Node nodeTo) {
|
||||
// If there is local flow out of a node `node`, we want flow
|
||||
// both out of `node` and any post-update node of `node`.
|
||||
exists(Node node |
|
||||
nodeFrom = update(node) and
|
||||
(
|
||||
importTimeLocalFlowStep(node, nodeTo) or
|
||||
runtimeLocalFlowStep(node, nodeTo)
|
||||
)
|
||||
)
|
||||
IncludePostUpdateFlow<PhaseDependentFlow<LocalFlow::localFlowStep/2>::step/2>::step(nodeFrom,
|
||||
nodeTo)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `node` is found at the top level of a module.
|
||||
*/
|
||||
pragma[inline]
|
||||
predicate isTopLevel(Node node) { node.getScope() instanceof Module }
|
||||
|
||||
/** Holds if there is local flow from `nodeFrom` to `nodeTo` at import time. */
|
||||
predicate importTimeLocalFlowStep(Node nodeFrom, Node nodeTo) {
|
||||
// As a proxy for whether statements can be executed at import time,
|
||||
// we check if they appear at the top level.
|
||||
// This will miss statements inside functions called from the top level.
|
||||
isTopLevel(nodeFrom) and
|
||||
isTopLevel(nodeTo) and
|
||||
EssaFlow::essaFlowStep(nodeFrom, nodeTo)
|
||||
}
|
||||
|
||||
/** Holds if there is local flow from `nodeFrom` to `nodeTo` at runtime. */
|
||||
predicate runtimeLocalFlowStep(Node nodeFrom, Node nodeTo) {
|
||||
// Anything not at the top level can be executed at runtime.
|
||||
not isTopLevel(nodeFrom) and
|
||||
not isTopLevel(nodeTo) and
|
||||
EssaFlow::essaFlowStep(nodeFrom, nodeTo)
|
||||
private predicate summaryLocalStep(Node nodeFrom, Node nodeTo) {
|
||||
FlowSummaryImpl::Private::Steps::summaryLocalStep(nodeFrom.(FlowSummaryNode).getSummaryNode(),
|
||||
nodeTo.(FlowSummaryNode).getSummaryNode(), true)
|
||||
}
|
||||
|
||||
predicate summaryFlowSteps(Node nodeFrom, Node nodeTo) {
|
||||
// If there is local flow out of a node `node`, we want flow
|
||||
// both out of `node` and any post-update node of `node`.
|
||||
exists(Node node |
|
||||
nodeFrom = update(node) and
|
||||
(
|
||||
importTimeSummaryFlowStep(node, nodeTo) or
|
||||
runtimeSummaryFlowStep(node, nodeTo)
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
predicate importTimeSummaryFlowStep(Node nodeFrom, Node nodeTo) {
|
||||
// As a proxy for whether statements can be executed at import time,
|
||||
// we check if they appear at the top level.
|
||||
// This will miss statements inside functions called from the top level.
|
||||
isTopLevel(nodeFrom) and
|
||||
isTopLevel(nodeTo) and
|
||||
FlowSummaryImpl::Private::Steps::summaryLocalStep(nodeFrom.(FlowSummaryNode).getSummaryNode(),
|
||||
nodeTo.(FlowSummaryNode).getSummaryNode(), true)
|
||||
}
|
||||
|
||||
predicate runtimeSummaryFlowStep(Node nodeFrom, Node nodeTo) {
|
||||
// Anything not at the top level can be executed at runtime.
|
||||
not isTopLevel(nodeFrom) and
|
||||
not isTopLevel(nodeTo) and
|
||||
FlowSummaryImpl::Private::Steps::summaryLocalStep(nodeFrom.(FlowSummaryNode).getSummaryNode(),
|
||||
nodeTo.(FlowSummaryNode).getSummaryNode(), true)
|
||||
IncludePostUpdateFlow<PhaseDependentFlow<summaryLocalStep/2>::step/2>::step(nodeFrom, nodeTo)
|
||||
}
|
||||
|
||||
/** `ModuleVariable`s are accessed via jump steps at runtime. */
|
||||
@@ -463,15 +524,15 @@ predicate runtimeJumpStep(Node nodeFrom, Node nodeTo) {
|
||||
or
|
||||
// Setting the possible values of the variable at the end of import time
|
||||
nodeFrom = nodeTo.(ModuleVariableNode).getADefiningWrite()
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `result` is either `node`, or the post-update node for `node`.
|
||||
*/
|
||||
private Node update(Node node) {
|
||||
result = node
|
||||
or
|
||||
result.(PostUpdateNode).getPreUpdateNode() = node
|
||||
// a parameter with a default value, since the parameter will be in the scope of the
|
||||
// function, while the default value itself will be in the scope that _defines_ the
|
||||
// function.
|
||||
exists(ParameterDefinition param |
|
||||
// note: we go to the _control-flow node_ of the parameter, and not the ESSA node of the parameter, since for type-tracking, the ESSA node is not a LocalSourceNode, so we would get in trouble.
|
||||
nodeFrom.asCfgNode() = param.getDefault() and
|
||||
nodeTo.asCfgNode() = param.getDefiningNode()
|
||||
)
|
||||
}
|
||||
|
||||
//--------
|
||||
@@ -486,15 +547,29 @@ class DataFlowType extends TDataFlowType {
|
||||
|
||||
/** A node that performs a type cast. */
|
||||
class CastNode extends Node {
|
||||
// We include read- and store steps here to force them to be
|
||||
// shown in path explanations.
|
||||
// This hack is necessary, because we have included some of these
|
||||
// steps as default taint steps, making them be suppressed in path
|
||||
// explanations.
|
||||
// We should revert this once, we can remove this steps from the
|
||||
// default taint steps; this should be possible once we have
|
||||
// implemented flow summaries and recursive content.
|
||||
CastNode() { readStep(_, _, this) or storeStep(_, _, this) }
|
||||
CastNode() { none() }
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `n` should never be skipped over in the `PathGraph` and in path
|
||||
* explanations.
|
||||
*/
|
||||
predicate neverSkipInPathGraph(Node n) {
|
||||
// NOTE: We could use RHS of a definition, but since we have use-use flow, in an
|
||||
// example like
|
||||
// ```py
|
||||
// x = SOURCE()
|
||||
// if <cond>:
|
||||
// y = x
|
||||
// SINK(x)
|
||||
// ```
|
||||
// we would end up saying that the path MUST not skip the x in `y = x`, which is just
|
||||
// annoying and doesn't help the path explanation become clearer.
|
||||
n.asVar() instanceof EssaDefinition and
|
||||
// For a parameter we have flow from ControlFlowNode to SSA node, and then onwards
|
||||
// with use-use flow, and since the CFN is already part of the path graph, we don't
|
||||
// want to force showing the SSA node as well.
|
||||
not n.asVar() instanceof ParameterDefinition
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -506,6 +581,8 @@ predicate compatibleTypes(DataFlowType t1, DataFlowType t2) { any() }
|
||||
|
||||
predicate typeStrongerThan(DataFlowType t1, DataFlowType t2) { none() }
|
||||
|
||||
predicate localMustFlowStep(Node node1, Node node2) { none() }
|
||||
|
||||
/**
|
||||
* Gets the type of `node`.
|
||||
*/
|
||||
@@ -556,9 +633,6 @@ predicate jumpStepSharedWithTypeTracker(Node nodeFrom, Node nodeTo) {
|
||||
r.getAttributeName(), nodeFrom) and
|
||||
nodeTo = r
|
||||
)
|
||||
or
|
||||
// Default value for parameter flows to that parameter
|
||||
defaultValueFlowStep(nodeFrom, nodeTo)
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -584,7 +658,7 @@ predicate jumpStepNotSharedWithTypeTracker(Node nodeFrom, Node nodeTo) {
|
||||
* Holds if data can flow from `nodeFrom` to `nodeTo` via an assignment to
|
||||
* content `c`.
|
||||
*/
|
||||
predicate storeStep(Node nodeFrom, Content c, Node nodeTo) {
|
||||
predicate storeStep(Node nodeFrom, ContentSet c, Node nodeTo) {
|
||||
listStoreStep(nodeFrom, c, nodeTo)
|
||||
or
|
||||
setStoreStep(nodeFrom, c, nodeTo)
|
||||
@@ -772,30 +846,34 @@ predicate comprehensionStoreStep(CfgNode nodeFrom, Content c, CfgNode nodeTo) {
|
||||
* ```
|
||||
* data flows from `x` to the attribute `foo` of (the post-update node for) `obj`.
|
||||
*/
|
||||
predicate attributeStoreStep(Node nodeFrom, AttributeContent c, PostUpdateNode nodeTo) {
|
||||
exists(AttrWrite write |
|
||||
write.accesses(nodeTo.getPreUpdateNode(), c.getAttribute()) and
|
||||
nodeFrom = write.getValue()
|
||||
)
|
||||
}
|
||||
|
||||
predicate defaultValueFlowStep(CfgNode nodeFrom, CfgNode nodeTo) {
|
||||
exists(Function f, Parameter p, ParameterDefinition def |
|
||||
// `getArgByName` supports, unlike `getAnArg`, keyword-only parameters
|
||||
p = f.getArgByName(_) and
|
||||
nodeFrom.asExpr() = p.getDefault() and
|
||||
// The following expresses
|
||||
// nodeTo.(ParameterNode).getParameter() = p
|
||||
// without non-monotonic recursion
|
||||
def.getParameter() = p and
|
||||
nodeTo.getNode() = def.getDefiningNode()
|
||||
predicate attributeStoreStep(Node nodeFrom, AttributeContent c, Node nodeTo) {
|
||||
exists(Node object |
|
||||
// Normally we target a PostUpdateNode. However, for class definitions the class
|
||||
// is only constructed after evaluating its' entire scope, so in terms of python
|
||||
// evaluations there is no post or pre update nodes, just one node for the class
|
||||
// expression. Therefore we target the class expression directly.
|
||||
//
|
||||
// Note: Due to the way we handle decorators, using a class decorator will result in
|
||||
// there being a post-update node for the class (argument to the decorator). We do
|
||||
// not want to differentiate between these two cases, so still target the class
|
||||
// expression directly.
|
||||
object = nodeTo.(PostUpdateNode).getPreUpdateNode() and
|
||||
not object.asExpr() instanceof ClassExpr
|
||||
or
|
||||
object = nodeTo and
|
||||
object.asExpr() instanceof ClassExpr
|
||||
|
|
||||
exists(AttrWrite write |
|
||||
write.accesses(object, c.getAttribute()) and
|
||||
nodeFrom = write.getValue()
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if data can flow from `nodeFrom` to `nodeTo` via a read of content `c`.
|
||||
*/
|
||||
predicate readStep(Node nodeFrom, Content c, Node nodeTo) {
|
||||
predicate readStep(Node nodeFrom, ContentSet c, Node nodeTo) {
|
||||
subscriptReadStep(nodeFrom, c, nodeTo)
|
||||
or
|
||||
iterableUnpackingReadStep(nodeFrom, c, nodeTo)
|
||||
@@ -870,7 +948,7 @@ predicate attributeReadStep(Node nodeFrom, AttributeContent c, AttrRead nodeTo)
|
||||
* any value stored inside `f` is cleared at the pre-update node associated with `x`
|
||||
* in `x.f = newValue`.
|
||||
*/
|
||||
predicate clearsContent(Node n, Content c) {
|
||||
predicate clearsContent(Node n, ContentSet c) {
|
||||
matchClearStep(n, c)
|
||||
or
|
||||
attributeClearStep(n, c)
|
||||
@@ -922,8 +1000,6 @@ DataFlowCallable viableImplInCallContext(DataFlowCall call, DataFlowCall ctx) {
|
||||
*/
|
||||
predicate mayBenefitFromCallContext(DataFlowCall call, DataFlowCallable c) { none() }
|
||||
|
||||
int accessPathLimit() { result = 5 }
|
||||
|
||||
/**
|
||||
* Holds if access paths with `c` at their head always should be tracked at high
|
||||
* precision. This disables adaptive access path precision for such access paths.
|
||||
@@ -986,11 +1062,10 @@ class ContentApprox = Unit;
|
||||
pragma[inline]
|
||||
ContentApprox getContentApprox(Content c) { any() }
|
||||
|
||||
/**
|
||||
* Gets an additional term that is added to the `join` and `branch` computations to reflect
|
||||
* an additional forward or backwards branching factor that is not taken into account
|
||||
* when calculating the (virtual) dispatch cost.
|
||||
*
|
||||
* Argument `arg` is part of a path from a source to a sink, and `p` is the target parameter.
|
||||
*/
|
||||
int getAdditionalFlowIntoCallNodeTerm(ArgumentNode arg, ParameterNode p) { none() }
|
||||
/** Helper for `.getEnclosingCallable`. */
|
||||
DataFlowCallable getCallableScope(Scope s) {
|
||||
result.getScope() = s
|
||||
or
|
||||
not exists(DataFlowCallable c | c.getScope() = s) and
|
||||
result = getCallableScope(s.getEnclosingScope())
|
||||
}
|
||||
|
||||
@@ -10,6 +10,7 @@ import LocalSources
|
||||
private import semmle.python.essa.SsaCompute
|
||||
private import semmle.python.dataflow.new.internal.ImportStar
|
||||
private import FlowSummaryImpl as FlowSummaryImpl
|
||||
private import semmle.python.frameworks.data.ModelsAsData
|
||||
|
||||
/**
|
||||
* IPA type for data flow nodes.
|
||||
@@ -117,14 +118,6 @@ newtype TNode =
|
||||
exists(ParameterPosition ppos | ppos.isKeyword(_) | exists(callable.getParameter(ppos)))
|
||||
}
|
||||
|
||||
/** Helper for `Node::getEnclosingCallable`. */
|
||||
private DataFlowCallable getCallableScope(Scope s) {
|
||||
result.getScope() = s
|
||||
or
|
||||
not exists(DataFlowCallable c | c.getScope() = s) and
|
||||
result = getCallableScope(s.getEnclosingScope())
|
||||
}
|
||||
|
||||
private import semmle.python.internal.CachedStages
|
||||
|
||||
/**
|
||||
@@ -400,7 +393,7 @@ class ModuleVariableNode extends Node, TModuleVariableNode {
|
||||
override Scope getScope() { result = mod }
|
||||
|
||||
override string toString() {
|
||||
result = "ModuleVariableNode in " + mod.toString() + " for " + var.getId()
|
||||
result = "ModuleVariableNode in " + concat( | | mod.toString(), ",") + " for " + var.getId()
|
||||
}
|
||||
|
||||
/** Gets the module in which this variable appears. */
|
||||
@@ -580,32 +573,6 @@ module BarrierGuard<guardChecksSig/3 guardChecks> {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* DEPRECATED: Use `BarrierGuard` module instead.
|
||||
*
|
||||
* A guard that validates some expression.
|
||||
*
|
||||
* To use this in a configuration, extend the class and provide a
|
||||
* characteristic predicate precisely specifying the guard, and override
|
||||
* `checks` to specify what is being validated and in which branch.
|
||||
*
|
||||
* It is important that all extending classes in scope are disjoint.
|
||||
*/
|
||||
deprecated class BarrierGuard extends GuardNode {
|
||||
/** Holds if this guard validates `node` upon evaluating to `branch`. */
|
||||
abstract predicate checks(ControlFlowNode node, boolean branch);
|
||||
|
||||
/** Gets a node guarded by this guard. */
|
||||
final ExprNode getAGuardedNode() {
|
||||
exists(EssaDefinition def, ControlFlowNode node, boolean branch |
|
||||
AdjacentUses::useOfDef(def, node) and
|
||||
this.checks(node, branch) and
|
||||
AdjacentUses::useOfDef(def, result.asCfgNode()) and
|
||||
this.controlsBlock(result.asCfgNode().getBasicBlock(), branch)
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Algebraic datatype for tracking data content associated with values.
|
||||
* Content can be collection elements or object attributes.
|
||||
@@ -621,6 +588,11 @@ newtype TContent =
|
||||
or
|
||||
// Arguments can overflow and end up in the starred parameter tuple.
|
||||
exists(any(CallNode cn).getArg(index))
|
||||
or
|
||||
// since flow summaries might use tuples, we ensure that we at least have valid
|
||||
// TTupleElementContent for the 0..7 (7 was picked to match `small_tuple` in
|
||||
// data-flow-private)
|
||||
index in [0 .. 7]
|
||||
} or
|
||||
/** An element of a dictionary under a specific key. */
|
||||
TDictionaryElementContent(string key) {
|
||||
@@ -631,7 +603,30 @@ newtype TContent =
|
||||
/** An element of a dictionary under any key. */
|
||||
TDictionaryElementAnyContent() or
|
||||
/** An object attribute. */
|
||||
TAttributeContent(string attr) { attr = any(Attribute a).getName() }
|
||||
TAttributeContent(string attr) {
|
||||
attr = any(Attribute a).getName()
|
||||
or
|
||||
// Flow summaries that target attributes rely on a TAttributeContent being
|
||||
// available. However, since the code above only constructs a TAttributeContent
|
||||
// based on the attribute names seen in the DB, we can end up in a scenario where
|
||||
// flow summaries don't work due to missing TAttributeContent. To get around this,
|
||||
// we need to add the attribute names used by flow summaries. This needs to be done
|
||||
// both for the summaries written in QL and the ones written in data-extension
|
||||
// files.
|
||||
//
|
||||
// 1) Summaries in QL. Sadly the following code leads to non-monotonic recursion
|
||||
// name = any(AccessPathToken a).getAnArgument("Attribute")
|
||||
// instead we use a qltest to alert if we write a new summary in QL that uses an
|
||||
// attribute -- see
|
||||
// python/ql/test/experimental/dataflow/summaries-checks/missing-attribute-content.ql
|
||||
attr in ["re", "string", "pattern"]
|
||||
or
|
||||
//
|
||||
// 2) summaries in data-extension files
|
||||
exists(string input, string output | ModelOutput::relevantSummaryModel(_, _, input, output, _) |
|
||||
attr = [input, output].regexpFind("(?<=(^|\\.)Attribute\\[)[^\\]]+(?=\\])", _, _).trim()
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* A data-flow value can have associated content.
|
||||
|
||||
@@ -23,29 +23,30 @@ module Public {
|
||||
* content type, or a return kind.
|
||||
*/
|
||||
class SummaryComponent extends TSummaryComponent {
|
||||
/** Gets a textual representation of this summary component. */
|
||||
string toString() {
|
||||
exists(ContentSet c | this = TContentSummaryComponent(c) and result = c.toString())
|
||||
or
|
||||
exists(ContentSet c | this = TWithoutContentSummaryComponent(c) and result = "without " + c)
|
||||
or
|
||||
exists(ContentSet c | this = TWithContentSummaryComponent(c) and result = "with " + c)
|
||||
/** Gets a textual representation of this component used for MaD models. */
|
||||
string getMadRepresentation() {
|
||||
result = getMadRepresentationSpecific(this)
|
||||
or
|
||||
exists(ArgumentPosition pos |
|
||||
this = TParameterSummaryComponent(pos) and result = "parameter " + pos
|
||||
this = TParameterSummaryComponent(pos) and
|
||||
result = "Parameter[" + getArgumentPosition(pos) + "]"
|
||||
)
|
||||
or
|
||||
exists(ParameterPosition pos |
|
||||
this = TArgumentSummaryComponent(pos) and result = "argument " + pos
|
||||
this = TArgumentSummaryComponent(pos) and
|
||||
result = "Argument[" + getParameterPosition(pos) + "]"
|
||||
)
|
||||
or
|
||||
exists(ReturnKind rk | this = TReturnSummaryComponent(rk) and result = "return (" + rk + ")")
|
||||
or
|
||||
exists(SummaryComponent::SyntheticGlobal sg |
|
||||
this = TSyntheticGlobalSummaryComponent(sg) and
|
||||
result = "synthetic global (" + sg + ")"
|
||||
exists(string synthetic |
|
||||
this = TSyntheticGlobalSummaryComponent(synthetic) and
|
||||
result = "SyntheticGlobal[" + synthetic + "]"
|
||||
)
|
||||
or
|
||||
this = TReturnSummaryComponent(getReturnValueKind()) and result = "ReturnValue"
|
||||
}
|
||||
|
||||
/** Gets a textual representation of this summary component. */
|
||||
string toString() { result = this.getMadRepresentation() }
|
||||
}
|
||||
|
||||
/** Provides predicates for constructing summary components. */
|
||||
@@ -110,7 +111,6 @@ module Public {
|
||||
}
|
||||
|
||||
/** Gets the stack obtained by dropping the first `i` elements, if any. */
|
||||
pragma[assume_small_delta]
|
||||
SummaryComponentStack drop(int i) {
|
||||
i = 0 and result = this
|
||||
or
|
||||
@@ -125,19 +125,22 @@ module Public {
|
||||
this = TSingletonSummaryComponentStack(result) or result = this.tail().bottom()
|
||||
}
|
||||
|
||||
/** Gets a textual representation of this stack. */
|
||||
string toString() {
|
||||
/** Gets a textual representation of this stack used for MaD models. */
|
||||
string getMadRepresentation() {
|
||||
exists(SummaryComponent head, SummaryComponentStack tail |
|
||||
head = this.head() and
|
||||
tail = this.tail() and
|
||||
result = tail + "." + head
|
||||
result = tail.getMadRepresentation() + "." + head.getMadRepresentation()
|
||||
)
|
||||
or
|
||||
exists(SummaryComponent c |
|
||||
this = TSingletonSummaryComponentStack(c) and
|
||||
result = c.toString()
|
||||
result = c.getMadRepresentation()
|
||||
)
|
||||
}
|
||||
|
||||
/** Gets a textual representation of this stack. */
|
||||
string toString() { result = this.getMadRepresentation() }
|
||||
}
|
||||
|
||||
/** Provides predicates for constructing stacks of summary components. */
|
||||
@@ -166,42 +169,6 @@ module Public {
|
||||
SummaryComponentStack return(ReturnKind rk) { result = singleton(SummaryComponent::return(rk)) }
|
||||
}
|
||||
|
||||
/** Gets a textual representation of this component used for flow summaries. */
|
||||
private string getComponent(SummaryComponent sc) {
|
||||
result = getComponentSpecific(sc)
|
||||
or
|
||||
exists(ArgumentPosition pos |
|
||||
sc = TParameterSummaryComponent(pos) and
|
||||
result = "Parameter[" + getArgumentPosition(pos) + "]"
|
||||
)
|
||||
or
|
||||
exists(ParameterPosition pos |
|
||||
sc = TArgumentSummaryComponent(pos) and
|
||||
result = "Argument[" + getParameterPosition(pos) + "]"
|
||||
)
|
||||
or
|
||||
exists(string synthetic |
|
||||
sc = TSyntheticGlobalSummaryComponent(synthetic) and
|
||||
result = "SyntheticGlobal[" + synthetic + "]"
|
||||
)
|
||||
or
|
||||
sc = TReturnSummaryComponent(getReturnValueKind()) and result = "ReturnValue"
|
||||
}
|
||||
|
||||
/** Gets a textual representation of this stack used for flow summaries. */
|
||||
string getComponentStack(SummaryComponentStack stack) {
|
||||
exists(SummaryComponent head, SummaryComponentStack tail |
|
||||
head = stack.head() and
|
||||
tail = stack.tail() and
|
||||
result = getComponentStack(tail) + "." + getComponent(head)
|
||||
)
|
||||
or
|
||||
exists(SummaryComponent c |
|
||||
stack = TSingletonSummaryComponentStack(c) and
|
||||
result = getComponent(c)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* A class that exists for QL technical reasons only (the IPA type used
|
||||
* to represent component stacks needs to be bounded).
|
||||
@@ -329,11 +296,21 @@ module Public {
|
||||
predicate hasProvenance(Provenance provenance) { provenance = "manual" }
|
||||
}
|
||||
|
||||
/** A callable where there is no flow via the callable. */
|
||||
class NeutralCallable extends SummarizedCallableBase {
|
||||
/**
|
||||
* A callable where there is no flow via the callable.
|
||||
*/
|
||||
class NeutralSummaryCallable extends NeutralCallable {
|
||||
NeutralSummaryCallable() { this.getKind() = "summary" }
|
||||
}
|
||||
|
||||
/**
|
||||
* A callable that has a neutral model.
|
||||
*/
|
||||
class NeutralCallable extends NeutralCallableBase {
|
||||
private string kind;
|
||||
private Provenance provenance;
|
||||
|
||||
NeutralCallable() { neutralSummaryElement(this, provenance) }
|
||||
NeutralCallable() { neutralElement(this, kind, provenance) }
|
||||
|
||||
/**
|
||||
* Holds if the neutral is auto generated.
|
||||
@@ -349,6 +326,11 @@ module Public {
|
||||
* Holds if the neutral has provenance `p`.
|
||||
*/
|
||||
predicate hasProvenance(Provenance p) { p = provenance }
|
||||
|
||||
/**
|
||||
* Gets the kind of the neutral.
|
||||
*/
|
||||
string getKind() { result = kind }
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1351,6 +1333,11 @@ module Private {
|
||||
/** Gets the string representation of this callable used by `neutral/1`. */
|
||||
abstract string getCallableCsv();
|
||||
|
||||
/**
|
||||
* Gets the kind of the neutral.
|
||||
*/
|
||||
string getKind() { result = super.getKind() }
|
||||
|
||||
string toString() { result = super.toString() }
|
||||
}
|
||||
|
||||
@@ -1382,8 +1369,8 @@ module Private {
|
||||
c.relevantSummary(input, output, preservesValue) and
|
||||
csv =
|
||||
c.getCallableCsv() // Callable information
|
||||
+ getComponentStack(input) + ";" // input
|
||||
+ getComponentStack(output) + ";" // output
|
||||
+ input.getMadRepresentation() + ";" // input
|
||||
+ output.getMadRepresentation() + ";" // output
|
||||
+ renderKind(preservesValue) + ";" // kind
|
||||
+ renderProvenance(c) // provenance
|
||||
)
|
||||
@@ -1391,12 +1378,13 @@ module Private {
|
||||
|
||||
/**
|
||||
* Holds if a neutral model `csv` exists (semi-colon separated format). Used for testing purposes.
|
||||
* The syntax is: "namespace;type;name;signature;provenance"",
|
||||
* The syntax is: "namespace;type;name;signature;kind;provenance"",
|
||||
*/
|
||||
query predicate neutral(string csv) {
|
||||
exists(RelevantNeutralCallable c |
|
||||
csv =
|
||||
c.getCallableCsv() // Callable information
|
||||
+ c.getKind() + ";" // kind
|
||||
+ renderProvenanceNeutral(c) // provenance
|
||||
)
|
||||
}
|
||||
|
||||
@@ -39,8 +39,16 @@ private import FlowSummaryImpl::Private
|
||||
private import FlowSummaryImpl::Public
|
||||
private import semmle.python.dataflow.new.FlowSummary as FlowSummary
|
||||
|
||||
/**
|
||||
* A class of callables that are candidates for flow summary modeling.
|
||||
*/
|
||||
class SummarizedCallableBase = string;
|
||||
|
||||
/**
|
||||
* A class of callables that are candidates for neutral modeling.
|
||||
*/
|
||||
class NeutralCallableBase = string;
|
||||
|
||||
/** View a `SummarizedCallable` as a `DataFlowCallable`. */
|
||||
DataFlowCallable inject(SummarizedCallable c) { result.asLibraryCallable() = c }
|
||||
|
||||
@@ -91,11 +99,11 @@ predicate summaryElement(
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if a neutral summary model exists for `c` with provenance `provenance`,
|
||||
* which means that there is no flow through `c`.
|
||||
* Holds if a neutral model exists for `c` of kind `kind`
|
||||
* and with provenance `provenance`.
|
||||
* Note. Neutral models have not been implemented for Python.
|
||||
*/
|
||||
predicate neutralSummaryElement(FlowSummary::SummarizedCallable c, string provenance) { none() }
|
||||
predicate neutralElement(NeutralCallableBase c, string kind, string provenance) { none() }
|
||||
|
||||
/**
|
||||
* Gets the summary component for specification component `c`, if any.
|
||||
@@ -128,10 +136,30 @@ SummaryComponent interpretComponentSpecific(AccessPathToken c) {
|
||||
)
|
||||
}
|
||||
|
||||
/** Gets the textual representation of a summary component in the format used for flow summaries. */
|
||||
string getComponentSpecific(SummaryComponent sc) {
|
||||
sc = TContentSummaryComponent(any(ListElementContent c)) and
|
||||
result = "ListElement"
|
||||
private string getContentSpecific(Content cs) {
|
||||
cs = TListElementContent() and result = "ListElement"
|
||||
or
|
||||
cs = TSetElementContent() and result = "SetElement"
|
||||
or
|
||||
exists(int index |
|
||||
cs = TTupleElementContent(index) and result = "TupleElement[" + index.toString() + "]"
|
||||
)
|
||||
or
|
||||
exists(string key |
|
||||
cs = TDictionaryElementContent(key) and result = "DictionaryElement[" + key + "]"
|
||||
)
|
||||
or
|
||||
cs = TDictionaryElementAnyContent() and result = "DictionaryElementAny"
|
||||
or
|
||||
exists(string attr | cs = TAttributeContent(attr) and result = "Attribute[" + attr + "]")
|
||||
}
|
||||
|
||||
/** Gets the textual representation of a summary component in the format used for MaD models. */
|
||||
string getMadRepresentationSpecific(SummaryComponent sc) {
|
||||
exists(Content c |
|
||||
sc = TContentSummaryComponent(c) and
|
||||
result = getContentSpecific(c)
|
||||
)
|
||||
}
|
||||
|
||||
/** Gets the textual representation of a parameter position in the format used for flow summaries. */
|
||||
|
||||
@@ -111,13 +111,13 @@ module ImportResolution {
|
||||
allowedEssaImportStep*(firstDef, lastUseVar) and
|
||||
not allowedEssaImportStep(_, firstDef)
|
||||
|
|
||||
not EssaFlow::defToFirstUse(firstDef, _) and
|
||||
not LocalFlow::defToFirstUse(firstDef, _) and
|
||||
val.asVar() = firstDef
|
||||
or
|
||||
exists(ControlFlowNode mid, ControlFlowNode end |
|
||||
EssaFlow::defToFirstUse(firstDef, mid) and
|
||||
EssaFlow::useToNextUse*(mid, end) and
|
||||
not EssaFlow::useToNextUse(end, _) and
|
||||
LocalFlow::defToFirstUse(firstDef, mid) and
|
||||
LocalFlow::useToNextUse*(mid, end) and
|
||||
not LocalFlow::useToNextUse(end, _) and
|
||||
val.asCfgNode() = end
|
||||
)
|
||||
)
|
||||
@@ -227,7 +227,7 @@ module ImportResolution {
|
||||
*/
|
||||
pragma[inline]
|
||||
private Module getModuleFromName(string name) {
|
||||
isPreferredModuleForName(result.getFile(), name + ["", ".__init__"])
|
||||
isPreferredModuleForName(result.getFile(), [name, name + ".__init__"])
|
||||
}
|
||||
|
||||
/** Gets the module from which attributes are imported by `i`. */
|
||||
|
||||
@@ -11,6 +11,7 @@ import DataFlowPublic
|
||||
private import DataFlowPrivate
|
||||
private import semmle.python.internal.CachedStages
|
||||
private import semmle.python.internal.Awaited
|
||||
private import semmle.python.dataflow.new.internal.ImportStar
|
||||
|
||||
/**
|
||||
* A data flow node that is a source of local flow. This includes things like
|
||||
@@ -39,6 +40,22 @@ class LocalSourceNode extends Node {
|
||||
this instanceof ExprNode and
|
||||
not simpleLocalFlowStepForTypetracking(_, this)
|
||||
or
|
||||
// For `from foo import *; foo_function()`, we want to let the variables we think
|
||||
// could originate in `foo` (such as `foo_function`) to be available in the API
|
||||
// graph. This requires them to be local sources. They would not be from the code
|
||||
// just above, since the CFG node has flow going into it from its corresponding
|
||||
// `GlobalSsaVariable`. (a different work-around is to change API graphs to not rely
|
||||
// as heavily on LocalSourceNode; I initially tried this, but it relied on a lot of
|
||||
// copy-pasted code, and it requires some non-trivial deprecation for downgrading
|
||||
// the result type of `.asSource()` to DataFlow::Node, so we've opted for this
|
||||
// approach instead).
|
||||
//
|
||||
// Note: This is only needed at the module level -- uses inside functions appear as
|
||||
// LocalSourceNodes as we expect.
|
||||
//
|
||||
// TODO: When rewriting SSA, we should be able to remove this workaround
|
||||
ImportStar::namePossiblyDefinedInImportStar(this.(ExprNode).getNode(), _, any(Module m))
|
||||
or
|
||||
// We include all module variable nodes, as these act as stepping stones between writes and
|
||||
// reads of global variables. Without them, type tracking based on `LocalSourceNode`s would be
|
||||
// unable to track across global variables.
|
||||
|
||||
@@ -0,0 +1,412 @@
|
||||
/**
|
||||
* Provides the implementation of type tracking steps through flow summaries.
|
||||
* To use this, you must implement the `Input` signature. You can then use the predicates in the `Output`
|
||||
* signature to implement the predicates of the same names inside `TypeTrackerSpecific.qll`.
|
||||
*/
|
||||
|
||||
/** The classes and predicates needed to generate type-tracking steps from summaries. */
|
||||
signature module Input {
|
||||
// Dataflow nodes
|
||||
class Node;
|
||||
|
||||
// Content
|
||||
class TypeTrackerContent;
|
||||
|
||||
class TypeTrackerContentFilter;
|
||||
|
||||
// Relating content and filters
|
||||
/**
|
||||
* Gets a content filter to use for a `WithoutContent[content]` step, (data is not allowed to be stored in `content`)
|
||||
* or has no result if
|
||||
* the step should be treated as ordinary flow.
|
||||
*
|
||||
* `WithoutContent` is often used to perform strong updates on individual collection elements, but for
|
||||
* type-tracking this is rarely beneficial and quite expensive. However, `WithoutContent` can be quite useful
|
||||
* for restricting the type of an object, and in these cases we translate it to a filter.
|
||||
*/
|
||||
TypeTrackerContentFilter getFilterFromWithoutContentStep(TypeTrackerContent content);
|
||||
|
||||
/**
|
||||
* Gets a content filter to use for a `WithContent[content]` step, (data must be stored in `content`)
|
||||
* or has no result if
|
||||
* the step cannot be handled by type-tracking.
|
||||
*
|
||||
* `WithContent` is often used to perform strong updates on individual collection elements (or rather
|
||||
* to preserve those that didn't get updated). But for type-tracking this is rarely beneficial and quite expensive.
|
||||
* However, `WithContent` can be quite useful for restricting the type of an object, and in these cases we translate it to a filter.
|
||||
*/
|
||||
TypeTrackerContentFilter getFilterFromWithContentStep(TypeTrackerContent content);
|
||||
|
||||
// Summaries and their stacks
|
||||
class SummaryComponent;
|
||||
|
||||
class SummaryComponentStack {
|
||||
SummaryComponent head();
|
||||
}
|
||||
|
||||
/** Gets a singleton stack containing `component`. */
|
||||
SummaryComponentStack singleton(SummaryComponent component);
|
||||
|
||||
/**
|
||||
* Gets the stack obtained by pushing `head` onto `tail`.
|
||||
*/
|
||||
SummaryComponentStack push(SummaryComponent head, SummaryComponentStack tail);
|
||||
|
||||
/** Gets a singleton stack representing a return. */
|
||||
SummaryComponent return();
|
||||
|
||||
// Relating content to summaries
|
||||
/** Gets a summary component for content `c`. */
|
||||
SummaryComponent content(TypeTrackerContent contents);
|
||||
|
||||
/** Gets a summary component where data is not allowed to be stored in `contents`. */
|
||||
SummaryComponent withoutContent(TypeTrackerContent contents);
|
||||
|
||||
/** Gets a summary component where data must be stored in `contents`. */
|
||||
SummaryComponent withContent(TypeTrackerContent contents);
|
||||
|
||||
// Callables
|
||||
class SummarizedCallable {
|
||||
predicate propagatesFlow(
|
||||
SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue
|
||||
);
|
||||
}
|
||||
|
||||
// Relating nodes to summaries
|
||||
/**
|
||||
* Gets a dataflow node respresenting the argument of `call` indicated by `arg`.
|
||||
*
|
||||
* Returns the post-update node of the argument when `isPostUpdate` is true.
|
||||
*/
|
||||
Node argumentOf(Node call, SummaryComponent arg, boolean isPostUpdate);
|
||||
|
||||
/** Gets a dataflow node respresenting the parameter of `callable` indicated by `param`. */
|
||||
Node parameterOf(Node callable, SummaryComponent param);
|
||||
|
||||
/** Gets a dataflow node respresenting the return of `callable` indicated by `return`. */
|
||||
Node returnOf(Node callable, SummaryComponent return);
|
||||
|
||||
// Relating callables to nodes
|
||||
/** Gets a dataflow node respresenting a call to `callable`. */
|
||||
Node callTo(SummarizedCallable callable);
|
||||
}
|
||||
|
||||
/**
|
||||
* The predicates provided by a summary type tracker.
|
||||
* These are meant to be used in `TypeTrackerSpecific.qll`
|
||||
* inside the predicates of the same names.
|
||||
*/
|
||||
signature module Output<Input I> {
|
||||
/**
|
||||
* Holds if there is a level step from `nodeFrom` to `nodeTo`, which does not depend on the call graph.
|
||||
*/
|
||||
predicate levelStepNoCall(I::Node nodeFrom, I::Node nodeTo);
|
||||
|
||||
/**
|
||||
* Holds if `nodeTo` is the result of accessing the `content` content of `nodeFrom`.
|
||||
*/
|
||||
predicate basicLoadStep(I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContent content);
|
||||
|
||||
/**
|
||||
* Holds if `nodeFrom` is being written to the `content` content of the object in `nodeTo`.
|
||||
*/
|
||||
predicate basicStoreStep(I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContent content);
|
||||
|
||||
/**
|
||||
* Holds if the `loadContent` of `nodeFrom` is stored in the `storeContent` of `nodeTo`.
|
||||
*/
|
||||
predicate basicLoadStoreStep(
|
||||
I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContent loadContent,
|
||||
I::TypeTrackerContent storeContent
|
||||
);
|
||||
|
||||
/**
|
||||
* Holds if type-tracking should step from `nodeFrom` to `nodeTo` but block flow of contents matched by `filter` through here.
|
||||
*/
|
||||
predicate basicWithoutContentStep(
|
||||
I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContentFilter filter
|
||||
);
|
||||
|
||||
/**
|
||||
* Holds if type-tracking should step from `nodeFrom` to `nodeTo` if inside a content matched by `filter`.
|
||||
*/
|
||||
predicate basicWithContentStep(
|
||||
I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContentFilter filter
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Implementation of the summary type tracker, that is type tracking through flow summaries.
|
||||
*/
|
||||
module SummaryFlow<Input I> implements Output<I> {
|
||||
pragma[nomagic]
|
||||
private predicate isNonLocal(I::SummaryComponent component) {
|
||||
component = I::content(_)
|
||||
or
|
||||
component = I::withContent(_)
|
||||
}
|
||||
|
||||
pragma[nomagic]
|
||||
private predicate hasLoadSummary(
|
||||
I::SummarizedCallable callable, I::TypeTrackerContent contents, I::SummaryComponentStack input,
|
||||
I::SummaryComponentStack output
|
||||
) {
|
||||
callable.propagatesFlow(I::push(I::content(contents), input), output, true) and
|
||||
not isNonLocal(input.head()) and
|
||||
not isNonLocal(output.head())
|
||||
}
|
||||
|
||||
pragma[nomagic]
|
||||
private predicate hasStoreSummary(
|
||||
I::SummarizedCallable callable, I::TypeTrackerContent contents, I::SummaryComponentStack input,
|
||||
I::SummaryComponentStack output
|
||||
) {
|
||||
not isNonLocal(input.head()) and
|
||||
not isNonLocal(output.head()) and
|
||||
(
|
||||
callable.propagatesFlow(input, I::push(I::content(contents), output), true)
|
||||
or
|
||||
// Allow the input to start with an arbitrary WithoutContent[X].
|
||||
// Since type-tracking only tracks one content deep, and we're about to store into another content,
|
||||
// we're already preventing the input from being in a content.
|
||||
callable
|
||||
.propagatesFlow(I::push(I::withoutContent(_), input),
|
||||
I::push(I::content(contents), output), true)
|
||||
)
|
||||
}
|
||||
|
||||
pragma[nomagic]
|
||||
private predicate hasLoadStoreSummary(
|
||||
I::SummarizedCallable callable, I::TypeTrackerContent loadContents,
|
||||
I::TypeTrackerContent storeContents, I::SummaryComponentStack input,
|
||||
I::SummaryComponentStack output
|
||||
) {
|
||||
callable
|
||||
.propagatesFlow(I::push(I::content(loadContents), input),
|
||||
I::push(I::content(storeContents), output), true) and
|
||||
not isNonLocal(input.head()) and
|
||||
not isNonLocal(output.head())
|
||||
}
|
||||
|
||||
pragma[nomagic]
|
||||
private predicate hasWithoutContentSummary(
|
||||
I::SummarizedCallable callable, I::TypeTrackerContentFilter filter,
|
||||
I::SummaryComponentStack input, I::SummaryComponentStack output
|
||||
) {
|
||||
exists(I::TypeTrackerContent content |
|
||||
callable.propagatesFlow(I::push(I::withoutContent(content), input), output, true) and
|
||||
filter = I::getFilterFromWithoutContentStep(content) and
|
||||
not isNonLocal(input.head()) and
|
||||
not isNonLocal(output.head()) and
|
||||
input != output
|
||||
)
|
||||
}
|
||||
|
||||
pragma[nomagic]
|
||||
private predicate hasWithContentSummary(
|
||||
I::SummarizedCallable callable, I::TypeTrackerContentFilter filter,
|
||||
I::SummaryComponentStack input, I::SummaryComponentStack output
|
||||
) {
|
||||
exists(I::TypeTrackerContent content |
|
||||
callable.propagatesFlow(I::push(I::withContent(content), input), output, true) and
|
||||
filter = I::getFilterFromWithContentStep(content) and
|
||||
not isNonLocal(input.head()) and
|
||||
not isNonLocal(output.head()) and
|
||||
input != output
|
||||
)
|
||||
}
|
||||
|
||||
private predicate componentLevelStep(I::SummaryComponent component) {
|
||||
exists(I::TypeTrackerContent content |
|
||||
component = I::withoutContent(content) and
|
||||
not exists(I::getFilterFromWithoutContentStep(content))
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a data flow `I::Node` corresponding an argument or return value of `call`,
|
||||
* as specified by `component`. `isOutput` indicates whether the node represents
|
||||
* an output node or an input node.
|
||||
*/
|
||||
bindingset[call, component]
|
||||
private I::Node evaluateSummaryComponentLocal(
|
||||
I::Node call, I::SummaryComponent component, boolean isOutput
|
||||
) {
|
||||
result = I::argumentOf(call, component, isOutput)
|
||||
or
|
||||
component = I::return() and
|
||||
result = call and
|
||||
isOutput = true
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `callable` is relevant for type-tracking and we therefore want `stack` to
|
||||
* be evaluated locally at its call sites.
|
||||
*/
|
||||
pragma[nomagic]
|
||||
private predicate dependsOnSummaryComponentStack(
|
||||
I::SummarizedCallable callable, I::SummaryComponentStack stack
|
||||
) {
|
||||
exists(I::callTo(callable)) and
|
||||
(
|
||||
callable.propagatesFlow(stack, _, true)
|
||||
or
|
||||
callable.propagatesFlow(_, stack, true)
|
||||
or
|
||||
// include store summaries as they may skip an initial step at the input
|
||||
hasStoreSummary(callable, _, stack, _)
|
||||
)
|
||||
or
|
||||
dependsOnSummaryComponentStackCons(callable, _, stack)
|
||||
}
|
||||
|
||||
pragma[nomagic]
|
||||
private predicate dependsOnSummaryComponentStackCons(
|
||||
I::SummarizedCallable callable, I::SummaryComponent head, I::SummaryComponentStack tail
|
||||
) {
|
||||
dependsOnSummaryComponentStack(callable, I::push(head, tail))
|
||||
}
|
||||
|
||||
pragma[nomagic]
|
||||
private predicate dependsOnSummaryComponentStackConsLocal(
|
||||
I::SummarizedCallable callable, I::SummaryComponent head, I::SummaryComponentStack tail
|
||||
) {
|
||||
dependsOnSummaryComponentStackCons(callable, head, tail) and
|
||||
not isNonLocal(head)
|
||||
}
|
||||
|
||||
pragma[nomagic]
|
||||
private predicate dependsOnSummaryComponentStackLeaf(
|
||||
I::SummarizedCallable callable, I::SummaryComponent leaf
|
||||
) {
|
||||
dependsOnSummaryComponentStack(callable, I::singleton(leaf))
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a data flow I::Node corresponding to the local input or output of `call`
|
||||
* identified by `stack`, if possible.
|
||||
*/
|
||||
pragma[nomagic]
|
||||
private I::Node evaluateSummaryComponentStackLocal(
|
||||
I::SummarizedCallable callable, I::Node call, I::SummaryComponentStack stack, boolean isOutput
|
||||
) {
|
||||
exists(I::SummaryComponent component |
|
||||
dependsOnSummaryComponentStackLeaf(callable, component) and
|
||||
stack = I::singleton(component) and
|
||||
call = I::callTo(callable) and
|
||||
result = evaluateSummaryComponentLocal(call, component, isOutput)
|
||||
)
|
||||
or
|
||||
exists(
|
||||
I::Node prev, I::SummaryComponent head, I::SummaryComponentStack tail, boolean isOutput0
|
||||
|
|
||||
prev = evaluateSummaryComponentStackLocal(callable, call, tail, isOutput0) and
|
||||
dependsOnSummaryComponentStackConsLocal(callable, pragma[only_bind_into](head),
|
||||
pragma[only_bind_out](tail)) and
|
||||
stack = I::push(pragma[only_bind_out](head), pragma[only_bind_out](tail))
|
||||
|
|
||||
// `Parameter[X]` is only allowed in the output of flow summaries (hence `isOutput = true`),
|
||||
// however the target of the parameter (e.g. `Argument[Y].Parameter[X]`) should be fetched
|
||||
// not from a post-update argument node (hence `isOutput0 = false`)
|
||||
result = I::parameterOf(prev, head) and
|
||||
isOutput0 = false and
|
||||
isOutput = true
|
||||
or
|
||||
// `ReturnValue` is only allowed in the input of flow summaries (hence `isOutput = false`),
|
||||
// and the target of the return value (e.g. `Argument[X].ReturnValue`) should be fetched not
|
||||
// from a post-update argument node (hence `isOutput0 = false`)
|
||||
result = I::returnOf(prev, head) and
|
||||
isOutput0 = false and
|
||||
isOutput = false
|
||||
or
|
||||
componentLevelStep(head) and
|
||||
result = prev and
|
||||
isOutput = isOutput0
|
||||
)
|
||||
}
|
||||
|
||||
// Implement Output
|
||||
predicate levelStepNoCall(I::Node nodeFrom, I::Node nodeTo) {
|
||||
exists(
|
||||
I::SummarizedCallable callable, I::Node call, I::SummaryComponentStack input,
|
||||
I::SummaryComponentStack output
|
||||
|
|
||||
callable.propagatesFlow(input, output, true) and
|
||||
call = I::callTo(callable) and
|
||||
nodeFrom = evaluateSummaryComponentStackLocal(callable, call, input, false) and
|
||||
nodeTo = evaluateSummaryComponentStackLocal(callable, call, output, true)
|
||||
)
|
||||
}
|
||||
|
||||
predicate basicLoadStep(I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContent content) {
|
||||
exists(
|
||||
I::SummarizedCallable callable, I::Node call, I::SummaryComponentStack input,
|
||||
I::SummaryComponentStack output
|
||||
|
|
||||
hasLoadSummary(callable, content, pragma[only_bind_into](input),
|
||||
pragma[only_bind_into](output)) and
|
||||
call = I::callTo(callable) and
|
||||
nodeFrom = evaluateSummaryComponentStackLocal(callable, call, input, false) and
|
||||
nodeTo = evaluateSummaryComponentStackLocal(callable, call, output, true)
|
||||
)
|
||||
}
|
||||
|
||||
predicate basicStoreStep(I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContent content) {
|
||||
exists(
|
||||
I::SummarizedCallable callable, I::Node call, I::SummaryComponentStack input,
|
||||
I::SummaryComponentStack output
|
||||
|
|
||||
hasStoreSummary(callable, content, pragma[only_bind_into](input),
|
||||
pragma[only_bind_into](output)) and
|
||||
call = I::callTo(callable) and
|
||||
nodeFrom = evaluateSummaryComponentStackLocal(callable, call, input, false) and
|
||||
nodeTo = evaluateSummaryComponentStackLocal(callable, call, output, true)
|
||||
)
|
||||
}
|
||||
|
||||
predicate basicLoadStoreStep(
|
||||
I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContent loadContent,
|
||||
I::TypeTrackerContent storeContent
|
||||
) {
|
||||
exists(
|
||||
I::SummarizedCallable callable, I::Node call, I::SummaryComponentStack input,
|
||||
I::SummaryComponentStack output
|
||||
|
|
||||
hasLoadStoreSummary(callable, loadContent, storeContent, pragma[only_bind_into](input),
|
||||
pragma[only_bind_into](output)) and
|
||||
call = I::callTo(callable) and
|
||||
nodeFrom = evaluateSummaryComponentStackLocal(callable, call, input, false) and
|
||||
nodeTo = evaluateSummaryComponentStackLocal(callable, call, output, true)
|
||||
)
|
||||
}
|
||||
|
||||
predicate basicWithoutContentStep(
|
||||
I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContentFilter filter
|
||||
) {
|
||||
exists(
|
||||
I::SummarizedCallable callable, I::Node call, I::SummaryComponentStack input,
|
||||
I::SummaryComponentStack output
|
||||
|
|
||||
hasWithoutContentSummary(callable, filter, pragma[only_bind_into](input),
|
||||
pragma[only_bind_into](output)) and
|
||||
call = I::callTo(callable) and
|
||||
nodeFrom = evaluateSummaryComponentStackLocal(callable, call, input, false) and
|
||||
nodeTo = evaluateSummaryComponentStackLocal(callable, call, output, true)
|
||||
)
|
||||
}
|
||||
|
||||
predicate basicWithContentStep(
|
||||
I::Node nodeFrom, I::Node nodeTo, I::TypeTrackerContentFilter filter
|
||||
) {
|
||||
exists(
|
||||
I::SummarizedCallable callable, I::Node call, I::SummaryComponentStack input,
|
||||
I::SummaryComponentStack output
|
||||
|
|
||||
hasWithContentSummary(callable, filter, pragma[only_bind_into](input),
|
||||
pragma[only_bind_into](output)) and
|
||||
call = I::callTo(callable) and
|
||||
nodeFrom = evaluateSummaryComponentStackLocal(callable, call, input, false) and
|
||||
nodeTo = evaluateSummaryComponentStackLocal(callable, call, output, true)
|
||||
)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,10 @@
|
||||
/**
|
||||
* Provides Python-specific definitions for use in the taint tracking library.
|
||||
*/
|
||||
|
||||
private import codeql.dataflow.TaintTracking
|
||||
private import DataFlowImplSpecific
|
||||
|
||||
module PythonTaintTracking implements InputSig<PythonDataFlow> {
|
||||
import TaintTrackingPrivate
|
||||
}
|
||||
@@ -16,7 +16,7 @@ predicate defaultTaintSanitizer(DataFlow::Node node) { none() }
|
||||
* of `c` at sinks and inputs to additional taint steps.
|
||||
*/
|
||||
bindingset[node]
|
||||
predicate defaultImplicitTaintRead(DataFlow::Node node, DataFlow::Content c) { none() }
|
||||
predicate defaultImplicitTaintRead(DataFlow::Node node, DataFlow::ContentSet c) { none() }
|
||||
|
||||
private module Cached {
|
||||
/**
|
||||
|
||||
@@ -55,10 +55,9 @@ private module Cached {
|
||||
)
|
||||
}
|
||||
|
||||
pragma[nomagic]
|
||||
private TypeTracker noContentTypeTracker(boolean hasCall) {
|
||||
result = MkTypeTracker(hasCall, noContent())
|
||||
}
|
||||
/** Gets a type tracker with no content and the call bit set to the given value. */
|
||||
cached
|
||||
TypeTracker noContentTypeTracker(boolean hasCall) { result = MkTypeTracker(hasCall, noContent()) }
|
||||
|
||||
/** Gets the summary resulting from appending `step` to type-tracking summary `tt`. */
|
||||
cached
|
||||
@@ -235,17 +234,6 @@ private predicate stepProj(TypeTrackingNode nodeFrom, StepSummary summary) {
|
||||
step(nodeFrom, _, summary)
|
||||
}
|
||||
|
||||
bindingset[nodeFrom, t]
|
||||
pragma[inline_late]
|
||||
pragma[noopt]
|
||||
private TypeTracker stepInlineLate(TypeTracker t, TypeTrackingNode nodeFrom, TypeTrackingNode nodeTo) {
|
||||
exists(StepSummary summary |
|
||||
stepProj(nodeFrom, summary) and
|
||||
result = t.append(summary) and
|
||||
step(nodeFrom, nodeTo, summary)
|
||||
)
|
||||
}
|
||||
|
||||
private predicate smallstep(Node nodeFrom, TypeTrackingNode nodeTo, StepSummary summary) {
|
||||
smallstepNoCall(nodeFrom, nodeTo, summary)
|
||||
or
|
||||
@@ -257,17 +245,6 @@ private predicate smallstepProj(Node nodeFrom, StepSummary summary) {
|
||||
smallstep(nodeFrom, _, summary)
|
||||
}
|
||||
|
||||
bindingset[nodeFrom, t]
|
||||
pragma[inline_late]
|
||||
pragma[noopt]
|
||||
private TypeTracker smallstepInlineLate(TypeTracker t, Node nodeFrom, Node nodeTo) {
|
||||
exists(StepSummary summary |
|
||||
smallstepProj(nodeFrom, summary) and
|
||||
result = t.append(summary) and
|
||||
smallstep(nodeFrom, nodeTo, summary)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `nodeFrom` is being written to the `content` of the object in `nodeTo`.
|
||||
*
|
||||
@@ -340,6 +317,8 @@ class StepSummary extends TStepSummary {
|
||||
|
||||
/** Provides predicates for updating step summaries (`StepSummary`s). */
|
||||
module StepSummary {
|
||||
predicate append = Cached::append/2;
|
||||
|
||||
/**
|
||||
* Gets the summary that corresponds to having taken a forwards
|
||||
* inter-procedural step from `nodeFrom` to `nodeTo`.
|
||||
@@ -400,6 +379,35 @@ module StepSummary {
|
||||
}
|
||||
|
||||
deprecated predicate localSourceStoreStep = flowsToStoreStep/3;
|
||||
|
||||
/** Gets the step summary for a level step. */
|
||||
StepSummary levelStep() { result = LevelStep() }
|
||||
|
||||
/** Gets the step summary for a call step. */
|
||||
StepSummary callStep() { result = CallStep() }
|
||||
|
||||
/** Gets the step summary for a return step. */
|
||||
StepSummary returnStep() { result = ReturnStep() }
|
||||
|
||||
/** Gets the step summary for storing into `content`. */
|
||||
StepSummary storeStep(TypeTrackerContent content) { result = StoreStep(content) }
|
||||
|
||||
/** Gets the step summary for loading from `content`. */
|
||||
StepSummary loadStep(TypeTrackerContent content) { result = LoadStep(content) }
|
||||
|
||||
/** Gets the step summary for loading from `load` and then storing into `store`. */
|
||||
StepSummary loadStoreStep(TypeTrackerContent load, TypeTrackerContent store) {
|
||||
result = LoadStoreStep(load, store)
|
||||
}
|
||||
|
||||
/** Gets the step summary for a step that only permits contents matched by `filter`. */
|
||||
StepSummary withContent(ContentFilter filter) { result = WithContent(filter) }
|
||||
|
||||
/** Gets the step summary for a step that blocks contents matched by `filter`. */
|
||||
StepSummary withoutContent(ContentFilter filter) { result = WithoutContent(filter) }
|
||||
|
||||
/** Gets the step summary for a jump step. */
|
||||
StepSummary jumpStep() { result = JumpStep() }
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -501,9 +509,26 @@ class TypeTracker extends TTypeTracker {
|
||||
* Gets the summary that corresponds to having taken a forwards
|
||||
* heap and/or inter-procedural step from `nodeFrom` to `nodeTo`.
|
||||
*/
|
||||
pragma[inline]
|
||||
bindingset[nodeFrom, this]
|
||||
pragma[inline_late]
|
||||
pragma[noopt]
|
||||
TypeTracker step(TypeTrackingNode nodeFrom, TypeTrackingNode nodeTo) {
|
||||
result = stepInlineLate(this, nodeFrom, nodeTo)
|
||||
exists(StepSummary summary |
|
||||
stepProj(nodeFrom, summary) and
|
||||
result = this.append(summary) and
|
||||
step(nodeFrom, nodeTo, summary)
|
||||
)
|
||||
}
|
||||
|
||||
bindingset[nodeFrom, this]
|
||||
pragma[inline_late]
|
||||
pragma[noopt]
|
||||
private TypeTracker smallstepNoSimpleLocalFlowStep(Node nodeFrom, Node nodeTo) {
|
||||
exists(StepSummary summary |
|
||||
smallstepProj(nodeFrom, summary) and
|
||||
result = this.append(summary) and
|
||||
smallstep(nodeFrom, nodeTo, summary)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -532,7 +557,7 @@ class TypeTracker extends TTypeTracker {
|
||||
*/
|
||||
pragma[inline]
|
||||
TypeTracker smallstep(Node nodeFrom, Node nodeTo) {
|
||||
result = smallstepInlineLate(this, nodeFrom, nodeTo)
|
||||
result = this.smallstepNoSimpleLocalFlowStep(nodeFrom, nodeTo)
|
||||
or
|
||||
simpleLocalFlowStep(nodeFrom, nodeTo) and
|
||||
result = this
|
||||
@@ -545,6 +570,13 @@ module TypeTracker {
|
||||
* Gets a valid end point of type tracking.
|
||||
*/
|
||||
TypeTracker end() { result.end() }
|
||||
|
||||
/**
|
||||
* INTERNAL USE ONLY.
|
||||
*
|
||||
* Gets a valid end point of type tracking with the call bit set to the given value.
|
||||
*/
|
||||
predicate end = Cached::noContentTypeTracker/1;
|
||||
}
|
||||
|
||||
pragma[nomagic]
|
||||
@@ -552,34 +584,10 @@ private predicate backStepProj(TypeTrackingNode nodeTo, StepSummary summary) {
|
||||
step(_, nodeTo, summary)
|
||||
}
|
||||
|
||||
bindingset[nodeTo, t]
|
||||
pragma[inline_late]
|
||||
pragma[noopt]
|
||||
private TypeBackTracker backStepInlineLate(
|
||||
TypeBackTracker t, TypeTrackingNode nodeFrom, TypeTrackingNode nodeTo
|
||||
) {
|
||||
exists(StepSummary summary |
|
||||
backStepProj(nodeTo, summary) and
|
||||
result = t.prepend(summary) and
|
||||
step(nodeFrom, nodeTo, summary)
|
||||
)
|
||||
}
|
||||
|
||||
private predicate backSmallstepProj(TypeTrackingNode nodeTo, StepSummary summary) {
|
||||
smallstep(_, nodeTo, summary)
|
||||
}
|
||||
|
||||
bindingset[nodeTo, t]
|
||||
pragma[inline_late]
|
||||
pragma[noopt]
|
||||
private TypeBackTracker backSmallstepInlineLate(TypeBackTracker t, Node nodeFrom, Node nodeTo) {
|
||||
exists(StepSummary summary |
|
||||
backSmallstepProj(nodeTo, summary) and
|
||||
result = t.prepend(summary) and
|
||||
smallstep(nodeFrom, nodeTo, summary)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* A summary of the steps needed to back-track a use of a value to a given dataflow node.
|
||||
*
|
||||
@@ -661,9 +669,26 @@ class TypeBackTracker extends TTypeBackTracker {
|
||||
* Gets the summary that corresponds to having taken a backwards
|
||||
* heap and/or inter-procedural step from `nodeTo` to `nodeFrom`.
|
||||
*/
|
||||
pragma[inline]
|
||||
bindingset[nodeTo, result]
|
||||
pragma[inline_late]
|
||||
pragma[noopt]
|
||||
TypeBackTracker step(TypeTrackingNode nodeFrom, TypeTrackingNode nodeTo) {
|
||||
this = backStepInlineLate(result, nodeFrom, nodeTo)
|
||||
exists(StepSummary summary |
|
||||
backStepProj(nodeTo, summary) and
|
||||
this = result.prepend(summary) and
|
||||
step(nodeFrom, nodeTo, summary)
|
||||
)
|
||||
}
|
||||
|
||||
bindingset[nodeTo, result]
|
||||
pragma[inline_late]
|
||||
pragma[noopt]
|
||||
private TypeBackTracker smallstepNoSimpleLocalFlowStep(Node nodeFrom, Node nodeTo) {
|
||||
exists(StepSummary summary |
|
||||
backSmallstepProj(nodeTo, summary) and
|
||||
this = result.prepend(summary) and
|
||||
smallstep(nodeFrom, nodeTo, summary)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -692,7 +717,7 @@ class TypeBackTracker extends TTypeBackTracker {
|
||||
*/
|
||||
pragma[inline]
|
||||
TypeBackTracker smallstep(Node nodeFrom, Node nodeTo) {
|
||||
this = backSmallstepInlineLate(result, nodeFrom, nodeTo)
|
||||
this = this.smallstepNoSimpleLocalFlowStep(nodeFrom, nodeTo)
|
||||
or
|
||||
simpleLocalFlowStep(nodeFrom, nodeTo) and
|
||||
this = result
|
||||
|
||||
@@ -61,7 +61,9 @@ predicate capturedJumpStep(Node nodeFrom, Node nodeTo) {
|
||||
predicate levelStepCall(Node nodeFrom, Node nodeTo) { none() }
|
||||
|
||||
/** Holds if there is a level step from `nodeFrom` to `nodeTo`, which does not depend on the call graph. */
|
||||
predicate levelStepNoCall(Node nodeFrom, Node nodeTo) { none() }
|
||||
predicate levelStepNoCall(Node nodeFrom, Node nodeTo) {
|
||||
TypeTrackerSummaryFlow::levelStepNoCall(nodeFrom, nodeTo)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the name of a possible piece of content. For Python, this is currently only attribute names,
|
||||
@@ -108,6 +110,12 @@ predicate basicStoreStep(Node nodeFrom, Node nodeTo, string content) {
|
||||
nodeFrom = a.getValue() and
|
||||
nodeTo = a.getObject()
|
||||
)
|
||||
or
|
||||
exists(DataFlowPublic::ContentSet contents |
|
||||
contents.(DataFlowPublic::AttributeContent).getAttribute() = content
|
||||
|
|
||||
TypeTrackerSummaryFlow::basicStoreStep(nodeFrom, nodeTo, contents)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -119,13 +127,24 @@ predicate basicLoadStep(Node nodeFrom, Node nodeTo, string content) {
|
||||
nodeFrom = a.getObject() and
|
||||
nodeTo = a
|
||||
)
|
||||
or
|
||||
exists(DataFlowPublic::ContentSet contents |
|
||||
contents.(DataFlowPublic::AttributeContent).getAttribute() = content
|
||||
|
|
||||
TypeTrackerSummaryFlow::basicLoadStep(nodeFrom, nodeTo, contents)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if the `loadContent` of `nodeFrom` is stored in the `storeContent` of `nodeTo`.
|
||||
*/
|
||||
predicate basicLoadStoreStep(Node nodeFrom, Node nodeTo, string loadContent, string storeContent) {
|
||||
none()
|
||||
exists(DataFlowPublic::ContentSet loadContents, DataFlowPublic::ContentSet storeContents |
|
||||
loadContents.(DataFlowPublic::AttributeContent).getAttribute() = loadContent and
|
||||
storeContents.(DataFlowPublic::AttributeContent).getAttribute() = storeContent
|
||||
|
|
||||
TypeTrackerSummaryFlow::basicLoadStoreStep(nodeFrom, nodeTo, loadContents, storeContents)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -144,3 +163,94 @@ predicate basicWithContentStep(Node nodeFrom, Node nodeTo, ContentFilter filter)
|
||||
class Boolean extends boolean {
|
||||
Boolean() { this = true or this = false }
|
||||
}
|
||||
|
||||
private import SummaryTypeTracker as SummaryTypeTracker
|
||||
private import semmle.python.dataflow.new.FlowSummary as FlowSummary
|
||||
private import semmle.python.dataflow.new.internal.DataFlowDispatch as DataFlowDispatch
|
||||
|
||||
pragma[noinline]
|
||||
private predicate argumentPositionMatch(
|
||||
DataFlowPublic::CallCfgNode call, DataFlowPublic::Node arg,
|
||||
DataFlowDispatch::ParameterPosition ppos
|
||||
) {
|
||||
exists(DataFlowDispatch::ArgumentPosition apos |
|
||||
DataFlowDispatch::parameterMatch(ppos, apos) and
|
||||
DataFlowDispatch::normalCallArg(call.getNode(), arg, apos)
|
||||
)
|
||||
}
|
||||
|
||||
private module SummaryTypeTrackerInput implements SummaryTypeTracker::Input {
|
||||
// Dataflow nodes
|
||||
class Node = DataFlowPublic::Node;
|
||||
|
||||
// Content
|
||||
class TypeTrackerContent = DataFlowPublic::ContentSet;
|
||||
|
||||
class TypeTrackerContentFilter = ContentFilter;
|
||||
|
||||
TypeTrackerContentFilter getFilterFromWithoutContentStep(TypeTrackerContent content) { none() }
|
||||
|
||||
TypeTrackerContentFilter getFilterFromWithContentStep(TypeTrackerContent content) { none() }
|
||||
|
||||
// Callables
|
||||
class SummarizedCallable = FlowSummary::SummarizedCallable;
|
||||
|
||||
// Summaries and their stacks
|
||||
class SummaryComponent = FlowSummary::SummaryComponent;
|
||||
|
||||
class SummaryComponentStack = FlowSummary::SummaryComponentStack;
|
||||
|
||||
predicate singleton = FlowSummary::SummaryComponentStack::singleton/1;
|
||||
|
||||
predicate push = FlowSummary::SummaryComponentStack::push/2;
|
||||
|
||||
// Relating content to summaries
|
||||
predicate content = FlowSummary::SummaryComponent::content/1;
|
||||
|
||||
SummaryComponent withoutContent(TypeTrackerContent contents) { none() }
|
||||
|
||||
SummaryComponent withContent(TypeTrackerContent contents) { none() }
|
||||
|
||||
predicate return = FlowSummary::SummaryComponent::return/0;
|
||||
|
||||
// Relating nodes to summaries
|
||||
Node argumentOf(Node call, SummaryComponent arg, boolean isPostUpdate) {
|
||||
exists(DataFlowDispatch::ParameterPosition pos |
|
||||
arg = FlowSummary::SummaryComponent::argument(pos) and
|
||||
argumentPositionMatch(call, result, pos) and
|
||||
isPostUpdate = [false, true] // todo: implement when/if Python uses post-update nodes in type tracking
|
||||
)
|
||||
}
|
||||
|
||||
Node parameterOf(Node callable, SummaryComponent param) {
|
||||
exists(
|
||||
DataFlowDispatch::ArgumentPosition apos, DataFlowDispatch::ParameterPosition ppos, Parameter p
|
||||
|
|
||||
param = FlowSummary::SummaryComponent::parameter(apos) and
|
||||
DataFlowDispatch::parameterMatch(ppos, apos) and
|
||||
// pick the SsaNode rather than the CfgNode
|
||||
result.asVar().getDefinition().(ParameterDefinition).getParameter() = p and
|
||||
(
|
||||
exists(int i | ppos.isPositional(i) |
|
||||
p = callable.getALocalSource().asExpr().(CallableExpr).getInnerScope().getArg(i)
|
||||
)
|
||||
or
|
||||
exists(string name | ppos.isKeyword(name) |
|
||||
p = callable.getALocalSource().asExpr().(CallableExpr).getInnerScope().getArgByName(name)
|
||||
)
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
Node returnOf(Node callable, SummaryComponent return) {
|
||||
return = FlowSummary::SummaryComponent::return() and
|
||||
// `result` should be the return value of a callable expression (lambda or function) referenced by `callable`
|
||||
result.asCfgNode() =
|
||||
callable.getALocalSource().asExpr().(CallableExpr).getInnerScope().getAReturnValueFlowNode()
|
||||
}
|
||||
|
||||
// Relating callables to nodes
|
||||
Node callTo(SummarizedCallable callable) { result = callable.getACallSimple() }
|
||||
}
|
||||
|
||||
private module TypeTrackerSummaryFlow = SummaryTypeTracker::SummaryFlow<SummaryTypeTrackerInput>;
|
||||
|
||||
@@ -1,74 +0,0 @@
|
||||
/**
|
||||
* Provides classes for performing local (intra-procedural) and
|
||||
* global (inter-procedural) taint-tracking analyses.
|
||||
*/
|
||||
|
||||
import TaintTrackingParameter::Public
|
||||
private import TaintTrackingParameter::Private
|
||||
|
||||
private module AddTaintDefaults<DataFlowInternal::FullStateConfigSig Config> implements
|
||||
DataFlowInternal::FullStateConfigSig
|
||||
{
|
||||
import Config
|
||||
|
||||
predicate isBarrier(DataFlow::Node node) {
|
||||
Config::isBarrier(node) or defaultTaintSanitizer(node)
|
||||
}
|
||||
|
||||
predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
|
||||
Config::isAdditionalFlowStep(node1, node2) or
|
||||
defaultAdditionalTaintStep(node1, node2)
|
||||
}
|
||||
|
||||
predicate allowImplicitRead(DataFlow::Node node, DataFlow::ContentSet c) {
|
||||
Config::allowImplicitRead(node, c)
|
||||
or
|
||||
(
|
||||
Config::isSink(node, _) or
|
||||
Config::isAdditionalFlowStep(node, _) or
|
||||
Config::isAdditionalFlowStep(node, _, _, _)
|
||||
) and
|
||||
defaultImplicitTaintRead(node, c)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs a global taint tracking computation.
|
||||
*/
|
||||
module Global<DataFlow::ConfigSig Config> implements DataFlow::GlobalFlowSig {
|
||||
private module Config0 implements DataFlowInternal::FullStateConfigSig {
|
||||
import DataFlowInternal::DefaultState<Config>
|
||||
import Config
|
||||
}
|
||||
|
||||
private module C implements DataFlowInternal::FullStateConfigSig {
|
||||
import AddTaintDefaults<Config0>
|
||||
}
|
||||
|
||||
import DataFlowInternal::Impl<C>
|
||||
}
|
||||
|
||||
/** DEPRECATED: Use `Global` instead. */
|
||||
deprecated module Make<DataFlow::ConfigSig Config> implements DataFlow::GlobalFlowSig {
|
||||
import Global<Config>
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs a global taint tracking computation using flow state.
|
||||
*/
|
||||
module GlobalWithState<DataFlow::StateConfigSig Config> implements DataFlow::GlobalFlowSig {
|
||||
private module Config0 implements DataFlowInternal::FullStateConfigSig {
|
||||
import Config
|
||||
}
|
||||
|
||||
private module C implements DataFlowInternal::FullStateConfigSig {
|
||||
import AddTaintDefaults<Config0>
|
||||
}
|
||||
|
||||
import DataFlowInternal::Impl<C>
|
||||
}
|
||||
|
||||
/** DEPRECATED: Use `GlobalWithState` instead. */
|
||||
deprecated module MakeWithState<DataFlow::StateConfigSig Config> implements DataFlow::GlobalFlowSig {
|
||||
import GlobalWithState<Config>
|
||||
}
|
||||
@@ -116,33 +116,6 @@ abstract class Configuration extends DataFlow::Configuration {
|
||||
|
||||
final override predicate isBarrierOut(DataFlow::Node node) { this.isSanitizerOut(node) }
|
||||
|
||||
/**
|
||||
* DEPRECATED: Use `isSanitizer` and `BarrierGuard` module instead.
|
||||
*
|
||||
* Holds if taint propagation through nodes guarded by `guard` is prohibited.
|
||||
*/
|
||||
deprecated predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { none() }
|
||||
|
||||
deprecated final override predicate isBarrierGuard(DataFlow::BarrierGuard guard) {
|
||||
this.isSanitizerGuard(guard)
|
||||
}
|
||||
|
||||
/**
|
||||
* DEPRECATED: Use `isSanitizer` and `BarrierGuard` module instead.
|
||||
*
|
||||
* Holds if taint propagation through nodes guarded by `guard` is prohibited
|
||||
* when the flow state is `state`.
|
||||
*/
|
||||
deprecated predicate isSanitizerGuard(DataFlow::BarrierGuard guard, DataFlow::FlowState state) {
|
||||
none()
|
||||
}
|
||||
|
||||
deprecated final override predicate isBarrierGuard(
|
||||
DataFlow::BarrierGuard guard, DataFlow::FlowState state
|
||||
) {
|
||||
this.isSanitizerGuard(guard, state)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if taint may propagate from `node1` to `node2` in addition to the normal data-flow and taint steps.
|
||||
*/
|
||||
|
||||
@@ -116,33 +116,6 @@ abstract class Configuration extends DataFlow::Configuration {
|
||||
|
||||
final override predicate isBarrierOut(DataFlow::Node node) { this.isSanitizerOut(node) }
|
||||
|
||||
/**
|
||||
* DEPRECATED: Use `isSanitizer` and `BarrierGuard` module instead.
|
||||
*
|
||||
* Holds if taint propagation through nodes guarded by `guard` is prohibited.
|
||||
*/
|
||||
deprecated predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { none() }
|
||||
|
||||
deprecated final override predicate isBarrierGuard(DataFlow::BarrierGuard guard) {
|
||||
this.isSanitizerGuard(guard)
|
||||
}
|
||||
|
||||
/**
|
||||
* DEPRECATED: Use `isSanitizer` and `BarrierGuard` module instead.
|
||||
*
|
||||
* Holds if taint propagation through nodes guarded by `guard` is prohibited
|
||||
* when the flow state is `state`.
|
||||
*/
|
||||
deprecated predicate isSanitizerGuard(DataFlow::BarrierGuard guard, DataFlow::FlowState state) {
|
||||
none()
|
||||
}
|
||||
|
||||
deprecated final override predicate isBarrierGuard(
|
||||
DataFlow::BarrierGuard guard, DataFlow::FlowState state
|
||||
) {
|
||||
this.isSanitizerGuard(guard, state)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if taint may propagate from `node1` to `node2` in addition to the normal data-flow and taint steps.
|
||||
*/
|
||||
|
||||
@@ -116,33 +116,6 @@ abstract class Configuration extends DataFlow::Configuration {
|
||||
|
||||
final override predicate isBarrierOut(DataFlow::Node node) { this.isSanitizerOut(node) }
|
||||
|
||||
/**
|
||||
* DEPRECATED: Use `isSanitizer` and `BarrierGuard` module instead.
|
||||
*
|
||||
* Holds if taint propagation through nodes guarded by `guard` is prohibited.
|
||||
*/
|
||||
deprecated predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { none() }
|
||||
|
||||
deprecated final override predicate isBarrierGuard(DataFlow::BarrierGuard guard) {
|
||||
this.isSanitizerGuard(guard)
|
||||
}
|
||||
|
||||
/**
|
||||
* DEPRECATED: Use `isSanitizer` and `BarrierGuard` module instead.
|
||||
*
|
||||
* Holds if taint propagation through nodes guarded by `guard` is prohibited
|
||||
* when the flow state is `state`.
|
||||
*/
|
||||
deprecated predicate isSanitizerGuard(DataFlow::BarrierGuard guard, DataFlow::FlowState state) {
|
||||
none()
|
||||
}
|
||||
|
||||
deprecated final override predicate isBarrierGuard(
|
||||
DataFlow::BarrierGuard guard, DataFlow::FlowState state
|
||||
) {
|
||||
this.isSanitizerGuard(guard, state)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if taint may propagate from `node1` to `node2` in addition to the normal data-flow and taint steps.
|
||||
*/
|
||||
|
||||
@@ -116,33 +116,6 @@ abstract class Configuration extends DataFlow::Configuration {
|
||||
|
||||
final override predicate isBarrierOut(DataFlow::Node node) { this.isSanitizerOut(node) }
|
||||
|
||||
/**
|
||||
* DEPRECATED: Use `isSanitizer` and `BarrierGuard` module instead.
|
||||
*
|
||||
* Holds if taint propagation through nodes guarded by `guard` is prohibited.
|
||||
*/
|
||||
deprecated predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { none() }
|
||||
|
||||
deprecated final override predicate isBarrierGuard(DataFlow::BarrierGuard guard) {
|
||||
this.isSanitizerGuard(guard)
|
||||
}
|
||||
|
||||
/**
|
||||
* DEPRECATED: Use `isSanitizer` and `BarrierGuard` module instead.
|
||||
*
|
||||
* Holds if taint propagation through nodes guarded by `guard` is prohibited
|
||||
* when the flow state is `state`.
|
||||
*/
|
||||
deprecated predicate isSanitizerGuard(DataFlow::BarrierGuard guard, DataFlow::FlowState state) {
|
||||
none()
|
||||
}
|
||||
|
||||
deprecated final override predicate isBarrierGuard(
|
||||
DataFlow::BarrierGuard guard, DataFlow::FlowState state
|
||||
) {
|
||||
this.isSanitizerGuard(guard, state)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if taint may propagate from `node1` to `node2` in addition to the normal data-flow and taint steps.
|
||||
*/
|
||||
|
||||
@@ -664,6 +664,14 @@ module DataFlow {
|
||||
}
|
||||
}
|
||||
|
||||
deprecated private class DataFlowType extends TaintKind {
|
||||
// this only exists to avoid an empty recursion error in the type checker
|
||||
DataFlowType() {
|
||||
this = "Data flow" and
|
||||
1 = 2
|
||||
}
|
||||
}
|
||||
|
||||
pragma[noinline]
|
||||
private predicate dict_construct(ControlFlowNode itemnode, ControlFlowNode dictnode) {
|
||||
dictnode.(DictNode).getAValue() = itemnode
|
||||
|
||||
@@ -20,7 +20,12 @@ module SsaSource {
|
||||
/** Holds if `v` is defined by assignment at `defn` and given `value`. */
|
||||
cached
|
||||
predicate assignment_definition(Variable v, ControlFlowNode defn, ControlFlowNode value) {
|
||||
defn.(NameNode).defines(v) and defn.(DefinitionNode).getValue() = value
|
||||
defn.(NameNode).defines(v) and
|
||||
defn.(DefinitionNode).getValue() = value and
|
||||
// since parameter will be considered a DefinitionNode, if it has a default value,
|
||||
// we need to exclude it here since it is already covered by parameter_definition
|
||||
// (and points-to was unhappy that it was included in both)
|
||||
not parameter_definition(v, defn)
|
||||
}
|
||||
|
||||
/** Holds if `v` is defined by assignment of the captured exception. */
|
||||
|
||||
42
python/ql/lib/semmle/python/frameworks/Aiofile.qll
Normal file
42
python/ql/lib/semmle/python/frameworks/Aiofile.qll
Normal file
@@ -0,0 +1,42 @@
|
||||
/**
|
||||
* Provides classes modeling security-relevant aspects of the `aiofile` PyPI package.
|
||||
*
|
||||
* See https://pypi.org/project/aiofile.
|
||||
*/
|
||||
|
||||
private import python
|
||||
private import semmle.python.dataflow.new.DataFlow
|
||||
private import semmle.python.dataflow.new.RemoteFlowSources
|
||||
private import semmle.python.dataflow.new.TaintTracking
|
||||
private import semmle.python.Concepts
|
||||
private import semmle.python.ApiGraphs
|
||||
|
||||
/**
|
||||
* Provides models for the `aiofile` PyPI package.
|
||||
*
|
||||
* See https://pypi.org/project/aiofile.
|
||||
*/
|
||||
private module Aiofile {
|
||||
/**
|
||||
* A call to the `async_open` function or `AIOFile` constructor from `aiofile` as a sink for Filesystem access.
|
||||
*/
|
||||
class FileResponseCall extends FileSystemAccess::Range, API::CallNode {
|
||||
string methodName;
|
||||
|
||||
FileResponseCall() {
|
||||
this = API::moduleImport("aiofile").getMember("async_open").getACall() and
|
||||
methodName = "async_open"
|
||||
or
|
||||
this = API::moduleImport("aiofile").getMember("AIOFile").getACall() and
|
||||
methodName = "AIOFile"
|
||||
}
|
||||
|
||||
override DataFlow::Node getAPathArgument() {
|
||||
result = this.getParameter(0, "file_specifier").asSink() and
|
||||
methodName = "async_open"
|
||||
or
|
||||
result = this.getParameter(0, "filename").asSink() and
|
||||
methodName = "AIOFile"
|
||||
}
|
||||
}
|
||||
}
|
||||
28
python/ql/lib/semmle/python/frameworks/Aiofiles.qll
Normal file
28
python/ql/lib/semmle/python/frameworks/Aiofiles.qll
Normal file
@@ -0,0 +1,28 @@
|
||||
/**
|
||||
* Provides classes modeling security-relevant aspects of the `aiofiles` PyPI package.
|
||||
*
|
||||
* See https://pypi.org/project/aiofiles.
|
||||
*/
|
||||
|
||||
private import python
|
||||
private import semmle.python.dataflow.new.DataFlow
|
||||
private import semmle.python.dataflow.new.RemoteFlowSources
|
||||
private import semmle.python.dataflow.new.TaintTracking
|
||||
private import semmle.python.Concepts
|
||||
private import semmle.python.ApiGraphs
|
||||
|
||||
/**
|
||||
* Provides models for the `aiofiles` PyPI package.
|
||||
*
|
||||
* See https://pypi.org/project/aiofiles.
|
||||
*/
|
||||
private module Aiofiles {
|
||||
/**
|
||||
* A call to the `open` function from `aiofiles` as a sink for Filesystem access.
|
||||
*/
|
||||
class FileResponseCall extends FileSystemAccess::Range, API::CallNode {
|
||||
FileResponseCall() { this = API::moduleImport("aiofiles").getMember("open").getACall() }
|
||||
|
||||
override DataFlow::Node getAPathArgument() { result = this.getParameter(0, "file").asSink() }
|
||||
}
|
||||
}
|
||||
@@ -468,6 +468,27 @@ module AiohttpWebModel {
|
||||
override string getSourceType() { result = "aiohttp.web.Request" }
|
||||
}
|
||||
|
||||
/**
|
||||
* A parameter that has a type annotation of `aiohttp.web.Request`, so with all
|
||||
* likelihood will receive an `aiohttp.web.Request` instance at some point when a
|
||||
* request handler is invoked.
|
||||
*/
|
||||
class AiohttpRequestParamFromTypeAnnotation extends Request::InstanceSource,
|
||||
DataFlow::ParameterNode, RemoteFlowSource::Range
|
||||
{
|
||||
AiohttpRequestParamFromTypeAnnotation() {
|
||||
not this instanceof AiohttpRequestHandlerRequestParam and
|
||||
this.getParameter().getAnnotation() =
|
||||
API::moduleImport("aiohttp")
|
||||
.getMember("web")
|
||||
.getMember("Request")
|
||||
.getAValueReachableFromSource()
|
||||
.asExpr()
|
||||
}
|
||||
|
||||
override string getSourceType() { result = "aiohttp.web.Request from type-annotation" }
|
||||
}
|
||||
|
||||
/**
|
||||
* A read of the `request` attribute on an instance of an aiohttp.web View class,
|
||||
* which is the request being processed currently.
|
||||
@@ -498,14 +519,17 @@ module AiohttpWebModel {
|
||||
* - https://docs.aiohttp.org/en/stable/web_quickstart.html#aiohttp-web-exceptions
|
||||
*/
|
||||
class AiohttpWebResponseInstantiation extends Http::Server::HttpResponse::Range,
|
||||
Response::InstanceSource, DataFlow::CallCfgNode
|
||||
Response::InstanceSource, API::CallNode
|
||||
{
|
||||
API::Node apiNode;
|
||||
|
||||
AiohttpWebResponseInstantiation() {
|
||||
this = apiNode.getACall() and
|
||||
(
|
||||
apiNode = API::moduleImport("aiohttp").getMember("web").getMember("Response")
|
||||
apiNode =
|
||||
API::moduleImport("aiohttp")
|
||||
.getMember("web")
|
||||
.getMember(["FileResponse", "Response", "StreamResponse"])
|
||||
or
|
||||
exists(string httpExceptionClassName |
|
||||
httpExceptionClassName in [
|
||||
@@ -545,6 +569,10 @@ module AiohttpWebModel {
|
||||
|
||||
override DataFlow::Node getMimetypeOrContentTypeArg() {
|
||||
result = this.getArgByName("content_type")
|
||||
or
|
||||
exists(string key | key.toLowerCase() = "content-type" |
|
||||
result = this.getKeywordParameter("headers").getSubscript(key).getAValueReachingSink()
|
||||
)
|
||||
}
|
||||
|
||||
override string getMimetypeDefault() {
|
||||
@@ -556,6 +584,37 @@ module AiohttpWebModel {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A call to the `aiohttp.web.FileResponse` constructor as a sink for Filesystem access.
|
||||
*/
|
||||
class FileResponseCall extends FileSystemAccess::Range, API::CallNode {
|
||||
FileResponseCall() {
|
||||
this = API::moduleImport("aiohttp").getMember("web").getMember("FileResponse").getACall()
|
||||
}
|
||||
|
||||
override DataFlow::Node getAPathArgument() { result = this.getParameter(0, "path").asSink() }
|
||||
}
|
||||
|
||||
/**
|
||||
* An instantiation of `aiohttp.web.StreamResponse`.
|
||||
*
|
||||
* See https://docs.aiohttp.org/en/stable/web_reference.html#aiohttp.web.StreamResponse
|
||||
*/
|
||||
class StreamResponse extends AiohttpWebResponseInstantiation {
|
||||
StreamResponse() {
|
||||
this = API::moduleImport("aiohttp").getMember("web").getMember("StreamResponse").getACall()
|
||||
}
|
||||
|
||||
override DataFlow::Node getBody() {
|
||||
result =
|
||||
this.getReturn()
|
||||
.getMember(["write", "write_eof"])
|
||||
.getACall()
|
||||
.getParameter(0, "data")
|
||||
.asSink()
|
||||
}
|
||||
}
|
||||
|
||||
/** Gets an HTTP response instance. */
|
||||
private API::Node aiohttpResponseInstance() {
|
||||
result = any(AiohttpWebResponseInstantiation call).getApiNode().getReturn()
|
||||
@@ -670,14 +729,14 @@ private module AiohttpClientModel {
|
||||
string methodName;
|
||||
|
||||
OutgoingRequestCall() {
|
||||
methodName in [Http::httpVerbLower(), "request"] and
|
||||
methodName in [Http::httpVerbLower(), "request", "ws_connect"] and
|
||||
this = instance().getMember(methodName).getACall()
|
||||
}
|
||||
|
||||
override DataFlow::Node getAUrlPart() {
|
||||
result = this.getArgByName("url")
|
||||
or
|
||||
not methodName = "request" and
|
||||
methodName in [Http::httpVerbLower(), "ws_connect"] and
|
||||
result = this.getArg(0)
|
||||
or
|
||||
methodName = "request" and
|
||||
|
||||
54
python/ql/lib/semmle/python/frameworks/Anyio.qll
Normal file
54
python/ql/lib/semmle/python/frameworks/Anyio.qll
Normal file
@@ -0,0 +1,54 @@
|
||||
/**
|
||||
* Provides classes modeling security-relevant aspects of the `anyio` PyPI package.
|
||||
*
|
||||
* See https://pypi.org/project/anyio.
|
||||
*/
|
||||
|
||||
private import python
|
||||
private import semmle.python.dataflow.new.DataFlow
|
||||
private import semmle.python.dataflow.new.RemoteFlowSources
|
||||
private import semmle.python.dataflow.new.TaintTracking
|
||||
private import semmle.python.Concepts
|
||||
private import semmle.python.ApiGraphs
|
||||
|
||||
/**
|
||||
* Provides models for the `anyio` PyPI package.
|
||||
*
|
||||
* See https://pypi.org/project/anyio.
|
||||
*/
|
||||
private module Anyio {
|
||||
/**
|
||||
* A call to the `from_path` function from `FileReadStream` or `FileWriteStream` constructors of `anyio.streams.file` as a sink for Filesystem access.
|
||||
*/
|
||||
class FileStreamCall extends FileSystemAccess::Range, API::CallNode {
|
||||
FileStreamCall() {
|
||||
this =
|
||||
API::moduleImport("anyio")
|
||||
.getMember("streams")
|
||||
.getMember("file")
|
||||
.getMember(["FileReadStream", "FileWriteStream"])
|
||||
.getMember("from_path")
|
||||
.getACall()
|
||||
}
|
||||
|
||||
override DataFlow::Node getAPathArgument() { result = this.getParameter(0, "path").asSink() }
|
||||
}
|
||||
|
||||
/**
|
||||
* A call to the `Path` constructor from `anyio` as a sink for Filesystem access.
|
||||
*/
|
||||
class PathCall extends FileSystemAccess::Range, API::CallNode {
|
||||
PathCall() { this = API::moduleImport("anyio").getMember("Path").getACall() }
|
||||
|
||||
override DataFlow::Node getAPathArgument() { result = this.getParameter(0).asSink() }
|
||||
}
|
||||
|
||||
/**
|
||||
* A call to the `open_file` function from `anyio` as a sink for Filesystem access.
|
||||
*/
|
||||
class OpenFileCall extends FileSystemAccess::Range, API::CallNode {
|
||||
OpenFileCall() { this = API::moduleImport("anyio").getMember("open_file").getACall() }
|
||||
|
||||
override DataFlow::Node getAPathArgument() { result = this.getParameter(0, "file").asSink() }
|
||||
}
|
||||
}
|
||||
38
python/ql/lib/semmle/python/frameworks/BSon.qll
Normal file
38
python/ql/lib/semmle/python/frameworks/BSon.qll
Normal file
@@ -0,0 +1,38 @@
|
||||
/**
|
||||
* Provides classes modeling security-relevant aspects of the `bson` PyPI package.
|
||||
* See
|
||||
* - https://pypi.org/project/bson/
|
||||
* - https://github.com/py-bson/bson
|
||||
*/
|
||||
|
||||
private import python
|
||||
private import semmle.python.dataflow.new.DataFlow
|
||||
private import semmle.python.Concepts
|
||||
private import semmle.python.ApiGraphs
|
||||
|
||||
/**
|
||||
* Provides models for the `bson` PyPI package.
|
||||
* See
|
||||
* - https://pypi.org/project/bson/
|
||||
* - https://github.com/py-bson/bson
|
||||
*/
|
||||
private module BSon {
|
||||
/**
|
||||
* ObjectId returns a string representing an id.
|
||||
* If at any time ObjectId can't parse it's input (like when a tainted dict in passed in),
|
||||
* then ObjectId will throw an error preventing the query from running.
|
||||
*/
|
||||
private class BsonObjectIdCall extends DataFlow::CallCfgNode, NoSqlSanitizer::Range {
|
||||
BsonObjectIdCall() {
|
||||
exists(API::Node mod |
|
||||
mod = API::moduleImport("bson")
|
||||
or
|
||||
mod = API::moduleImport("bson").getMember(["objectid", "json_util"])
|
||||
|
|
||||
this = mod.getMember("ObjectId").getACall()
|
||||
)
|
||||
}
|
||||
|
||||
override DataFlow::Node getAnInput() { result = this.getArg(0) }
|
||||
}
|
||||
}
|
||||
35
python/ql/lib/semmle/python/frameworks/Baize.qll
Normal file
35
python/ql/lib/semmle/python/frameworks/Baize.qll
Normal file
@@ -0,0 +1,35 @@
|
||||
/**
|
||||
* Provides classes modeling security-relevant aspects of the `baize` PyPI package.
|
||||
*
|
||||
* See https://pypi.org/project/baize.
|
||||
*/
|
||||
|
||||
private import python
|
||||
private import semmle.python.dataflow.new.DataFlow
|
||||
private import semmle.python.dataflow.new.TaintTracking
|
||||
private import semmle.python.Concepts
|
||||
private import semmle.python.ApiGraphs
|
||||
private import semmle.python.frameworks.internal.InstanceTaintStepsHelper
|
||||
private import semmle.python.frameworks.Stdlib
|
||||
|
||||
/**
|
||||
* Provides models for `baize` PyPI package.
|
||||
*
|
||||
* See https://pypi.org/project/baize.
|
||||
*/
|
||||
module Baize {
|
||||
/**
|
||||
* A call to the `baize.asgi.FileResponse` constructor as a sink for Filesystem access.
|
||||
*
|
||||
* it is not contained to Starlette source code but it is mentioned in documents as an alternative to Starlette FileResponse
|
||||
*/
|
||||
class BaizeFileResponseCall extends FileSystemAccess::Range, API::CallNode {
|
||||
BaizeFileResponseCall() {
|
||||
this = API::moduleImport("baize").getMember("asgi").getMember("FileResponse").getACall()
|
||||
}
|
||||
|
||||
override DataFlow::Node getAPathArgument() {
|
||||
result = this.getParameter(0, "filepath").asSink()
|
||||
}
|
||||
}
|
||||
}
|
||||
48
python/ql/lib/semmle/python/frameworks/Cherrypy.qll
Normal file
48
python/ql/lib/semmle/python/frameworks/Cherrypy.qll
Normal file
@@ -0,0 +1,48 @@
|
||||
/**
|
||||
* Provides classes modeling security-relevant aspects of the `cherrypy` PyPI package.
|
||||
*/
|
||||
|
||||
private import python
|
||||
private import semmle.python.dataflow.new.DataFlow
|
||||
private import semmle.python.dataflow.new.RemoteFlowSources
|
||||
private import semmle.python.dataflow.new.TaintTracking
|
||||
private import semmle.python.Concepts
|
||||
private import semmle.python.ApiGraphs
|
||||
|
||||
/**
|
||||
* Provides models for the `cherrypy` PyPI package.
|
||||
* See https://cherrypy.dev/.
|
||||
*/
|
||||
private module Cherrypy {
|
||||
/**
|
||||
* Holds for an instance of `cherrypy.lib.static`
|
||||
*/
|
||||
API::Node libStatic() {
|
||||
result = API::moduleImport("cherrypy").getMember("lib").getMember("static")
|
||||
}
|
||||
|
||||
/**
|
||||
* A call to the `serve_file` or `serve_download`or `staticfile` functions of `cherrypy.lib.static` as a sink for Filesystem access.
|
||||
*/
|
||||
class FileResponseCall extends FileSystemAccess::Range, API::CallNode {
|
||||
string funcName;
|
||||
|
||||
FileResponseCall() {
|
||||
this = libStatic().getMember("staticfile").getACall() and
|
||||
funcName = "staticfile"
|
||||
or
|
||||
this = libStatic().getMember("serve_file").getACall() and
|
||||
funcName = "serve_file"
|
||||
or
|
||||
this = libStatic().getMember("serve_download").getACall() and
|
||||
funcName = "serve_download"
|
||||
}
|
||||
|
||||
override DataFlow::Node getAPathArgument() {
|
||||
result = this.getParameter(0, "path").asSink() and funcName = ["serve_download", "serve_file"]
|
||||
or
|
||||
result = this.getParameter(0, "filename").asSink() and
|
||||
funcName = "staticfile"
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -128,6 +128,8 @@ private module CryptodomeModel {
|
||||
this = newCall.getReturn().getMember(methodName).getACall()
|
||||
}
|
||||
|
||||
override DataFlow::Node getInitialization() { result = newCall }
|
||||
|
||||
override Cryptography::CryptographicAlgorithm getAlgorithm() { result.matchesName(cipherName) }
|
||||
|
||||
override DataFlow::Node getAnInput() {
|
||||
@@ -181,21 +183,23 @@ private module CryptodomeModel {
|
||||
class CryptodomeGenericSignatureOperation extends Cryptography::CryptographicOperation::Range,
|
||||
DataFlow::CallCfgNode
|
||||
{
|
||||
API::CallNode newCall;
|
||||
string methodName;
|
||||
string signatureName;
|
||||
|
||||
CryptodomeGenericSignatureOperation() {
|
||||
methodName in ["sign", "verify"] and
|
||||
this =
|
||||
newCall =
|
||||
API::moduleImport(["Crypto", "Cryptodome"])
|
||||
.getMember("Signature")
|
||||
.getMember(signatureName)
|
||||
.getMember("new")
|
||||
.getReturn()
|
||||
.getMember(methodName)
|
||||
.getACall()
|
||||
.getACall() and
|
||||
this = newCall.getReturn().getMember(methodName).getACall()
|
||||
}
|
||||
|
||||
override DataFlow::Node getInitialization() { result = newCall }
|
||||
|
||||
override Cryptography::CryptographicAlgorithm getAlgorithm() {
|
||||
result.matchesName(signatureName)
|
||||
}
|
||||
@@ -221,19 +225,23 @@ private module CryptodomeModel {
|
||||
class CryptodomeGenericHashOperation extends Cryptography::CryptographicOperation::Range,
|
||||
DataFlow::CallCfgNode
|
||||
{
|
||||
API::CallNode newCall;
|
||||
string hashName;
|
||||
|
||||
CryptodomeGenericHashOperation() {
|
||||
exists(API::Node hashModule |
|
||||
hashModule =
|
||||
API::moduleImport(["Crypto", "Cryptodome"]).getMember("Hash").getMember(hashName)
|
||||
API::moduleImport(["Crypto", "Cryptodome"]).getMember("Hash").getMember(hashName) and
|
||||
newCall = hashModule.getMember("new").getACall()
|
||||
|
|
||||
this = hashModule.getMember("new").getACall()
|
||||
this = newCall
|
||||
or
|
||||
this = hashModule.getMember("new").getReturn().getMember("update").getACall()
|
||||
this = newCall.getReturn().getMember("update").getACall()
|
||||
)
|
||||
}
|
||||
|
||||
override DataFlow::Node getInitialization() { result = newCall }
|
||||
|
||||
override Cryptography::CryptographicAlgorithm getAlgorithm() { result.matchesName(hashName) }
|
||||
|
||||
override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("data")] }
|
||||
|
||||
@@ -209,18 +209,18 @@ private module CryptographyModel {
|
||||
class CryptographyGenericCipherOperation extends Cryptography::CryptographicOperation::Range,
|
||||
DataFlow::MethodCallNode
|
||||
{
|
||||
API::CallNode init;
|
||||
string algorithmName;
|
||||
string modeName;
|
||||
|
||||
CryptographyGenericCipherOperation() {
|
||||
this =
|
||||
cipherInstance(algorithmName, modeName)
|
||||
.getMember(["decryptor", "encryptor"])
|
||||
.getReturn()
|
||||
.getMember(["update", "update_into"])
|
||||
.getACall()
|
||||
init =
|
||||
cipherInstance(algorithmName, modeName).getMember(["decryptor", "encryptor"]).getACall() and
|
||||
this = init.getReturn().getMember(["update", "update_into"]).getACall()
|
||||
}
|
||||
|
||||
override DataFlow::Node getInitialization() { result = init }
|
||||
|
||||
override Cryptography::CryptographicAlgorithm getAlgorithm() {
|
||||
result.matchesName(algorithmName)
|
||||
}
|
||||
@@ -247,19 +247,17 @@ private module CryptographyModel {
|
||||
}
|
||||
|
||||
/** Gets a reference to a Hash instance using algorithm with `algorithmName`. */
|
||||
private API::Node hashInstance(string algorithmName) {
|
||||
exists(API::CallNode call | result = call.getReturn() |
|
||||
call =
|
||||
API::moduleImport("cryptography")
|
||||
.getMember("hazmat")
|
||||
.getMember("primitives")
|
||||
.getMember("hashes")
|
||||
.getMember("Hash")
|
||||
.getACall() and
|
||||
algorithmClassRef(algorithmName).getReturn().getAValueReachableFromSource() in [
|
||||
call.getArg(0), call.getArgByName("algorithm")
|
||||
]
|
||||
)
|
||||
private API::CallNode hashInstance(string algorithmName) {
|
||||
result =
|
||||
API::moduleImport("cryptography")
|
||||
.getMember("hazmat")
|
||||
.getMember("primitives")
|
||||
.getMember("hashes")
|
||||
.getMember("Hash")
|
||||
.getACall() and
|
||||
algorithmClassRef(algorithmName).getReturn().getAValueReachableFromSource() in [
|
||||
result.getArg(0), result.getArgByName("algorithm")
|
||||
]
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -268,12 +266,16 @@ private module CryptographyModel {
|
||||
class CryptographyGenericHashOperation extends Cryptography::CryptographicOperation::Range,
|
||||
DataFlow::MethodCallNode
|
||||
{
|
||||
API::CallNode init;
|
||||
string algorithmName;
|
||||
|
||||
CryptographyGenericHashOperation() {
|
||||
this = hashInstance(algorithmName).getMember("update").getACall()
|
||||
init = hashInstance(algorithmName) and
|
||||
this = init.getReturn().getMember("update").getACall()
|
||||
}
|
||||
|
||||
override DataFlow::Node getInitialization() { result = init }
|
||||
|
||||
override Cryptography::CryptographicAlgorithm getAlgorithm() {
|
||||
result.matchesName(algorithmName)
|
||||
}
|
||||
|
||||
@@ -15,6 +15,7 @@ private import semmle.python.regex
|
||||
private import semmle.python.frameworks.internal.PoorMansFunctionResolution
|
||||
private import semmle.python.frameworks.internal.SelfRefMixin
|
||||
private import semmle.python.frameworks.internal.InstanceTaintStepsHelper
|
||||
private import semmle.python.security.dataflow.UrlRedirectCustomizations
|
||||
|
||||
/**
|
||||
* INTERNAL: Do not use.
|
||||
@@ -2415,7 +2416,10 @@ module PrivateDjango {
|
||||
// Since we don't know the URL pattern, we simply mark all parameters as a routed
|
||||
// parameter. This should give us more RemoteFlowSources but could also lead to
|
||||
// more FPs. If this turns out to be the wrong tradeoff, we can always change our mind.
|
||||
result in [this.getArg(_), this.getArgByName(_)] and
|
||||
result in [
|
||||
this.getArg(_), this.getArgByName(_), //
|
||||
this.getVararg().(Parameter), this.getKwarg().(Parameter), // TODO: These sources should be modeled as storing content!
|
||||
] and
|
||||
not result = any(int i | i < this.getFirstPossibleRoutedParamIndex() | this.getArg(i))
|
||||
}
|
||||
|
||||
@@ -2451,13 +2455,20 @@ module PrivateDjango {
|
||||
// more FPs. If this turns out to be the wrong tradeoff, we can always change our mind.
|
||||
exists(DjangoRouteHandler routeHandler | routeHandler = this.getARequestHandler() |
|
||||
not exists(this.getUrlPattern()) and
|
||||
result in [routeHandler.getArg(_), routeHandler.getArgByName(_)] and
|
||||
result in [
|
||||
routeHandler.getArg(_), routeHandler.getArgByName(_), //
|
||||
routeHandler.getVararg().(Parameter), routeHandler.getKwarg().(Parameter), // TODO: These sources should be modeled as storing content!
|
||||
] and
|
||||
not result =
|
||||
any(int i | i < routeHandler.getFirstPossibleRoutedParamIndex() | routeHandler.getArg(i))
|
||||
)
|
||||
or
|
||||
exists(string name |
|
||||
result = this.getARequestHandler().getArgByName(name) and
|
||||
(
|
||||
result = this.getARequestHandler().getKwarg() // TODO: These sources should be modeled as storing content!
|
||||
or
|
||||
result = this.getARequestHandler().getArgByName(name)
|
||||
) and
|
||||
exists(string match |
|
||||
match = this.getUrlPattern().regexpFind(pathRoutedParameterRegex(), _, _) and
|
||||
name = match.regexpCapture(pathRoutedParameterRegex(), 2)
|
||||
@@ -2474,7 +2485,10 @@ module PrivateDjango {
|
||||
// more FPs. If this turns out to be the wrong tradeoff, we can always change our mind.
|
||||
exists(DjangoRouteHandler routeHandler | routeHandler = this.getARequestHandler() |
|
||||
not exists(this.getUrlPattern()) and
|
||||
result in [routeHandler.getArg(_), routeHandler.getArgByName(_)] and
|
||||
result in [
|
||||
routeHandler.getArg(_), routeHandler.getArgByName(_), //
|
||||
routeHandler.getVararg().(Parameter), routeHandler.getKwarg().(Parameter), // TODO: These sources should be modeled as storing content!
|
||||
] and
|
||||
not result =
|
||||
any(int i | i < routeHandler.getFirstPossibleRoutedParamIndex() | routeHandler.getArg(i))
|
||||
)
|
||||
@@ -2487,13 +2501,22 @@ module PrivateDjango {
|
||||
// either using named capture groups (passed as keyword arguments) or using
|
||||
// unnamed capture groups (passed as positional arguments)
|
||||
not exists(regex.getGroupName(_, _)) and
|
||||
// first group will have group number 1
|
||||
result =
|
||||
routeHandler
|
||||
.getArg(routeHandler.getFirstPossibleRoutedParamIndex() - 1 +
|
||||
regex.getGroupNumber(_, _))
|
||||
(
|
||||
// first group will have group number 1
|
||||
result =
|
||||
routeHandler
|
||||
.getArg(routeHandler.getFirstPossibleRoutedParamIndex() - 1 +
|
||||
regex.getGroupNumber(_, _))
|
||||
or
|
||||
result = routeHandler.getVararg()
|
||||
)
|
||||
or
|
||||
result = routeHandler.getArgByName(regex.getGroupName(_, _))
|
||||
exists(regex.getGroupName(_, _)) and
|
||||
(
|
||||
result = routeHandler.getArgByName(regex.getGroupName(_, _))
|
||||
or
|
||||
result = routeHandler.getKwarg()
|
||||
)
|
||||
)
|
||||
}
|
||||
}
|
||||
@@ -2788,4 +2811,31 @@ module PrivateDjango {
|
||||
|
||||
override predicate csrfEnabled() { decoratorName in ["csrf_protect", "requires_csrf_token"] }
|
||||
}
|
||||
|
||||
private predicate djangoUrlHasAllowedHostAndScheme(
|
||||
DataFlow::GuardNode g, ControlFlowNode node, boolean branch
|
||||
) {
|
||||
exists(API::CallNode call |
|
||||
call =
|
||||
API::moduleImport("django")
|
||||
.getMember("utils")
|
||||
.getMember("http")
|
||||
.getMember("url_has_allowed_host_and_scheme")
|
||||
.getACall() and
|
||||
g = call.asCfgNode() and
|
||||
node = call.getParameter(0, "url").asSink().asCfgNode() and
|
||||
branch = true
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* A call to `django.utils.http.url_has_allowed_host_and_scheme`, considered as a sanitizer-guard for URL redirection.
|
||||
*
|
||||
* See https://docs.djangoproject.com/en/4.2/_modules/django/utils/http/
|
||||
*/
|
||||
private class DjangoAllowedUrl extends UrlRedirect::Sanitizer {
|
||||
DjangoAllowedUrl() {
|
||||
this = DataFlow::BarrierGuard<djangoUrlHasAllowedHostAndScheme/3>::getABarrierNode()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -66,6 +66,9 @@ private module FastApi {
|
||||
result = this.getARequestHandler().getArgByName(_) and
|
||||
// type-annotated with `Response`
|
||||
not any(Response::RequestHandlerParam src).asExpr() = result
|
||||
or
|
||||
// **kwargs
|
||||
result = this.getARequestHandler().getKwarg()
|
||||
}
|
||||
|
||||
override DataFlow::Node getUrlPatternArg() {
|
||||
|
||||
@@ -179,7 +179,13 @@ module Flask {
|
||||
* - https://flask.palletsprojects.com/en/2.2.x/api/#flask.json.jsonify
|
||||
*/
|
||||
private class FlaskJsonifyCall extends InstanceSource, DataFlow::CallCfgNode {
|
||||
FlaskJsonifyCall() { this = API::moduleImport("flask").getMember("jsonify").getACall() }
|
||||
FlaskJsonifyCall() {
|
||||
this = API::moduleImport("flask").getMember("jsonify").getACall()
|
||||
or
|
||||
this = API::moduleImport("flask").getMember("json").getMember("jsonify").getACall()
|
||||
or
|
||||
this = FlaskApp::instance().getMember("json").getMember("response").getACall()
|
||||
}
|
||||
|
||||
override DataFlow::Node getBody() { result in [this.getArg(_), this.getArgByName(_)] }
|
||||
|
||||
@@ -273,6 +279,9 @@ module Flask {
|
||||
name = match.regexpCapture(werkzeug_rule_re(), 4)
|
||||
)
|
||||
)
|
||||
or
|
||||
// **kwargs
|
||||
result = this.getARequestHandler().getKwarg()
|
||||
}
|
||||
|
||||
override string getFramework() { result = "Flask" }
|
||||
@@ -341,6 +350,12 @@ module Flask {
|
||||
// more FPs. If this turns out to be the wrong tradeoff, we can always change our mind.
|
||||
result in [this.getArg(_), this.getArgByName(_)] and
|
||||
not result = this.getArg(0)
|
||||
or
|
||||
// *args
|
||||
result = this.getVararg()
|
||||
or
|
||||
// **kwargs
|
||||
result = this.getKwarg()
|
||||
}
|
||||
|
||||
override string getFramework() { result = "Flask" }
|
||||
@@ -453,7 +468,8 @@ module Flask {
|
||||
FlaskRouteHandlerReturn() {
|
||||
exists(Function routeHandler |
|
||||
routeHandler = any(FlaskRouteSetup rs).getARequestHandler() and
|
||||
node = routeHandler.getAReturnValueFlowNode()
|
||||
node = routeHandler.getAReturnValueFlowNode() and
|
||||
not this instanceof Flask::Response::InstanceSource
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
33
python/ql/lib/semmle/python/frameworks/Joblib.qll
Normal file
33
python/ql/lib/semmle/python/frameworks/Joblib.qll
Normal file
@@ -0,0 +1,33 @@
|
||||
/**
|
||||
* Provides classes modeling security-relevant aspects of the `joblib` PyPI package.
|
||||
* See https://pypi.org/project/joblib/.
|
||||
*/
|
||||
|
||||
private import python
|
||||
private import semmle.python.dataflow.new.DataFlow
|
||||
private import semmle.python.Concepts
|
||||
private import semmle.python.ApiGraphs
|
||||
|
||||
/**
|
||||
* Provides models for the `joblib` PyPI package.
|
||||
* See https://pypi.org/project/joblib/.
|
||||
*/
|
||||
private module Joblib {
|
||||
/**
|
||||
* A call to `joblib.load`
|
||||
* See https://pypi.org/project/joblib/
|
||||
*/
|
||||
private class JoblibLoadCall extends Decoding::Range, DataFlow::CallCfgNode {
|
||||
JoblibLoadCall() { this = API::moduleImport("joblib").getMember("load").getACall() }
|
||||
|
||||
override predicate mayExecuteInput() { any() }
|
||||
|
||||
override DataFlow::Node getAnInput() {
|
||||
result in [this.getArg(0), this.getArgByName("filename")]
|
||||
}
|
||||
|
||||
override DataFlow::Node getOutput() { result = this }
|
||||
|
||||
override string getFormat() { result = "joblib" }
|
||||
}
|
||||
}
|
||||
@@ -83,7 +83,7 @@ private module MarkupSafeModel {
|
||||
}
|
||||
|
||||
/** Taint propagation for `markupsafe.Markup`. */
|
||||
private class AddtionalTaintStep extends TaintTracking::AdditionalTaintStep {
|
||||
private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
|
||||
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
|
||||
nodeTo.(ClassInstantiation).getArg(0) = nodeFrom
|
||||
}
|
||||
@@ -92,11 +92,7 @@ private module MarkupSafeModel {
|
||||
|
||||
/** Any escaping performed via the `markupsafe` package. */
|
||||
abstract private class MarkupSafeEscape extends Escaping::Range {
|
||||
override string getKind() {
|
||||
// TODO: this package claims to escape for both HTML and XML, but for now we don't
|
||||
// model XML.
|
||||
result = Escaping::getHtmlKind()
|
||||
}
|
||||
override string getKind() { result in [Escaping::getHtmlKind(), Escaping::getXmlKind()] }
|
||||
}
|
||||
|
||||
/** A call to any of the escaping functions in `markupsafe` */
|
||||
|
||||
42
python/ql/lib/semmle/python/frameworks/Numpy.qll
Normal file
42
python/ql/lib/semmle/python/frameworks/Numpy.qll
Normal file
@@ -0,0 +1,42 @@
|
||||
/**
|
||||
* Provides classes modeling security-relevant aspects of the `numpy` PyPI package.
|
||||
* See https://pypi.org/project/numpy/.
|
||||
*/
|
||||
|
||||
private import python
|
||||
private import semmle.python.dataflow.new.DataFlow
|
||||
private import semmle.python.dataflow.new.RemoteFlowSources
|
||||
private import semmle.python.Concepts
|
||||
private import semmle.python.ApiGraphs
|
||||
|
||||
/**
|
||||
* Provides models for the `numpy` PyPI package.
|
||||
* See https://pypi.org/project/numpy/.
|
||||
*/
|
||||
private module Numpy {
|
||||
/**
|
||||
* A call to `numpy.load`
|
||||
* See https://numpy.org/doc/stable/reference/generated/numpy.load.html
|
||||
*/
|
||||
private class NumpyLoadCall extends Decoding::Range, API::CallNode {
|
||||
NumpyLoadCall() { this = API::moduleImport("numpy").getMember("load").getACall() }
|
||||
|
||||
override predicate mayExecuteInput() {
|
||||
this.getParameter(2, "allow_pickle")
|
||||
.getAValueReachingSink()
|
||||
.asExpr()
|
||||
.(ImmutableLiteral)
|
||||
.booleanValue() = true
|
||||
}
|
||||
|
||||
override DataFlow::Node getAnInput() { result = this.getParameter(0, "filename").asSink() }
|
||||
|
||||
override DataFlow::Node getOutput() { result = this }
|
||||
|
||||
override string getFormat() {
|
||||
result = "numpy"
|
||||
or
|
||||
this.mayExecuteInput() and result = "pickle"
|
||||
}
|
||||
}
|
||||
}
|
||||
37
python/ql/lib/semmle/python/frameworks/Pandas.qll
Normal file
37
python/ql/lib/semmle/python/frameworks/Pandas.qll
Normal file
@@ -0,0 +1,37 @@
|
||||
/**
|
||||
* Provides classes modeling security-relevant aspects of the `pandas` PyPI package.
|
||||
* See https://pypi.org/project/pandas/.
|
||||
*/
|
||||
|
||||
private import python
|
||||
private import semmle.python.dataflow.new.DataFlow
|
||||
private import semmle.python.dataflow.new.RemoteFlowSources
|
||||
private import semmle.python.Concepts
|
||||
private import semmle.python.ApiGraphs
|
||||
|
||||
/**
|
||||
* Provides models for the `pandas` PyPI package.
|
||||
* See https://pypi.org/project/pandas/.
|
||||
*/
|
||||
private module Pandas {
|
||||
/**
|
||||
* A call to `pandas.read_pickle`
|
||||
* See https://pypi.org/project/pandas/
|
||||
* See https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_pickle.html
|
||||
*/
|
||||
private class PandasReadPickleCall extends Decoding::Range, DataFlow::CallCfgNode {
|
||||
PandasReadPickleCall() {
|
||||
this = API::moduleImport("pandas").getMember("read_pickle").getACall()
|
||||
}
|
||||
|
||||
override predicate mayExecuteInput() { any() }
|
||||
|
||||
override DataFlow::Node getAnInput() {
|
||||
result in [this.getArg(0), this.getArgByName("filepath_or_buffer")]
|
||||
}
|
||||
|
||||
override DataFlow::Node getOutput() { result = this }
|
||||
|
||||
override string getFormat() { result = "pickle" }
|
||||
}
|
||||
}
|
||||
299
python/ql/lib/semmle/python/frameworks/PyMongo.qll
Normal file
299
python/ql/lib/semmle/python/frameworks/PyMongo.qll
Normal file
@@ -0,0 +1,299 @@
|
||||
/**
|
||||
* Provides classes modeling security-relevant aspects of the PyMongo bindings.
|
||||
*/
|
||||
|
||||
private import python
|
||||
private import semmle.python.dataflow.new.DataFlow
|
||||
private import semmle.python.dataflow.new.TaintTracking
|
||||
private import semmle.python.dataflow.new.RemoteFlowSources
|
||||
private import semmle.python.Concepts
|
||||
private import semmle.python.ApiGraphs
|
||||
|
||||
private module PyMongo {
|
||||
// API Nodes returning `Mongo` instances.
|
||||
/** Gets a reference to `pymongo.MongoClient` */
|
||||
private API::Node pyMongo() {
|
||||
result = API::moduleImport("pymongo").getMember("MongoClient").getReturn()
|
||||
or
|
||||
// see https://pymongo.readthedocs.io/en/stable/api/pymongo/mongo_client.html#pymongo.mongo_client.MongoClient
|
||||
result =
|
||||
API::moduleImport("pymongo").getMember("mongo_client").getMember("MongoClient").getReturn()
|
||||
}
|
||||
|
||||
/** Gets a reference to `flask_pymongo.PyMongo` */
|
||||
private API::Node flask_PyMongo() {
|
||||
result = API::moduleImport("flask_pymongo").getMember("PyMongo").getReturn()
|
||||
}
|
||||
|
||||
/** Gets a reference to `mongoengine` */
|
||||
private API::Node mongoEngine() { result = API::moduleImport("mongoengine") }
|
||||
|
||||
/** Gets a reference to `flask_mongoengine.MongoEngine` */
|
||||
private API::Node flask_MongoEngine() {
|
||||
result = API::moduleImport("flask_mongoengine").getMember("MongoEngine").getReturn()
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a reference to an initialized `Mongo` instance.
|
||||
* See `pyMongo()`, `flask_PyMongo()`
|
||||
*/
|
||||
private API::Node mongoClientInstance() {
|
||||
result = pyMongo() or
|
||||
result = flask_PyMongo()
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a reference to a `Mongo` DB instance.
|
||||
*
|
||||
* ```py
|
||||
* from flask_pymongo import PyMongo
|
||||
* mongo = PyMongo(app)
|
||||
* mongo.db.user.find({'name': safe_search})
|
||||
* ```
|
||||
*
|
||||
* `mongo.db` would be a `Mongo` instance.
|
||||
*/
|
||||
private API::Node mongoDBInstance() {
|
||||
result = mongoClientInstance().getASubscript()
|
||||
or
|
||||
result = mongoClientInstance().getAMember()
|
||||
or
|
||||
result = mongoEngine().getMember(["get_db", "connect"]).getReturn()
|
||||
or
|
||||
result = mongoEngine().getMember("connection").getMember(["get_db", "connect"]).getReturn()
|
||||
or
|
||||
result = flask_MongoEngine().getMember("get_db").getReturn()
|
||||
or
|
||||
// see https://pymongo.readthedocs.io/en/stable/api/pymongo/mongo_client.html#pymongo.mongo_client.MongoClient.get_default_database
|
||||
// see https://pymongo.readthedocs.io/en/stable/api/pymongo/mongo_client.html#pymongo.mongo_client.MongoClient.get_database
|
||||
result = mongoClientInstance().getMember(["get_default_database", "get_database"]).getReturn()
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a reference to a `Mongo` collection.
|
||||
*
|
||||
* ```py
|
||||
* from flask_pymongo import PyMongo
|
||||
* mongo = PyMongo(app)
|
||||
* mongo.db.user.find({'name': safe_search})
|
||||
* ```
|
||||
*
|
||||
* `mongo.db.user` would be a `Mongo` collection.
|
||||
*/
|
||||
private API::Node mongoCollection() {
|
||||
result = mongoDBInstance().getASubscript()
|
||||
or
|
||||
result = mongoDBInstance().getAMember()
|
||||
or
|
||||
// see https://pymongo.readthedocs.io/en/stable/api/pymongo/database.html#pymongo.database.Database.get_collection
|
||||
// see https://pymongo.readthedocs.io/en/stable/api/pymongo/database.html#pymongo.database.Database.create_collection
|
||||
result = mongoDBInstance().getMember(["get_collection", "create_collection"]).getReturn()
|
||||
}
|
||||
|
||||
/** Gets the name of a find_* relevant `Mongo` collection-level operation method. */
|
||||
private string mongoCollectionMethodName() {
|
||||
result in [
|
||||
"find", "find_raw_batches", "find_one", "find_one_and_delete", "find_and_modify",
|
||||
"find_one_and_replace", "find_one_and_update", "find_one_or_404"
|
||||
]
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a reference to a `Mongo` collection method call
|
||||
*
|
||||
* ```py
|
||||
* from flask_pymongo import PyMongo
|
||||
* mongo = PyMongo(app)
|
||||
* mongo.db.user.find({'name': safe_search})
|
||||
* ```
|
||||
*
|
||||
* `mongo.db.user.find({'name': safe_search})` would be a collection method call.
|
||||
*/
|
||||
private class MongoCollectionCall extends API::CallNode, NoSqlExecution::Range {
|
||||
MongoCollectionCall() {
|
||||
this = mongoCollection().getMember(mongoCollectionMethodName()).getACall()
|
||||
}
|
||||
|
||||
/** Gets the argument that specifies the NoSQL query to be executed, as an API::node */
|
||||
pragma[inline]
|
||||
API::Node getQueryAsApiNode() {
|
||||
// 'filter' is allowed keyword in pymongo, see https://pymongo.readthedocs.io/en/stable/api/pymongo/collection.html#pymongo.collection.Collection.find
|
||||
result = this.getParameter(0, "filter")
|
||||
}
|
||||
|
||||
override DataFlow::Node getQuery() { result = this.getQueryAsApiNode().asSink() }
|
||||
|
||||
override predicate interpretsDict() { any() }
|
||||
|
||||
override predicate vulnerableToStrings() { none() }
|
||||
}
|
||||
|
||||
/**
|
||||
* See https://pymongo.readthedocs.io/en/stable/api/pymongo/collection.html#pymongo.collection.Collection.aggregate
|
||||
*/
|
||||
private class MongoCollectionAggregation extends API::CallNode, NoSqlExecution::Range {
|
||||
MongoCollectionAggregation() { this = mongoCollection().getMember("aggregate").getACall() }
|
||||
|
||||
override DataFlow::Node getQuery() {
|
||||
result = this.getParameter(0, "pipeline").getASubscript().asSink()
|
||||
}
|
||||
|
||||
override predicate interpretsDict() { any() }
|
||||
|
||||
override predicate vulnerableToStrings() { none() }
|
||||
}
|
||||
|
||||
private class MongoMapReduce extends API::CallNode, NoSqlExecution::Range {
|
||||
MongoMapReduce() { this = mongoCollection().getMember("map_reduce").getACall() }
|
||||
|
||||
override DataFlow::Node getQuery() { result in [this.getArg(0), this.getArg(1)] }
|
||||
|
||||
override predicate interpretsDict() { none() }
|
||||
|
||||
override predicate vulnerableToStrings() { any() }
|
||||
}
|
||||
|
||||
private class MongoMapReduceQuery extends API::CallNode, NoSqlExecution::Range {
|
||||
MongoMapReduceQuery() { this = mongoCollection().getMember("map_reduce").getACall() }
|
||||
|
||||
override DataFlow::Node getQuery() { result = this.getArgByName("query") }
|
||||
|
||||
override predicate interpretsDict() { any() }
|
||||
|
||||
override predicate vulnerableToStrings() { none() }
|
||||
}
|
||||
|
||||
/** The `$where` query operator executes a string as JavaScript. */
|
||||
private class WhereQueryOperator extends DataFlow::Node, Decoding::Range {
|
||||
DataFlow::Node query;
|
||||
|
||||
WhereQueryOperator() {
|
||||
exists(API::Node dictionary |
|
||||
dictionary = any(MongoCollectionCall c).getQueryAsApiNode() and
|
||||
query = dictionary.getSubscript("$where").asSink() and
|
||||
this = dictionary.getAValueReachingSink()
|
||||
)
|
||||
}
|
||||
|
||||
override DataFlow::Node getAnInput() { result = query }
|
||||
|
||||
override DataFlow::Node getOutput() { result = this }
|
||||
|
||||
override string getFormat() { result = "NoSQL" }
|
||||
|
||||
override predicate mayExecuteInput() { none() }
|
||||
}
|
||||
|
||||
/**
|
||||
* The `$function` query operator executes its `body` string as JavaScript.
|
||||
*
|
||||
* See https://www.mongodb.com/docs/manual/reference/operator/aggregation/function/#mongodb-expression-exp.-function
|
||||
*/
|
||||
private class FunctionQueryOperator extends DataFlow::Node, Decoding::Range {
|
||||
DataFlow::Node query;
|
||||
|
||||
FunctionQueryOperator() {
|
||||
exists(API::Node dictionary |
|
||||
dictionary =
|
||||
any(MongoCollectionCall c).getQueryAsApiNode().getASubscript*().getSubscript("$function") and
|
||||
query = dictionary.getSubscript("body").asSink() and
|
||||
this = dictionary.getAValueReachingSink()
|
||||
)
|
||||
}
|
||||
|
||||
override DataFlow::Node getAnInput() { result = query }
|
||||
|
||||
override DataFlow::Node getOutput() { result = this }
|
||||
|
||||
override string getFormat() { result = "NoSQL" }
|
||||
|
||||
override predicate mayExecuteInput() { none() }
|
||||
}
|
||||
|
||||
/**
|
||||
* The `$accumulator` query operator executes strings in some of its fields as JavaScript.
|
||||
*
|
||||
* See https://www.mongodb.com/docs/manual/reference/operator/aggregation/accumulator/#mongodb-group-grp.-accumulator
|
||||
*/
|
||||
private class AccumulatorQueryOperator extends DataFlow::Node, Decoding::Range {
|
||||
DataFlow::Node query;
|
||||
|
||||
AccumulatorQueryOperator() {
|
||||
exists(API::Node dictionary |
|
||||
dictionary =
|
||||
mongoCollection()
|
||||
.getMember("aggregate")
|
||||
.getACall()
|
||||
.getParameter(0)
|
||||
.getASubscript*()
|
||||
.getSubscript("$accumulator") and
|
||||
query = dictionary.getSubscript(["init", "accumulate", "merge", "finalize"]).asSink() and
|
||||
this = dictionary.getAValueReachingSink()
|
||||
)
|
||||
}
|
||||
|
||||
override DataFlow::Node getAnInput() { result = query }
|
||||
|
||||
override DataFlow::Node getOutput() { result = this }
|
||||
|
||||
override string getFormat() { result = "NoSQL" }
|
||||
|
||||
override predicate mayExecuteInput() { any() }
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a reference to a call from a class whose base is a reference to `mongoEngine()` or `flask_MongoEngine()`'s
|
||||
* `Document` or `EmbeddedDocument` objects and its attribute is `objects`.
|
||||
*
|
||||
* ```py
|
||||
* from flask_mongoengine import MongoEngine
|
||||
* db = MongoEngine(app)
|
||||
* class Movie(db.Document):
|
||||
* title = db.StringField(required=True)
|
||||
*
|
||||
* Movie.objects(__raw__=json_search)
|
||||
* ```
|
||||
*
|
||||
* `Movie.objects(__raw__=json_search)` would be the result.
|
||||
*/
|
||||
private class MongoEngineObjectsCall extends DataFlow::CallCfgNode, NoSqlExecution::Range {
|
||||
MongoEngineObjectsCall() {
|
||||
this =
|
||||
[mongoEngine(), flask_MongoEngine()]
|
||||
.getMember(["Document", "EmbeddedDocument"])
|
||||
.getASubclass()
|
||||
.getMember("objects")
|
||||
.getACall()
|
||||
}
|
||||
|
||||
override DataFlow::Node getQuery() { result = this.getArgByName(_) }
|
||||
|
||||
override predicate interpretsDict() { any() }
|
||||
|
||||
override predicate vulnerableToStrings() { none() }
|
||||
}
|
||||
|
||||
/** Gets a reference to `mongosanitizer.sanitizer.sanitize` */
|
||||
private class MongoSanitizerCall extends DataFlow::CallCfgNode, NoSqlSanitizer::Range {
|
||||
MongoSanitizerCall() {
|
||||
this =
|
||||
API::moduleImport("mongosanitizer").getMember("sanitizer").getMember("sanitize").getACall()
|
||||
}
|
||||
|
||||
override DataFlow::Node getAnInput() { result = this.getArg(0) }
|
||||
}
|
||||
|
||||
/**
|
||||
* An equality operator can protect against dictionary interpretation.
|
||||
* For instance, in `{'password': {"$eq": password} }`, if a dictionary is injected into
|
||||
* `password`, it will not match.
|
||||
*/
|
||||
private class EqualityOperator extends DataFlow::Node, NoSqlSanitizer::Range {
|
||||
EqualityOperator() {
|
||||
this =
|
||||
any(MongoCollectionCall c).getQueryAsApiNode().getASubscript*().getSubscript("$eq").asSink()
|
||||
}
|
||||
|
||||
override DataFlow::Node getAnInput() { result = this }
|
||||
}
|
||||
}
|
||||
@@ -131,7 +131,10 @@ private module RestFramework {
|
||||
"initial", "http_method_not_allowed", "permission_denied", "throttled",
|
||||
"get_authenticate_header", "perform_content_negotiation", "perform_authentication",
|
||||
"check_permissions", "check_object_permissions", "check_throttles", "determine_version",
|
||||
"initialize_request", "finalize_response", "dispatch", "options"
|
||||
"initialize_request", "finalize_response", "dispatch", "options",
|
||||
// ModelViewSet
|
||||
// https://github.com/encode/django-rest-framework/blob/master/rest_framework/viewsets.py
|
||||
"create", "retrieve", "update", "partial_update", "destroy", "list"
|
||||
]
|
||||
}
|
||||
}
|
||||
@@ -169,7 +172,10 @@ private module RestFramework {
|
||||
// Since we don't know the URL pattern, we simply mark all parameters as a routed
|
||||
// parameter. This should give us more RemoteFlowSources but could also lead to
|
||||
// more FPs. If this turns out to be the wrong tradeoff, we can always change our mind.
|
||||
result in [this.getArg(_), this.getArgByName(_)] and
|
||||
result in [
|
||||
this.getArg(_), this.getArgByName(_), //
|
||||
this.getVararg().(Parameter), this.getKwarg().(Parameter), // TODO: These sources should be modeled as storing content!
|
||||
] and
|
||||
not result = any(int i | i < this.getFirstPossibleRoutedParamIndex() | this.getArg(i))
|
||||
}
|
||||
|
||||
|
||||
@@ -37,6 +37,8 @@ private module Rsa {
|
||||
class RsaEncryptCall extends Cryptography::CryptographicOperation::Range, DataFlow::CallCfgNode {
|
||||
RsaEncryptCall() { this = API::moduleImport("rsa").getMember("encrypt").getACall() }
|
||||
|
||||
override DataFlow::Node getInitialization() { result = this }
|
||||
|
||||
override Cryptography::CryptographicAlgorithm getAlgorithm() { result.getName() = "RSA" }
|
||||
|
||||
override DataFlow::Node getAnInput() {
|
||||
@@ -54,6 +56,8 @@ private module Rsa {
|
||||
class RsaDecryptCall extends Cryptography::CryptographicOperation::Range, DataFlow::CallCfgNode {
|
||||
RsaDecryptCall() { this = API::moduleImport("rsa").getMember("decrypt").getACall() }
|
||||
|
||||
override DataFlow::Node getInitialization() { result = this }
|
||||
|
||||
override Cryptography::CryptographicAlgorithm getAlgorithm() { result.getName() = "RSA" }
|
||||
|
||||
override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("crypto")] }
|
||||
@@ -69,6 +73,8 @@ private module Rsa {
|
||||
class RsaSignCall extends Cryptography::CryptographicOperation::Range, DataFlow::CallCfgNode {
|
||||
RsaSignCall() { this = API::moduleImport("rsa").getMember("sign").getACall() }
|
||||
|
||||
override DataFlow::Node getInitialization() { result = this }
|
||||
|
||||
override Cryptography::CryptographicAlgorithm getAlgorithm() {
|
||||
// signature part
|
||||
result.getName() = "RSA"
|
||||
@@ -96,6 +102,8 @@ private module Rsa {
|
||||
class RsaVerifyCall extends Cryptography::CryptographicOperation::Range, DataFlow::CallCfgNode {
|
||||
RsaVerifyCall() { this = API::moduleImport("rsa").getMember("verify").getACall() }
|
||||
|
||||
override DataFlow::Node getInitialization() { result = this }
|
||||
|
||||
override Cryptography::CryptographicAlgorithm getAlgorithm() {
|
||||
// note that technically there is also a hashing operation going on but we don't
|
||||
// know what algorithm is used up front, since it is encoded in the signature
|
||||
@@ -121,6 +129,8 @@ private module Rsa {
|
||||
{
|
||||
RsaComputeHashCall() { this = API::moduleImport("rsa").getMember("compute_hash").getACall() }
|
||||
|
||||
override DataFlow::Node getInitialization() { result = this }
|
||||
|
||||
override Cryptography::CryptographicAlgorithm getAlgorithm() {
|
||||
exists(StrConst str, DataFlow::Node hashNameArg |
|
||||
hashNameArg in [this.getArg(1), this.getArgByName("method_name")] and
|
||||
@@ -144,6 +154,8 @@ private module Rsa {
|
||||
class RsaSignHashCall extends Cryptography::CryptographicOperation::Range, DataFlow::CallCfgNode {
|
||||
RsaSignHashCall() { this = API::moduleImport("rsa").getMember("sign_hash").getACall() }
|
||||
|
||||
override DataFlow::Node getInitialization() { result = this }
|
||||
|
||||
override Cryptography::CryptographicAlgorithm getAlgorithm() { result.getName() = "RSA" }
|
||||
|
||||
override DataFlow::Node getAnInput() {
|
||||
|
||||
42
python/ql/lib/semmle/python/frameworks/Sanic.qll
Normal file
42
python/ql/lib/semmle/python/frameworks/Sanic.qll
Normal file
@@ -0,0 +1,42 @@
|
||||
/**
|
||||
* Provides classes modeling security-relevant aspects of the `sanic` PyPI package.
|
||||
* See https://sanic.dev/.
|
||||
*/
|
||||
|
||||
private import python
|
||||
private import semmle.python.dataflow.new.DataFlow
|
||||
private import semmle.python.dataflow.new.RemoteFlowSources
|
||||
private import semmle.python.dataflow.new.TaintTracking
|
||||
private import semmle.python.Concepts
|
||||
private import semmle.python.ApiGraphs
|
||||
|
||||
/**
|
||||
* Provides models for the `sanic` PyPI package.
|
||||
* See https://sanic.dev/.
|
||||
*/
|
||||
private module Sanic {
|
||||
/**
|
||||
* Provides models for Sanic applications (an instance of `sanic.Sanic`).
|
||||
*/
|
||||
module App {
|
||||
/** Gets a reference to a Sanic application (an instance of `sanic.Sanic`). */
|
||||
API::Node instance() { result = API::moduleImport("sanic").getMember("Sanic").getReturn() }
|
||||
}
|
||||
|
||||
/**
|
||||
* A call to the `file` or `file_stream` functions of `sanic.response` as a sink for Filesystem access.
|
||||
*/
|
||||
class FileResponseCall extends FileSystemAccess::Range, API::CallNode {
|
||||
FileResponseCall() {
|
||||
this =
|
||||
API::moduleImport("sanic")
|
||||
.getMember("response")
|
||||
.getMember(["file", "file_stream"])
|
||||
.getACall()
|
||||
}
|
||||
|
||||
override DataFlow::Node getAPathArgument() {
|
||||
result = this.getParameter(0, "location").asSink()
|
||||
}
|
||||
}
|
||||
}
|
||||
67
python/ql/lib/semmle/python/frameworks/ServerLess.qll
Normal file
67
python/ql/lib/semmle/python/frameworks/ServerLess.qll
Normal file
@@ -0,0 +1,67 @@
|
||||
/**
|
||||
* Provides classes and predicates for working with those serverless handlers,
|
||||
* handled by the shared library.
|
||||
*
|
||||
* E.g. [AWS](https://docs.aws.amazon.com/lambda/latest/dg/python-handler.html).
|
||||
*
|
||||
* In particular a `RemoteFlowSource` is added for each.
|
||||
*/
|
||||
|
||||
import python
|
||||
import codeql.serverless.ServerLess
|
||||
import semmle.python.dataflow.new.DataFlow
|
||||
import semmle.python.dataflow.new.RemoteFlowSources
|
||||
|
||||
private module YamlImpl implements Input {
|
||||
import semmle.python.Files
|
||||
import semmle.python.Yaml
|
||||
}
|
||||
|
||||
module SL = ServerLess<YamlImpl>;
|
||||
|
||||
/**
|
||||
* Gets a function that is a serverless request handler.
|
||||
*
|
||||
* For example: if an AWS serverless resource contains the following properties (in the "template.yml" file):
|
||||
* ```yaml
|
||||
* Handler: mylibrary.handler
|
||||
* Runtime: pythonXXX
|
||||
* CodeUri: backend/src/
|
||||
* ```
|
||||
*
|
||||
* And a file "mylibrary.py" exists in the folder "backend/src" (relative to the "template.yml" file).
|
||||
* Then the result of this predicate is a function exported as "handler" from "mylibrary.py".
|
||||
* The "mylibrary.py" file could for example look like:
|
||||
*
|
||||
* ```python
|
||||
* def handler(event):
|
||||
* ...
|
||||
* ```
|
||||
*/
|
||||
private Function getAServerlessHandler() {
|
||||
exists(File file, string stem, string handler, string runtime, Module mod |
|
||||
SL::hasServerlessHandler(stem, handler, _, runtime) and
|
||||
file.getAbsolutePath() = stem + ".py" and
|
||||
// if runtime is specified, it should be python
|
||||
(runtime = "" or runtime.matches("python%"))
|
||||
|
|
||||
mod.getFile() = file and
|
||||
result.getScope() = mod and
|
||||
result.getName() = handler
|
||||
)
|
||||
}
|
||||
|
||||
private DataFlow::ParameterNode getAHandlerEventParameter() {
|
||||
exists(Function func | func = getAServerlessHandler() |
|
||||
result.getParameter() in [func.getArg(0), func.getArgByName("event")]
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* A serverless request handler event, seen as a RemoteFlowSource.
|
||||
*/
|
||||
private class ServerlessHandlerEventAsRemoteFlow extends RemoteFlowSource::Range {
|
||||
ServerlessHandlerEventAsRemoteFlow() { this = getAHandlerEventParameter() }
|
||||
|
||||
override string getSourceType() { result = "Serverless event" }
|
||||
}
|
||||
@@ -163,4 +163,16 @@ module Starlette {
|
||||
|
||||
/** DEPRECATED: Alias for Url */
|
||||
deprecated module URL = Url;
|
||||
|
||||
/**
|
||||
* A call to the `starlette.responses.FileResponse` constructor as a sink for Filesystem access.
|
||||
*/
|
||||
class FileResponseCall extends FileSystemAccess::Range, API::CallNode {
|
||||
FileResponseCall() {
|
||||
this =
|
||||
API::moduleImport("starlette").getMember("responses").getMember("FileResponse").getACall()
|
||||
}
|
||||
|
||||
override DataFlow::Node getAPathArgument() { result = this.getParameter(0, "path").asSink() }
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1479,6 +1479,26 @@ private module StdlibPrivate {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A call to the `io.FileIO` constructor.
|
||||
* See https://docs.python.org/3/library/io.html#io.FileIO
|
||||
*/
|
||||
private class FileIOCall extends FileSystemAccess::Range, API::CallNode {
|
||||
FileIOCall() { this = API::moduleImport("io").getMember("FileIO").getACall() }
|
||||
|
||||
override DataFlow::Node getAPathArgument() { result = this.getParameter(0, "file").asSink() }
|
||||
}
|
||||
|
||||
/**
|
||||
* A call to the `io.open_code` function.
|
||||
* See https://docs.python.org/3.11/library/io.html#io.open_code
|
||||
*/
|
||||
private class OpenCodeCall extends FileSystemAccess::Range, API::CallNode {
|
||||
OpenCodeCall() { this = API::moduleImport("io").getMember("open_code").getACall() }
|
||||
|
||||
override DataFlow::Node getAPathArgument() { result = this.getParameter(0, "path").asSink() }
|
||||
}
|
||||
|
||||
/** Gets a reference to an open file. */
|
||||
private DataFlow::TypeTrackingNode openFile(DataFlow::TypeTracker t, FileSystemAccess openCall) {
|
||||
t.start() and
|
||||
@@ -1815,51 +1835,95 @@ private module StdlibPrivate {
|
||||
// ---------------------------------------------------------------------------
|
||||
// BaseHTTPServer (Python 2 only)
|
||||
// ---------------------------------------------------------------------------
|
||||
/** Gets a reference to the `BaseHttpServer` module. */
|
||||
API::Node baseHttpServer() { result = API::moduleImport("BaseHTTPServer") }
|
||||
/**
|
||||
* DEPRECATED: Use API-graphs directly instead.
|
||||
*
|
||||
* Gets a reference to the `BaseHttpServer` module.
|
||||
*/
|
||||
deprecated API::Node baseHttpServer() { result = API::moduleImport("BaseHTTPServer") }
|
||||
|
||||
/** Provides models for the `BaseHttpServer` module. */
|
||||
module BaseHttpServer {
|
||||
/**
|
||||
* DEPRECATED: Use API-graphs directly instead.
|
||||
*
|
||||
* Provides models for the `BaseHttpServer` module.
|
||||
*/
|
||||
deprecated module BaseHttpServer {
|
||||
/**
|
||||
* DEPRECATED: Use API-graphs directly instead.
|
||||
*
|
||||
* Provides models for the `BaseHTTPServer.BaseHTTPRequestHandler` class (Python 2 only).
|
||||
*/
|
||||
module BaseHttpRequestHandler {
|
||||
/** Gets a reference to the `BaseHttpServer.BaseHttpRequestHandler` class. */
|
||||
API::Node classRef() { result = baseHttpServer().getMember("BaseHTTPRequestHandler") }
|
||||
deprecated module BaseHttpRequestHandler {
|
||||
/**
|
||||
* DEPRECATED: Use API-graphs directly instead.
|
||||
*
|
||||
* Gets a reference to the `BaseHttpServer.BaseHttpRequestHandler` class.
|
||||
*/
|
||||
deprecated API::Node classRef() {
|
||||
result = baseHttpServer().getMember("BaseHTTPRequestHandler")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// SimpleHTTPServer (Python 2 only)
|
||||
// ---------------------------------------------------------------------------
|
||||
/** Gets a reference to the `SimpleHttpServer` module. */
|
||||
API::Node simpleHttpServer() { result = API::moduleImport("SimpleHTTPServer") }
|
||||
/**
|
||||
* DEPRECATED: Use API-graphs directly instead.
|
||||
*
|
||||
* Gets a reference to the `SimpleHttpServer` module.
|
||||
*/
|
||||
deprecated API::Node simpleHttpServer() { result = API::moduleImport("SimpleHTTPServer") }
|
||||
|
||||
/** Provides models for the `SimpleHttpServer` module. */
|
||||
module SimpleHttpServer {
|
||||
/**
|
||||
* DEPRECATED: Use API-graphs directly instead.
|
||||
*
|
||||
* Provides models for the `SimpleHttpServer` module.
|
||||
*/
|
||||
deprecated module SimpleHttpServer {
|
||||
/**
|
||||
* DEPRECATED: Use API-graphs directly instead.
|
||||
*
|
||||
* Provides models for the `SimpleHTTPServer.SimpleHTTPRequestHandler` class (Python 2 only).
|
||||
*/
|
||||
module SimpleHttpRequestHandler {
|
||||
/** Gets a reference to the `SimpleHttpServer.SimpleHttpRequestHandler` class. */
|
||||
API::Node classRef() { result = simpleHttpServer().getMember("SimpleHTTPRequestHandler") }
|
||||
deprecated module SimpleHttpRequestHandler {
|
||||
/**
|
||||
* DEPRECATED: Use API-graphs directly instead.
|
||||
*
|
||||
* Gets a reference to the `SimpleHttpServer.SimpleHttpRequestHandler` class.
|
||||
*/
|
||||
deprecated API::Node classRef() {
|
||||
result = simpleHttpServer().getMember("SimpleHTTPRequestHandler")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// CGIHTTPServer (Python 2 only)
|
||||
// ---------------------------------------------------------------------------
|
||||
/** Gets a reference to the `CGIHTTPServer` module. */
|
||||
API::Node cgiHttpServer() { result = API::moduleImport("CGIHTTPServer") }
|
||||
/**
|
||||
* DEPRECATED: Use API-graphs directly instead.
|
||||
*
|
||||
* Gets a reference to the `CGIHTTPServer` module.
|
||||
*/
|
||||
deprecated API::Node cgiHttpServer() { result = API::moduleImport("CGIHTTPServer") }
|
||||
|
||||
/** Provides models for the `CGIHTTPServer` module. */
|
||||
module CgiHttpServer {
|
||||
deprecated module CgiHttpServer {
|
||||
/**
|
||||
* DEPRECATED: Use API-graphs directly instead.
|
||||
*
|
||||
* Provides models for the `CGIHTTPServer.CGIHTTPRequestHandler` class (Python 2 only).
|
||||
*/
|
||||
module CgiHttpRequestHandler {
|
||||
/** Gets a reference to the `CGIHTTPServer.CgiHttpRequestHandler` class. */
|
||||
API::Node classRef() { result = cgiHttpServer().getMember("CGIHTTPRequestHandler") }
|
||||
deprecated module CgiHttpRequestHandler {
|
||||
/**
|
||||
* DEPRECATED: Use API-graphs directly instead.
|
||||
*
|
||||
* Gets a reference to the `CGIHTTPServer.CgiHttpRequestHandler` class.
|
||||
*/
|
||||
deprecated API::Node classRef() {
|
||||
result = cgiHttpServer().getMember("CGIHTTPRequestHandler")
|
||||
}
|
||||
}
|
||||
|
||||
/** DEPRECATED: Alias for CgiHttpRequestHandler */
|
||||
@@ -1872,47 +1936,69 @@ private module StdlibPrivate {
|
||||
// ---------------------------------------------------------------------------
|
||||
// http (Python 3 only)
|
||||
// ---------------------------------------------------------------------------
|
||||
/** Gets a reference to the `http` module. */
|
||||
API::Node http() { result = API::moduleImport("http") }
|
||||
/**
|
||||
* DEPRECATED: Use API-graphs directly instead.
|
||||
*
|
||||
* Gets a reference to the `http` module.
|
||||
*/
|
||||
deprecated API::Node http() { result = API::moduleImport("http") }
|
||||
|
||||
/** Provides models for the `http` module. */
|
||||
module StdlibHttp {
|
||||
deprecated module StdlibHttp {
|
||||
// -------------------------------------------------------------------------
|
||||
// http.server
|
||||
// -------------------------------------------------------------------------
|
||||
/** Gets a reference to the `http.server` module. */
|
||||
API::Node server() { result = http().getMember("server") }
|
||||
/**
|
||||
* DEPRECATED: Use API-graphs directly instead.
|
||||
*
|
||||
* Gets a reference to the `http.server` module.
|
||||
*/
|
||||
deprecated API::Node server() { result = http().getMember("server") }
|
||||
|
||||
/** Provides models for the `http.server` module */
|
||||
module Server {
|
||||
/**
|
||||
* DEPRECATED: Use API-graphs directly instead.
|
||||
*
|
||||
* Provides models for the `http.server` module
|
||||
*/
|
||||
deprecated module Server {
|
||||
/**
|
||||
* DEPRECATED: Use API-graphs directly instead.
|
||||
*
|
||||
* Provides models for the `http.server.BaseHTTPRequestHandler` class (Python 3 only).
|
||||
*
|
||||
* See https://docs.python.org/3.9/library/http.server.html#http.server.BaseHTTPRequestHandler.
|
||||
*/
|
||||
module BaseHttpRequestHandler {
|
||||
deprecated module BaseHttpRequestHandler {
|
||||
/** Gets a reference to the `http.server.BaseHttpRequestHandler` class. */
|
||||
API::Node classRef() { result = server().getMember("BaseHTTPRequestHandler") }
|
||||
deprecated API::Node classRef() { result = server().getMember("BaseHTTPRequestHandler") }
|
||||
}
|
||||
|
||||
/**
|
||||
* DEPRECATED: Use API-graphs directly instead.
|
||||
*
|
||||
* Provides models for the `http.server.SimpleHTTPRequestHandler` class (Python 3 only).
|
||||
*
|
||||
* See https://docs.python.org/3.9/library/http.server.html#http.server.SimpleHTTPRequestHandler.
|
||||
*/
|
||||
module SimpleHttpRequestHandler {
|
||||
deprecated module SimpleHttpRequestHandler {
|
||||
/** Gets a reference to the `http.server.SimpleHttpRequestHandler` class. */
|
||||
API::Node classRef() { result = server().getMember("SimpleHTTPRequestHandler") }
|
||||
deprecated API::Node classRef() { result = server().getMember("SimpleHTTPRequestHandler") }
|
||||
}
|
||||
|
||||
/**
|
||||
* DEPRECATED: Use API-graphs directly instead.
|
||||
*
|
||||
* Provides models for the `http.server.CGIHTTPRequestHandler` class (Python 3 only).
|
||||
*
|
||||
* See https://docs.python.org/3.9/library/http.server.html#http.server.CGIHTTPRequestHandler.
|
||||
*/
|
||||
module CgiHttpRequestHandler {
|
||||
/** Gets a reference to the `http.server.CGIHTTPRequestHandler` class. */
|
||||
API::Node classRef() { result = server().getMember("CGIHTTPRequestHandler") }
|
||||
deprecated module CgiHttpRequestHandler {
|
||||
/**
|
||||
* DEPRECATED: Use API-graphs directly instead.
|
||||
*
|
||||
* Gets a reference to the `http.server.CGIHTTPRequestHandler` class.
|
||||
*/
|
||||
deprecated API::Node classRef() { result = server().getMember("CGIHTTPRequestHandler") }
|
||||
}
|
||||
|
||||
/** DEPRECATED: Alias for CgiHttpRequestHandler */
|
||||
@@ -1933,13 +2019,13 @@ private module StdlibPrivate {
|
||||
result =
|
||||
[
|
||||
// Python 2
|
||||
BaseHttpServer::BaseHttpRequestHandler::classRef(),
|
||||
SimpleHttpServer::SimpleHttpRequestHandler::classRef(),
|
||||
CgiHttpServer::CgiHttpRequestHandler::classRef(),
|
||||
API::moduleImport("BaseHTTPServer").getMember("BaseHTTPRequestHandler"),
|
||||
API::moduleImport("SimpleHTTPServer").getMember("SimpleHTTPRequestHandler"),
|
||||
API::moduleImport("CGIHTTPServer").getMember("CGIHTTPRequestHandler"),
|
||||
// Python 3
|
||||
StdlibHttp::Server::BaseHttpRequestHandler::classRef(),
|
||||
StdlibHttp::Server::SimpleHttpRequestHandler::classRef(),
|
||||
StdlibHttp::Server::CgiHttpRequestHandler::classRef()
|
||||
API::moduleImport("http").getMember("server").getMember("BaseHTTPRequestHandler"),
|
||||
API::moduleImport("http").getMember("server").getMember("SimpleHTTPRequestHandler"),
|
||||
API::moduleImport("http").getMember("server").getMember("CGIHTTPRequestHandler"),
|
||||
].getASubclass*()
|
||||
}
|
||||
|
||||
@@ -2681,6 +2767,8 @@ private module StdlibPrivate {
|
||||
exists(this.getParameter(1, "data"))
|
||||
}
|
||||
|
||||
override DataFlow::Node getInitialization() { result = this }
|
||||
|
||||
override Cryptography::CryptographicAlgorithm getAlgorithm() { result.matchesName(hashName) }
|
||||
|
||||
override DataFlow::Node getAnInput() { result = this.getParameter(1, "data").asSink() }
|
||||
@@ -2692,12 +2780,16 @@ private module StdlibPrivate {
|
||||
* A hashing operation by using the `update` method on the result of calling the `hashlib.new` function.
|
||||
*/
|
||||
class HashlibNewUpdateCall extends Cryptography::CryptographicOperation::Range, API::CallNode {
|
||||
API::CallNode init;
|
||||
string hashName;
|
||||
|
||||
HashlibNewUpdateCall() {
|
||||
this = hashlibNewCall(hashName).getReturn().getMember("update").getACall()
|
||||
init = hashlibNewCall(hashName) and
|
||||
this = init.getReturn().getMember("update").getACall()
|
||||
}
|
||||
|
||||
override DataFlow::Node getInitialization() { result = init }
|
||||
|
||||
override Cryptography::CryptographicAlgorithm getAlgorithm() { result.matchesName(hashName) }
|
||||
|
||||
override DataFlow::Node getAnInput() { result = this.getArg(0) }
|
||||
@@ -2736,7 +2828,14 @@ private module StdlibPrivate {
|
||||
* (such as `hashlib.md5`), by calling its' `update` method.
|
||||
*/
|
||||
class HashlibHashClassUpdateCall extends HashlibGenericHashOperation {
|
||||
HashlibHashClassUpdateCall() { this = hashClass.getReturn().getMember("update").getACall() }
|
||||
API::CallNode init;
|
||||
|
||||
HashlibHashClassUpdateCall() {
|
||||
init = hashClass.getACall() and
|
||||
this = hashClass.getReturn().getMember("update").getACall()
|
||||
}
|
||||
|
||||
override DataFlow::Node getInitialization() { result = init }
|
||||
|
||||
override DataFlow::Node getAnInput() { result = this.getArg(0) }
|
||||
}
|
||||
@@ -2753,6 +2852,8 @@ private module StdlibPrivate {
|
||||
exists([this.getArg(0), this.getArgByName("string")])
|
||||
}
|
||||
|
||||
override DataFlow::Node getInitialization() { result = this }
|
||||
|
||||
override DataFlow::Node getAnInput() {
|
||||
result = this.getArg(0)
|
||||
or
|
||||
@@ -2799,6 +2900,8 @@ private module StdlibPrivate {
|
||||
exists(this.getParameter(1, "msg").asSink())
|
||||
}
|
||||
|
||||
override DataFlow::Node getInitialization() { result = this }
|
||||
|
||||
override API::Node getDigestArg() { result = digestArg }
|
||||
|
||||
override DataFlow::Node getAnInput() { result = this.getParameter(1, "msg").asSink() }
|
||||
@@ -2810,12 +2913,16 @@ private module StdlibPrivate {
|
||||
* See https://docs.python.org/3.11/library/hmac.html#hmac.HMAC.update
|
||||
*/
|
||||
class HmacUpdateCall extends HmacCryptographicOperation {
|
||||
API::CallNode init;
|
||||
API::Node digestArg;
|
||||
|
||||
HmacUpdateCall() {
|
||||
this = getHmacConstructorCall(digestArg).getReturn().getMember("update").getACall()
|
||||
init = getHmacConstructorCall(digestArg) and
|
||||
this = init.getReturn().getMember("update").getACall()
|
||||
}
|
||||
|
||||
override DataFlow::Node getInitialization() { result = init }
|
||||
|
||||
override API::Node getDigestArg() { result = digestArg }
|
||||
|
||||
override DataFlow::Node getAnInput() { result = this.getParameter(0, "msg").asSink() }
|
||||
@@ -2829,6 +2936,8 @@ private module StdlibPrivate {
|
||||
class HmacDigestCall extends HmacCryptographicOperation {
|
||||
HmacDigestCall() { this = API::moduleImport("hmac").getMember("digest").getACall() }
|
||||
|
||||
override DataFlow::Node getInitialization() { result = this }
|
||||
|
||||
override API::Node getDigestArg() { result = this.getParameter(2, "digest") }
|
||||
|
||||
override DataFlow::Node getAnInput() { result = this.getParameter(1, "msg").asSink() }
|
||||
@@ -2960,6 +3069,212 @@ private module StdlibPrivate {
|
||||
override string getName() { result = "re." + method }
|
||||
}
|
||||
|
||||
/**
|
||||
* A flow summary for compiled regex objects
|
||||
*
|
||||
* See https://docs.python.org/3.11/library/re.html#re-objects
|
||||
*/
|
||||
class RePatternSummary extends SummarizedCallable {
|
||||
RePatternSummary() { this = "re.Pattern" }
|
||||
|
||||
override DataFlow::CallCfgNode getACall() {
|
||||
result = API::moduleImport("re").getMember("compile").getACall()
|
||||
}
|
||||
|
||||
override DataFlow::ArgumentNode getACallback() {
|
||||
result = API::moduleImport("re").getMember("compile").getAValueReachableFromSource()
|
||||
}
|
||||
|
||||
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
|
||||
input in ["Argument[0]", "Argument[pattern:]"] and
|
||||
output = "ReturnValue.Attribute[pattern]" and
|
||||
preservesValue = true
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A flow summary for methods returning a `re.Match` object
|
||||
*
|
||||
* See https://docs.python.org/3/library/re.html#re.Match
|
||||
*/
|
||||
class ReMatchSummary extends SummarizedCallable {
|
||||
ReMatchSummary() { this = ["re.Match", "compiled re.Match"] }
|
||||
|
||||
override DataFlow::CallCfgNode getACall() {
|
||||
this = "re.Match" and
|
||||
result = API::moduleImport("re").getMember(["match", "search", "fullmatch"]).getACall()
|
||||
or
|
||||
this = "compiled re.Match" and
|
||||
result =
|
||||
any(RePatternSummary c)
|
||||
.getACall()
|
||||
.(API::CallNode)
|
||||
.getReturn()
|
||||
.getMember(["match", "search", "fullmatch"])
|
||||
.getACall()
|
||||
}
|
||||
|
||||
override DataFlow::ArgumentNode getACallback() { none() }
|
||||
|
||||
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
|
||||
exists(string arg |
|
||||
this = "re.Match" and arg = "Argument[1]"
|
||||
or
|
||||
this = "compiled re.Match" and arg = "Argument[0]"
|
||||
|
|
||||
input in [arg, "Argument[string:]"] and
|
||||
(
|
||||
output = "ReturnValue.Attribute[string]" and
|
||||
preservesValue = true
|
||||
or
|
||||
// indexing such as `match[g]` is the same as `match.group(g)`
|
||||
// since you can index with both integers and strings, we model it as
|
||||
// both list element and dictionary... a bit of a hack, but no way to model
|
||||
// subscript operators directly with flow-summaries :|
|
||||
output in ["ReturnValue.ListElement", "ReturnValue.DictionaryElementAny"] and
|
||||
preservesValue = false
|
||||
)
|
||||
)
|
||||
or
|
||||
// regex pattern
|
||||
(
|
||||
this = "re.Match" and input in ["Argument[0]", "Argument[pattern:]"]
|
||||
or
|
||||
// for compiled regexes, this it is already stored in the `pattern` attribute
|
||||
this = "compiled re.Match" and input = "Argument[self].Attribute[pattern]"
|
||||
) and
|
||||
output = "ReturnValue.Attribute[re].Attribute[pattern]" and
|
||||
preservesValue = true
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A flow summary for methods on a `re.Match` object
|
||||
*
|
||||
* See https://docs.python.org/3/library/re.html#re.Match
|
||||
*/
|
||||
class ReMatchMethodsSummary extends SummarizedCallable {
|
||||
string methodName;
|
||||
|
||||
ReMatchMethodsSummary() {
|
||||
this = "re.Match." + methodName and
|
||||
methodName in ["expand", "group", "groups", "groupdict"]
|
||||
}
|
||||
|
||||
override DataFlow::CallCfgNode getACall() {
|
||||
result =
|
||||
any(ReMatchSummary c)
|
||||
.getACall()
|
||||
.(API::CallNode)
|
||||
.getReturn()
|
||||
.getMember(methodName)
|
||||
.getACall()
|
||||
}
|
||||
|
||||
override DataFlow::ArgumentNode getACallback() { none() }
|
||||
|
||||
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
|
||||
methodName = "expand" and
|
||||
preservesValue = false and
|
||||
(
|
||||
input = "Argument[0]" and output = "ReturnValue"
|
||||
or
|
||||
input = "Argument[self].Attribute[string]" and
|
||||
output = "ReturnValue"
|
||||
)
|
||||
or
|
||||
methodName = "group" and
|
||||
input = "Argument[self].Attribute[string]" and
|
||||
output in ["ReturnValue", "ReturnValue.ListElement"] and
|
||||
preservesValue = false
|
||||
or
|
||||
methodName = "groups" and
|
||||
input = "Argument[self].Attribute[string]" and
|
||||
output = "ReturnValue.ListElement" and
|
||||
preservesValue = false
|
||||
or
|
||||
methodName = "groupdict" and
|
||||
input = "Argument[self].Attribute[string]" and
|
||||
output = "ReturnValue.DictionaryElementAny" and
|
||||
preservesValue = false
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A flow summary for `re` methods not returning a `re.Match` object
|
||||
*
|
||||
* See https://docs.python.org/3/library/re.html#functions
|
||||
*/
|
||||
class ReFunctionsSummary extends SummarizedCallable {
|
||||
string methodName;
|
||||
|
||||
ReFunctionsSummary() {
|
||||
methodName in ["split", "findall", "finditer", "sub", "subn"] and
|
||||
this = ["re.", "compiled re."] + methodName
|
||||
}
|
||||
|
||||
override DataFlow::CallCfgNode getACall() {
|
||||
this = "re." + methodName and
|
||||
result = API::moduleImport("re").getMember(methodName).getACall()
|
||||
or
|
||||
this = "compiled re." + methodName and
|
||||
result =
|
||||
any(RePatternSummary c)
|
||||
.getACall()
|
||||
.(API::CallNode)
|
||||
.getReturn()
|
||||
.getMember(methodName)
|
||||
.getACall()
|
||||
}
|
||||
|
||||
override DataFlow::ArgumentNode getACallback() { none() }
|
||||
|
||||
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
|
||||
exists(int offset |
|
||||
// for non-compiled regex the first argument is the pattern, so we need to
|
||||
// account for this difference
|
||||
this = "re." + methodName and offset = 0
|
||||
or
|
||||
this = "compiled re." + methodName and offset = 1
|
||||
|
|
||||
// flow from input string to results
|
||||
exists(int arg | arg = methodName.(RegexExecutionMethod).getStringArgIndex() - offset |
|
||||
preservesValue = false and
|
||||
input in ["Argument[" + arg + "]", "Argument[string:]"] and
|
||||
(
|
||||
methodName in ["split", "findall", "finditer"] and
|
||||
output = "ReturnValue.ListElement"
|
||||
or
|
||||
// TODO: Since we currently model iterables as tainted when their elements
|
||||
// are, the result of findall, finditer, split needs to be tainted
|
||||
methodName in ["split", "findall", "finditer"] and
|
||||
output = "ReturnValue"
|
||||
or
|
||||
methodName = "sub" and
|
||||
output = "ReturnValue"
|
||||
or
|
||||
methodName = "subn" and
|
||||
output = "ReturnValue.TupleElement[0]"
|
||||
)
|
||||
)
|
||||
or
|
||||
// flow from replacement value for substitution
|
||||
exists(string argumentSpec |
|
||||
argumentSpec in ["Argument[" + (1 - offset) + "]", "Argument[repl:]"] and
|
||||
// `repl` can also be a function
|
||||
input = [argumentSpec, argumentSpec + ".ReturnValue"]
|
||||
|
|
||||
(
|
||||
methodName = "sub" and output = "ReturnValue"
|
||||
or
|
||||
methodName = "subn" and output = "ReturnValue.TupleElement[0]"
|
||||
) and
|
||||
preservesValue = false
|
||||
)
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A call to 're.escape'.
|
||||
* See https://docs.python.org/3/library/re.html#re.escape
|
||||
@@ -4372,6 +4687,141 @@ private module StdlibPrivate {
|
||||
preservesValue = false
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A flow summary for `os.getenv` / `os.getenvb`
|
||||
*
|
||||
* See https://devdocs.io/python~3.11/library/os#os.getenv
|
||||
*/
|
||||
class OsGetEnv extends SummarizedCallable {
|
||||
OsGetEnv() { this = "os.getenv" }
|
||||
|
||||
override DataFlow::CallCfgNode getACall() {
|
||||
result = API::moduleImport("os").getMember(["getenv", "getenvb"]).getACall()
|
||||
}
|
||||
|
||||
override DataFlow::ArgumentNode getACallback() {
|
||||
result =
|
||||
API::moduleImport("os").getMember(["getenv", "getenvb"]).getAValueReachableFromSource()
|
||||
}
|
||||
|
||||
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
|
||||
input in ["Argument[1]", "Argument[default:]"] and
|
||||
output = "ReturnValue" and
|
||||
preservesValue = true
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// asyncio
|
||||
// ---------------------------------------------------------------------------
|
||||
/** Provides models for the `asyncio` module. */
|
||||
module AsyncIO {
|
||||
/**
|
||||
* A call to the `asyncio.create_subprocess_exec` function (also accessible via the `subprocess` module of `asyncio`)
|
||||
*
|
||||
* See https://docs.python.org/3/library/asyncio-subprocess.html#creating-subprocesses
|
||||
*/
|
||||
private class CreateSubprocessExec extends SystemCommandExecution::Range,
|
||||
FileSystemAccess::Range, API::CallNode
|
||||
{
|
||||
CreateSubprocessExec() {
|
||||
this = API::moduleImport("asyncio").getMember("create_subprocess_exec").getACall()
|
||||
or
|
||||
this =
|
||||
API::moduleImport("asyncio")
|
||||
.getMember("subprocess")
|
||||
.getMember("create_subprocess_exec")
|
||||
.getACall()
|
||||
}
|
||||
|
||||
override DataFlow::Node getCommand() { result = this.getParameter(0, "program").asSink() }
|
||||
|
||||
override DataFlow::Node getAPathArgument() { result = this.getCommand() }
|
||||
|
||||
override predicate isShellInterpreted(DataFlow::Node arg) {
|
||||
none() // this is a safe API.
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A call to the `asyncio.create_subprocess_shell` function (also accessible via the `subprocess` module of `asyncio`)
|
||||
*
|
||||
* See https://docs.python.org/3/library/asyncio-subprocess.html#asyncio.create_subprocess_shell
|
||||
*/
|
||||
private class CreateSubprocessShell extends SystemCommandExecution::Range,
|
||||
FileSystemAccess::Range, API::CallNode
|
||||
{
|
||||
CreateSubprocessShell() {
|
||||
this = API::moduleImport("asyncio").getMember("create_subprocess_shell").getACall()
|
||||
or
|
||||
this =
|
||||
API::moduleImport("asyncio")
|
||||
.getMember("subprocess")
|
||||
.getMember("create_subprocess_shell")
|
||||
.getACall()
|
||||
}
|
||||
|
||||
override DataFlow::Node getCommand() { result = this.getParameter(0, "cmd").asSink() }
|
||||
|
||||
override DataFlow::Node getAPathArgument() { result = this.getCommand() }
|
||||
|
||||
override predicate isShellInterpreted(DataFlow::Node arg) { arg = this.getCommand() }
|
||||
}
|
||||
|
||||
/**
|
||||
* Get an asyncio event loop (an object with basetype `AbstractEventLoop`).
|
||||
*
|
||||
* See https://docs.python.org/3/library/asyncio-eventloop.html
|
||||
*/
|
||||
private API::Node getAsyncioEventLoop() {
|
||||
result = API::moduleImport("asyncio").getMember("get_running_loop").getReturn()
|
||||
or
|
||||
result = API::moduleImport("asyncio").getMember("get_event_loop").getReturn() // deprecated in Python 3.10.0 and later
|
||||
or
|
||||
result = API::moduleImport("asyncio").getMember("new_event_loop").getReturn()
|
||||
}
|
||||
|
||||
/**
|
||||
* A call to `subprocess_exec` on an event loop instance.
|
||||
*
|
||||
* See https://docs.python.org/3/library/asyncio-eventloop.html#asyncio.loop.subprocess_exec
|
||||
*/
|
||||
private class EventLoopSubprocessExec extends API::CallNode, SystemCommandExecution::Range,
|
||||
FileSystemAccess::Range
|
||||
{
|
||||
EventLoopSubprocessExec() {
|
||||
this = getAsyncioEventLoop().getMember("subprocess_exec").getACall()
|
||||
}
|
||||
|
||||
override DataFlow::Node getCommand() { result = this.getArg(1) }
|
||||
|
||||
override DataFlow::Node getAPathArgument() { result = this.getCommand() }
|
||||
|
||||
override predicate isShellInterpreted(DataFlow::Node arg) {
|
||||
none() // this is a safe API.
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A call to `subprocess_shell` on an event loop instance.
|
||||
*
|
||||
* See https://docs.python.org/3/library/asyncio-eventloop.html#asyncio.loop.subprocess_shell
|
||||
*/
|
||||
private class EventLoopSubprocessShell extends API::CallNode, SystemCommandExecution::Range,
|
||||
FileSystemAccess::Range
|
||||
{
|
||||
EventLoopSubprocessShell() {
|
||||
this = getAsyncioEventLoop().getMember("subprocess_shell").getACall()
|
||||
}
|
||||
|
||||
override DataFlow::Node getCommand() { result = this.getParameter(1, "cmd").asSink() }
|
||||
|
||||
override DataFlow::Node getAPathArgument() { result = this.getCommand() }
|
||||
|
||||
override predicate isShellInterpreted(DataFlow::Node arg) { arg = this.getCommand() }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
@@ -416,7 +416,10 @@ module Tornado {
|
||||
// more FPs. If this turns out to be the wrong tradeoff, we can always change our mind.
|
||||
exists(Function requestHandler | requestHandler = this.getARequestHandler() |
|
||||
not exists(this.getUrlPattern()) and
|
||||
result in [requestHandler.getArg(_), requestHandler.getArgByName(_)] and
|
||||
result in [
|
||||
requestHandler.getArg(_), requestHandler.getArgByName(_),
|
||||
requestHandler.getVararg().(Parameter), requestHandler.getKwarg().(Parameter)
|
||||
] and
|
||||
not result = requestHandler.getArg(0)
|
||||
)
|
||||
or
|
||||
@@ -429,6 +432,12 @@ module Tornado {
|
||||
result = requestHandler.getArg(regex.getGroupNumber(_, _))
|
||||
or
|
||||
result = requestHandler.getArgByName(regex.getGroupName(_, _))
|
||||
or
|
||||
exists(regex.getGroupNumber(_, _)) and
|
||||
result = requestHandler.getVararg()
|
||||
or
|
||||
exists(regex.getGroupName(_, _)) and
|
||||
result = requestHandler.getKwarg()
|
||||
)
|
||||
}
|
||||
}
|
||||
@@ -446,7 +455,10 @@ module Tornado {
|
||||
// Since we don't know the URL pattern, we simply mark all parameters as a routed
|
||||
// parameter. This should give us more RemoteFlowSources but could also lead to
|
||||
// more FPs. If this turns out to be the wrong tradeoff, we can always change our mind.
|
||||
result in [this.getArg(_), this.getArgByName(_)] and
|
||||
result in [
|
||||
this.getArg(_), this.getArgByName(_), this.getVararg().(Parameter),
|
||||
this.getKwarg().(Parameter)
|
||||
] and
|
||||
not result = this.getArg(0)
|
||||
}
|
||||
|
||||
|
||||
@@ -454,6 +454,14 @@ private API::Node getNodeFromPath(string type, AccessPath path, int n) {
|
||||
or
|
||||
// Apply a type step
|
||||
typeStep(getNodeFromPath(type, path, n), result)
|
||||
or
|
||||
// Apply a fuzzy step (without advancing 'n')
|
||||
path.getToken(n).getName() = "Fuzzy" and
|
||||
result = Specific::getAFuzzySuccessor(getNodeFromPath(type, path, n))
|
||||
or
|
||||
// Skip a fuzzy step (advance 'n' without changing the current node)
|
||||
path.getToken(n - 1).getName() = "Fuzzy" and
|
||||
result = getNodeFromPath(type, path, n - 1)
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -500,6 +508,14 @@ private API::Node getNodeFromSubPath(API::Node base, AccessPath subPath, int n)
|
||||
// will themselves find by following type-steps.
|
||||
n > 0 and
|
||||
n < subPath.getNumToken()
|
||||
or
|
||||
// Apply a fuzzy step (without advancing 'n')
|
||||
subPath.getToken(n).getName() = "Fuzzy" and
|
||||
result = Specific::getAFuzzySuccessor(getNodeFromSubPath(base, subPath, n))
|
||||
or
|
||||
// Skip a fuzzy step (advance 'n' without changing the current node)
|
||||
subPath.getToken(n - 1).getName() = "Fuzzy" and
|
||||
result = getNodeFromSubPath(base, subPath, n - 1)
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -561,7 +577,7 @@ private Specific::InvokeNode getInvocationFromPath(string type, AccessPath path)
|
||||
*/
|
||||
bindingset[name]
|
||||
private predicate isValidTokenNameInIdentifyingAccessPath(string name) {
|
||||
name = ["Argument", "Parameter", "ReturnValue", "WithArity", "TypeVar"]
|
||||
name = ["Argument", "Parameter", "ReturnValue", "WithArity", "TypeVar", "Fuzzy"]
|
||||
or
|
||||
Specific::isExtraValidTokenNameInIdentifyingAccessPath(name)
|
||||
}
|
||||
@@ -572,7 +588,7 @@ private predicate isValidTokenNameInIdentifyingAccessPath(string name) {
|
||||
*/
|
||||
bindingset[name]
|
||||
private predicate isValidNoArgumentTokenInIdentifyingAccessPath(string name) {
|
||||
name = "ReturnValue"
|
||||
name = ["ReturnValue", "Fuzzy"]
|
||||
or
|
||||
Specific::isExtraValidNoArgumentTokenInIdentifyingAccessPath(name)
|
||||
}
|
||||
@@ -643,6 +659,15 @@ module ModelOutput {
|
||||
baseNode = getInvocationFromPath(type, path)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if a `baseNode` is a callable identified by the `type,path` part of a summary row.
|
||||
*/
|
||||
cached
|
||||
predicate resolvedSummaryRefBase(string type, string path, API::Node baseNode) {
|
||||
summaryModel(type, path, _, _, _) and
|
||||
baseNode = getNodeFromPath(type, path)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `node` is seen as an instance of `type` due to a type definition
|
||||
* contributed by a CSV model.
|
||||
@@ -653,6 +678,17 @@ module ModelOutput {
|
||||
|
||||
import Cached
|
||||
import Specific::ModelOutputSpecific
|
||||
private import codeql.mad.ModelValidation as SharedModelVal
|
||||
|
||||
private module KindValConfig implements SharedModelVal::KindValidationConfigSig {
|
||||
predicate summaryKind(string kind) { summaryModel(_, _, _, _, kind) }
|
||||
|
||||
predicate sinkKind(string kind) { sinkModel(_, _, kind) }
|
||||
|
||||
predicate sourceKind(string kind) { sourceModel(_, _, kind) }
|
||||
}
|
||||
|
||||
private module KindVal = SharedModelVal::KindValidation<KindValConfig>;
|
||||
|
||||
/**
|
||||
* Gets an error message relating to an invalid CSV row in a model.
|
||||
@@ -698,5 +734,8 @@ module ModelOutput {
|
||||
not isValidNoArgumentTokenInIdentifyingAccessPath(token.getName()) and
|
||||
result = "Invalid token '" + token + "' is missing its arguments, in access path: " + path
|
||||
)
|
||||
or
|
||||
// Check for invalid model kinds
|
||||
result = KindVal::getInvalidModelKind()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -17,7 +17,7 @@ extensible predicate sourceModel(string type, string path, string kind);
|
||||
extensible predicate sinkModel(string type, string path, string kind);
|
||||
|
||||
/**
|
||||
* Holds if calls to `(type, path)`, the value referred to by `input`
|
||||
* Holds if in calls to `(type, path)`, the value referred to by `input`
|
||||
* can flow to the value referred to by `output`.
|
||||
*
|
||||
* `kind` should be either `value` or `taint`, for value-preserving or taint-preserving steps,
|
||||
@@ -25,6 +25,13 @@ extensible predicate sinkModel(string type, string path, string kind);
|
||||
*/
|
||||
extensible predicate summaryModel(string type, string path, string input, string output, string kind);
|
||||
|
||||
/**
|
||||
* Holds if calls to `(type, path)` should be considered neutral. The meaning of this depends on the `kind`.
|
||||
* If `kind` is `summary`, the call does not propagate data flow. If `kind` is `source`, the call is not a source.
|
||||
* If `kind` is `sink`, the call is not a sink.
|
||||
*/
|
||||
extensible predicate neutralModel(string type, string path, string kind);
|
||||
|
||||
/**
|
||||
* Holds if `(type2, path)` should be seen as an instance of `type1`.
|
||||
*/
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user