From 4ab61bb0882461fe5e8d22587fe4abcdfcbb00dd Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Tue, 2 Feb 2021 13:43:21 +0100 Subject: [PATCH 01/25] Python: Add a few tests for crypto frameworks Tests working can be verified by running ``` ls ql/python/ql/test/experimental/library-tests/frameworks/crypto*/*.py | xargs -L1 sh -c 'python $0 || exit 255' ``` --- .../frameworks/crypto/ConceptsTest.expected | 0 .../frameworks/crypto/ConceptsTest.ql | 2 + .../frameworks/crypto/test_dsa.py | 41 ++++++++++ .../frameworks/crypto/test_ec.py | 38 +++++++++ .../frameworks/crypto/test_rsa.py | 70 ++++++++++++++++ .../cryptodome/ConceptsTest.expected | 0 .../frameworks/cryptodome/ConceptsTest.ql | 2 + .../frameworks/cryptodome/test_dsa.py | 41 ++++++++++ .../frameworks/cryptodome/test_ec.py | 38 +++++++++ .../frameworks/cryptodome/test_rsa.py | 70 ++++++++++++++++ .../cryptography/ConceptsTest.expected | 0 .../frameworks/cryptography/ConceptsTest.ql | 2 + .../frameworks/cryptography/test_dsa.py | 37 +++++++++ .../frameworks/cryptography/test_ec.py | 43 ++++++++++ .../frameworks/cryptography/test_rsa.py | 80 +++++++++++++++++++ 15 files changed, 464 insertions(+) create mode 100644 python/ql/test/experimental/library-tests/frameworks/crypto/ConceptsTest.expected create mode 100644 python/ql/test/experimental/library-tests/frameworks/crypto/ConceptsTest.ql create mode 100644 python/ql/test/experimental/library-tests/frameworks/crypto/test_dsa.py create mode 100644 python/ql/test/experimental/library-tests/frameworks/crypto/test_ec.py create mode 100644 python/ql/test/experimental/library-tests/frameworks/crypto/test_rsa.py create mode 100644 python/ql/test/experimental/library-tests/frameworks/cryptodome/ConceptsTest.expected create mode 100644 python/ql/test/experimental/library-tests/frameworks/cryptodome/ConceptsTest.ql create mode 100644 python/ql/test/experimental/library-tests/frameworks/cryptodome/test_dsa.py create mode 100644 
python/ql/test/experimental/library-tests/frameworks/cryptodome/test_ec.py create mode 100644 python/ql/test/experimental/library-tests/frameworks/cryptodome/test_rsa.py create mode 100644 python/ql/test/experimental/library-tests/frameworks/cryptography/ConceptsTest.expected create mode 100644 python/ql/test/experimental/library-tests/frameworks/cryptography/ConceptsTest.ql create mode 100644 python/ql/test/experimental/library-tests/frameworks/cryptography/test_dsa.py create mode 100644 python/ql/test/experimental/library-tests/frameworks/cryptography/test_ec.py create mode 100644 python/ql/test/experimental/library-tests/frameworks/cryptography/test_rsa.py diff --git a/python/ql/test/experimental/library-tests/frameworks/crypto/ConceptsTest.expected b/python/ql/test/experimental/library-tests/frameworks/crypto/ConceptsTest.expected new file mode 100644 index 00000000000..e69de29bb2d diff --git a/python/ql/test/experimental/library-tests/frameworks/crypto/ConceptsTest.ql b/python/ql/test/experimental/library-tests/frameworks/crypto/ConceptsTest.ql new file mode 100644 index 00000000000..b557a0bccb6 --- /dev/null +++ b/python/ql/test/experimental/library-tests/frameworks/crypto/ConceptsTest.ql @@ -0,0 +1,2 @@ +import python +import experimental.meta.ConceptsTest diff --git a/python/ql/test/experimental/library-tests/frameworks/crypto/test_dsa.py b/python/ql/test/experimental/library-tests/frameworks/crypto/test_dsa.py new file mode 100644 index 00000000000..dd8a9f68d72 --- /dev/null +++ b/python/ql/test/experimental/library-tests/frameworks/crypto/test_dsa.py @@ -0,0 +1,41 @@ +# DSA is a public-key algorithm for signing messages. 
+# Following example at https://pycryptodome.readthedocs.io/en/latest/src/signature/dsa.html + +from Crypto.PublicKey import DSA +from Crypto.Signature import DSS +from Crypto.Hash import SHA256 + + +private_key = DSA.generate(2048) +public_key = private_key.publickey() + +# ------------------------------------------------------------------------------ +# sign/verify +# ------------------------------------------------------------------------------ + +print("sign/verify") + + +message = b"message" + +signer = DSS.new(private_key, mode='fips-186-3') + +hasher = SHA256.new(message) +signature = signer.sign(hasher) + +print("signature={}".format(signature)) + +print() + +verifier = DSS.new(public_key, mode='fips-186-3') + +hasher = SHA256.new(message) +verifier.verify(hasher, signature) +print("Signature verified (as expected)") + +try: + hasher = SHA256.new(b"other message") + verifier.verify(hasher, signature) + raise Exception("Signature verified (unexpected)") +except ValueError: + print("Signature mismatch (as expected)") diff --git a/python/ql/test/experimental/library-tests/frameworks/crypto/test_ec.py b/python/ql/test/experimental/library-tests/frameworks/crypto/test_ec.py new file mode 100644 index 00000000000..0c7d17e8b81 --- /dev/null +++ b/python/ql/test/experimental/library-tests/frameworks/crypto/test_ec.py @@ -0,0 +1,38 @@ +from Crypto.PublicKey import ECC +from Crypto.Signature import DSS +from Crypto.Hash import SHA256 + + +private_key = ECC.generate(curve="P-256") +public_key = private_key.public_key() + +# ------------------------------------------------------------------------------ +# sign/verify +# ------------------------------------------------------------------------------ + +print("sign/verify") + + +message = b"message" + +signer = DSS.new(private_key, mode='fips-186-3') + +hasher = SHA256.new(message) +signature = signer.sign(hasher) + +print("signature={}".format(signature)) + +print() + +verifier = DSS.new(public_key, mode='fips-186-3') + 
+hasher = SHA256.new(message) +verifier.verify(hasher, signature) +print("Signature verified (as expected)") + +try: + hasher = SHA256.new(b"other message") + verifier.verify(hasher, signature) + raise Exception("Signature verified (unexpected)") +except ValueError: + print("Signature mismatch (as expected)") diff --git a/python/ql/test/experimental/library-tests/frameworks/crypto/test_rsa.py b/python/ql/test/experimental/library-tests/frameworks/crypto/test_rsa.py new file mode 100644 index 00000000000..cc384263c96 --- /dev/null +++ b/python/ql/test/experimental/library-tests/frameworks/crypto/test_rsa.py @@ -0,0 +1,70 @@ +# RSA is a public-key algorithm for encrypting and signing messages. + +from Crypto.PublicKey import RSA +from Crypto.Cipher import PKCS1_OAEP +from Crypto.Signature import pss +from Crypto.Hash import SHA256 + +private_key = RSA.generate(2048) +public_key = private_key.publickey() + +# ------------------------------------------------------------------------------ +# encrypt/decrypt +# ------------------------------------------------------------------------------ + +print("encrypt/decrypt") + +secret_message = b"secret message" + +# Following example at https://pycryptodome.readthedocs.io/en/latest/src/examples.html#encrypt-data-with-rsa + +encrypt_cipher = PKCS1_OAEP.new(public_key) + +encrypted = encrypt_cipher.encrypt(secret_message) + +print("encrypted={}".format(encrypted)) + +print() + +decrypt_cipher = PKCS1_OAEP.new(private_key) + +decrypted = decrypt_cipher.decrypt( + encrypted, +) + +print("decrypted={}".format(decrypted)) +assert decrypted == secret_message + +print("\n---\n") + +# ------------------------------------------------------------------------------ +# sign/verify +# ------------------------------------------------------------------------------ + +print("sign/verify") + + +message = b"message" + +signer = pss.new(private_key) + +hasher = SHA256.new(message) +signature = signer.sign(hasher) + 
+print("signature={}".format(signature)) + +print() + + +verifier = pss.new(public_key) +hasher = SHA256.new(message) +verifier.verify(hasher, signature) +print("Signature verified (as expected)") + +try: + verifier = pss.new(public_key) + hasher = SHA256.new(b"other message") + verifier.verify(hasher, signature) + raise Exception("Signature verified (unexpected)") +except ValueError: + print("Signature mismatch (as expected)") diff --git a/python/ql/test/experimental/library-tests/frameworks/cryptodome/ConceptsTest.expected b/python/ql/test/experimental/library-tests/frameworks/cryptodome/ConceptsTest.expected new file mode 100644 index 00000000000..e69de29bb2d diff --git a/python/ql/test/experimental/library-tests/frameworks/cryptodome/ConceptsTest.ql b/python/ql/test/experimental/library-tests/frameworks/cryptodome/ConceptsTest.ql new file mode 100644 index 00000000000..b557a0bccb6 --- /dev/null +++ b/python/ql/test/experimental/library-tests/frameworks/cryptodome/ConceptsTest.ql @@ -0,0 +1,2 @@ +import python +import experimental.meta.ConceptsTest diff --git a/python/ql/test/experimental/library-tests/frameworks/cryptodome/test_dsa.py b/python/ql/test/experimental/library-tests/frameworks/cryptodome/test_dsa.py new file mode 100644 index 00000000000..8c5b4e2e519 --- /dev/null +++ b/python/ql/test/experimental/library-tests/frameworks/cryptodome/test_dsa.py @@ -0,0 +1,41 @@ +# DSA is a public-key algorithm for signing messages. 
+# Following example at https://pycryptodome.readthedocs.io/en/latest/src/signature/dsa.html + +from Cryptodome.PublicKey import DSA +from Cryptodome.Signature import DSS +from Cryptodome.Hash import SHA256 + + +private_key = DSA.generate(2048) +public_key = private_key.publickey() + +# ------------------------------------------------------------------------------ +# sign/verify +# ------------------------------------------------------------------------------ + +print("sign/verify") + + +message = b"message" + +signer = DSS.new(private_key, mode='fips-186-3') + +hasher = SHA256.new(message) +signature = signer.sign(hasher) + +print("signature={}".format(signature)) + +print() + +verifier = DSS.new(public_key, mode='fips-186-3') + +hasher = SHA256.new(message) +verifier.verify(hasher, signature) +print("Signature verified (as expected)") + +try: + hasher = SHA256.new(b"other message") + verifier.verify(hasher, signature) + raise Exception("Signature verified (unexpected)") +except ValueError: + print("Signature mismatch (as expected)") diff --git a/python/ql/test/experimental/library-tests/frameworks/cryptodome/test_ec.py b/python/ql/test/experimental/library-tests/frameworks/cryptodome/test_ec.py new file mode 100644 index 00000000000..781d1dc68ac --- /dev/null +++ b/python/ql/test/experimental/library-tests/frameworks/cryptodome/test_ec.py @@ -0,0 +1,38 @@ +from Cryptodome.PublicKey import ECC +from Cryptodome.Signature import DSS +from Cryptodome.Hash import SHA256 + + +private_key = ECC.generate(curve="P-256") +public_key = private_key.public_key() + +# ------------------------------------------------------------------------------ +# sign/verify +# ------------------------------------------------------------------------------ + +print("sign/verify") + + +message = b"message" + +signer = DSS.new(private_key, mode='fips-186-3') + +hasher = SHA256.new(message) +signature = signer.sign(hasher) + +print("signature={}".format(signature)) + +print() + +verifier = 
DSS.new(public_key, mode='fips-186-3') + +hasher = SHA256.new(message) +verifier.verify(hasher, signature) +print("Signature verified (as expected)") + +try: + hasher = SHA256.new(b"other message") + verifier.verify(hasher, signature) + raise Exception("Signature verified (unexpected)") +except ValueError: + print("Signature mismatch (as expected)") diff --git a/python/ql/test/experimental/library-tests/frameworks/cryptodome/test_rsa.py b/python/ql/test/experimental/library-tests/frameworks/cryptodome/test_rsa.py new file mode 100644 index 00000000000..807c1dded51 --- /dev/null +++ b/python/ql/test/experimental/library-tests/frameworks/cryptodome/test_rsa.py @@ -0,0 +1,70 @@ +# RSA is a public-key algorithm for encrypting and signing messages. + +from Cryptodome.PublicKey import RSA +from Cryptodome.Cipher import PKCS1_OAEP +from Cryptodome.Signature import pss +from Cryptodome.Hash import SHA256 + +private_key = RSA.generate(2048) +public_key = private_key.publickey() + +# ------------------------------------------------------------------------------ +# encrypt/decrypt +# ------------------------------------------------------------------------------ + +print("encrypt/decrypt") + +secret_message = b"secret message" + +# Following example at https://pycryptodome.readthedocs.io/en/latest/src/examples.html#encrypt-data-with-rsa + +encrypt_cipher = PKCS1_OAEP.new(public_key) + +encrypted = encrypt_cipher.encrypt(secret_message) + +print("encrypted={}".format(encrypted)) + +print() + +decrypt_cipher = PKCS1_OAEP.new(private_key) + +decrypted = decrypt_cipher.decrypt( + encrypted, +) + +print("decrypted={}".format(decrypted)) +assert decrypted == secret_message + +print("\n---\n") + +# ------------------------------------------------------------------------------ +# sign/verify +# ------------------------------------------------------------------------------ + +print("sign/verify") + + +message = b"message" + +signer = pss.new(private_key) + +hasher = SHA256.new(message) 
+signature = signer.sign(hasher) + +print("signature={}".format(signature)) + +print() + +verifier = pss.new(public_key) + +hasher = SHA256.new(message) +verifier.verify(hasher, signature) +print("Signature verified (as expected)") + +try: + verifier = pss.new(public_key) + hasher = SHA256.new(b"other message") + verifier.verify(hasher, signature) + raise Exception("Signature verified (unexpected)") +except ValueError: + print("Signature mismatch (as expected)") diff --git a/python/ql/test/experimental/library-tests/frameworks/cryptography/ConceptsTest.expected b/python/ql/test/experimental/library-tests/frameworks/cryptography/ConceptsTest.expected new file mode 100644 index 00000000000..e69de29bb2d diff --git a/python/ql/test/experimental/library-tests/frameworks/cryptography/ConceptsTest.ql b/python/ql/test/experimental/library-tests/frameworks/cryptography/ConceptsTest.ql new file mode 100644 index 00000000000..b557a0bccb6 --- /dev/null +++ b/python/ql/test/experimental/library-tests/frameworks/cryptography/ConceptsTest.ql @@ -0,0 +1,2 @@ +import python +import experimental.meta.ConceptsTest diff --git a/python/ql/test/experimental/library-tests/frameworks/cryptography/test_dsa.py b/python/ql/test/experimental/library-tests/frameworks/cryptography/test_dsa.py new file mode 100644 index 00000000000..31ca224b52f --- /dev/null +++ b/python/ql/test/experimental/library-tests/frameworks/cryptography/test_dsa.py @@ -0,0 +1,37 @@ +# DSA is a public-key algorithm for signing messages. 
+# see https://cryptography.io/en/latest/hazmat/primitives/asymmetric/dsa.html + +from cryptography.hazmat.primitives.asymmetric import dsa +from cryptography.hazmat.primitives import hashes +from cryptography.exceptions import InvalidSignature + +HASH_ALGORITHM = hashes.SHA256() + +private_key = dsa.generate_private_key(key_size=2048) +public_key = private_key.public_key() + +message = b"message" + +# Following example at https://cryptography.io/en/latest/hazmat/primitives/asymmetric/dsa.html#signing + +signature = private_key.sign( + message, + algorithm=HASH_ALGORITHM, +) + +print("signature={}".format(signature)) + +print() + +public_key.verify( + signature, message, algorithm=HASH_ALGORITHM +) +print("Signature verified (as expected)") + +try: + public_key.verify( + signature, b"other message", algorithm=HASH_ALGORITHM + ) + raise Exception("Signature verified (unexpected)") +except InvalidSignature: + print("Signature mismatch (as expected)") diff --git a/python/ql/test/experimental/library-tests/frameworks/cryptography/test_ec.py b/python/ql/test/experimental/library-tests/frameworks/cryptography/test_ec.py new file mode 100644 index 00000000000..9f810b261cb --- /dev/null +++ b/python/ql/test/experimental/library-tests/frameworks/cryptography/test_ec.py @@ -0,0 +1,43 @@ +# see https://cryptography.io/en/latest/hazmat/primitives/asymmetric/ec.html + +from cryptography.hazmat.primitives.asymmetric import ec +from cryptography.hazmat.primitives import hashes +from cryptography.exceptions import InvalidSignature + + +private_key = ec.generate_private_key(curve=ec.SECP384R1()) +public_key = private_key.public_key() + +HASH_ALGORITHM = hashes.SHA256() + +# ------------------------------------------------------------------------------ +# sign/verify +# ------------------------------------------------------------------------------ + +print("sign/verify") + +SIGNATURE_ALGORITHM = ec.ECDSA(HASH_ALGORITHM) + +message = b"message" + +signature = private_key.sign( +
message, + signature_algorithm=SIGNATURE_ALGORITHM, +) + +print("signature={}".format(signature)) + +print() + +public_key.verify( + signature, message, signature_algorithm=SIGNATURE_ALGORITHM +) +print("Signature verified (as expected)") + +try: + public_key.verify( + signature, b"other message", signature_algorithm=SIGNATURE_ALGORITHM + ) + raise Exception("Signature verified (unexpected)") +except InvalidSignature: + print("Signature mismatch (as expected)") diff --git a/python/ql/test/experimental/library-tests/frameworks/cryptography/test_rsa.py b/python/ql/test/experimental/library-tests/frameworks/cryptography/test_rsa.py new file mode 100644 index 00000000000..107b60214a9 --- /dev/null +++ b/python/ql/test/experimental/library-tests/frameworks/cryptography/test_rsa.py @@ -0,0 +1,80 @@ +# RSA is a public-key algorithm for encrypting and signing messages. +# see https://cryptography.io/en/latest/hazmat/primitives/asymmetric/rsa.html + +from cryptography.hazmat.primitives.asymmetric import rsa, padding +from cryptography.hazmat.primitives import hashes +from cryptography.exceptions import InvalidSignature + + +private_key = rsa.generate_private_key(public_exponent=65537, key_size=2048) +public_key = private_key.public_key() + +HASH_ALGORITHM = hashes.SHA256() + +# ------------------------------------------------------------------------------ +# encrypt/decrypt +# ------------------------------------------------------------------------------ + +print("encrypt/decrypt") + +ENCRYPT_PADDING = padding.OAEP( + mgf=padding.MGF1(algorithm=HASH_ALGORITHM), + algorithm=HASH_ALGORITHM, + label=None, +) + + +secret_message = b"secret message" + +# Following example at https://cryptography.io/en/latest/hazmat/primitives/asymmetric/rsa.html#encryption +encrypted = public_key.encrypt(secret_message, padding=ENCRYPT_PADDING) + +print("encrypted={}".format(encrypted)) + +print() + +decrypted = private_key.decrypt( + encrypted, + padding=ENCRYPT_PADDING +) + 
+print("decrypted={}".format(decrypted)) +assert decrypted == secret_message + +print("\n---\n") + +# ------------------------------------------------------------------------------ +# sign/verify +# ------------------------------------------------------------------------------ + +print("sign/verify") + +SIGN_PADDING = padding.PSS( + mgf=padding.MGF1(HASH_ALGORITHM), + salt_length=padding.PSS.MAX_LENGTH +) + +message = b"message" + +signature = private_key.sign( + message, + padding=SIGN_PADDING, + algorithm=HASH_ALGORITHM, +) + +print("signature={}".format(signature)) + +print() + +public_key.verify( + signature, message, padding=SIGN_PADDING, algorithm=HASH_ALGORITHM +) +print("Signature verified (as expected)") + +try: + public_key.verify( + signature, b"other message", padding=SIGN_PADDING, algorithm=HASH_ALGORITHM + ) + raise Exception("Signature verified (unexpected)") +except InvalidSignature: + print("Signature mismatch (as expected)") From 11cd0dbbc08a5d4ee31b7a8e5a4947cc947def21 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Tue, 2 Feb 2021 16:20:04 +0100 Subject: [PATCH 02/25] Python: Add concepts for public-key generation I did spend some time to figure out how to best write the `minimumSecureKeySize` predicate. I wanted to write once and for all the recommended sizes for each cryptosystem. I considered making the predicate such as ```codeql int minimumSecureKeySize() { this.getName() = "RSA" and result = 2048 or this.getName() = "DSA" and result = 2048 or this.getName() = "ECC" and result = 224 } ``` but then it would be impossible to add a new model without also being able to modify the body of this predicate -- which seems like a bad way to start off a brand new way of modeling things.
So I considered if we could add it to the non-range class, such as ```codeql class RSAKeyGeneration extends KeyGeneration { RSAKeyGeneration() { this.getName() = "RSA" } override int minimumSecureKeySize() { result = 2048 } } ``` This has the major problem that when you're writing the models for a new API (and therefore extending KeyGeneration::Range), there is no way for you to see that you need to take this extra step :| (There is also the problem of how we should then define `minimumSecureKeySize` on the `KeyGeneration` class: if we make it abstract, we effectively disable the ability to refine `KeyGeneration`, since any subclass must provide an implementation.) Therefore, I ended up with this solution ;) --- python/ql/src/semmle/python/Concepts.qll | 87 +++++++++++++++++++ .../test/experimental/meta/ConceptsTest.qll | 22 +++++ 2 files changed, 109 insertions(+) diff --git a/python/ql/src/semmle/python/Concepts.qll b/python/ql/src/semmle/python/Concepts.qll index c6d1ce367e9..1f04b077834 100644 --- a/python/ql/src/semmle/python/Concepts.qll +++ b/python/ql/src/semmle/python/Concepts.qll @@ -526,3 +526,90 @@ module HTTP { } } } + +/** Provides models for cryptographic things. */ +module Cryptography { + /** Provides models for public-key cryptography, also called asymmetric cryptography. */ + module PublicKey { + /** + * A data-flow node that generates a new key-pair for use with public-key cryptography. + * + * Extend this class to refine existing API models. If you want to model new APIs, + * extend `KeyGeneration::Range` instead. + */ + class KeyGeneration extends DataFlow::Node { + KeyGeneration::Range range; + + KeyGeneration() { this = range } + + /** Gets the name of the cryptographic algorithm (for example `"RSA"` or `"DSA"`). */ + string getName() { result = range.getName() } + + /** Gets the argument that specifies size of the key in bits, if available.
*/ + DataFlow::Node getKeySizeArg() { result = range.getKeySizeArg() } + + /** + * Gets the size of the key generated (in bits), as well as the `origin` that + * explains how we obtained this specific key size. + */ + int getKeySizeWithOrigin(DataFlow::Node origin) { + result = range.getKeySizeWithOrigin(origin) + } + + /** Gets the minimum key size (in bits) for this algorithm to be considered secure. */ + int minimumSecureKeySize() { result = range.minimumSecureKeySize() } + } + + /** Provides classes for modeling new key-pair generation APIs. */ + module KeyGeneration { + /** + * A data-flow node that generates a new key-pair for use with public-key cryptography. + * + * Extend this class to model new APIs. If you want to refine existing API models, + * extend `KeyGeneration` instead. + */ + abstract class Range extends DataFlow::Node { + /** Gets the name of the cryptographic algorithm (for example `"RSA"`). */ + abstract string getName(); + + /** Gets the argument that specifies size of the key in bits, if available. */ + abstract DataFlow::Node getKeySizeArg(); + + /** + * Gets the size of the key generated (in bits), as well as the `origin` that + * explains how we obtained this specific key size. + */ + int getKeySizeWithOrigin(DataFlow::Node origin) { + exists(IntegerLiteral size | origin = DataFlow::exprNode(size) | + origin.(DataFlow::LocalSourceNode).flowsTo(this.getKeySizeArg()) and + result = size.getValue() + ) + } + + /** Gets the minimum key size (in bits) for this algorithm to be considered secure. */ + abstract int minimumSecureKeySize(); + } + + /** A data-flow node that generates a new RSA key-pair. */ + abstract class RSARange extends Range { + override string getName() { result = "RSA" } + + override int minimumSecureKeySize() { result = 2048 } + } + + /** A data-flow node that generates a new DSA key-pair. 
*/ + abstract class DSARange extends Range { + override string getName() { result = "DSA" } + + override int minimumSecureKeySize() { result = 2048 } + } + + /** A data-flow node that generates a new ECC key-pair. */ + abstract class ECCRange extends Range { + override string getName() { result = "ECC" } + + override int minimumSecureKeySize() { result = 224 } + } + } + } +} diff --git a/python/ql/test/experimental/meta/ConceptsTest.qll b/python/ql/test/experimental/meta/ConceptsTest.qll index eafcb8b0ef9..3800a4cd273 100644 --- a/python/ql/test/experimental/meta/ConceptsTest.qll +++ b/python/ql/test/experimental/meta/ConceptsTest.qll @@ -319,3 +319,25 @@ class SafeAccessCheckTest extends InlineExpectationsTest { ) } } + +class PublicKeyGenerationTest extends InlineExpectationsTest { + PublicKeyGenerationTest() { this = "PublicKeyGenerationTest" } + + override string getARelevantTag() { result in ["PublicKeyGeneration", "keySize"] } + + override predicate hasActualResult(Location location, string element, string tag, string value) { + exists(location.getFile().getRelativePath()) and + exists(Cryptography::PublicKey::KeyGeneration keyGen | + location = keyGen.getLocation() and + ( + element = keyGen.toString() and + value = "" and + tag = "PublicKeyGeneration" + or + element = keyGen.toString() and + value = keyGen.getKeySizeWithOrigin(_).toString() and + tag = "keySize" + ) + ) + } +} From 1bf9f7d1353a30a8355c9e67e0bacdf8bec0706a Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Tue, 2 Feb 2021 16:30:18 +0100 Subject: [PATCH 03/25] Python: Add missing annotations to new crypto tests --- .../experimental/library-tests/frameworks/crypto/test_dsa.py | 2 +- .../experimental/library-tests/frameworks/crypto/test_ec.py | 2 +- .../experimental/library-tests/frameworks/crypto/test_rsa.py | 2 +- .../library-tests/frameworks/cryptodome/test_dsa.py | 2 +- .../experimental/library-tests/frameworks/cryptodome/test_ec.py | 2 +- 
.../library-tests/frameworks/cryptodome/test_rsa.py | 2 +- .../library-tests/frameworks/cryptography/test_dsa.py | 2 +- .../library-tests/frameworks/cryptography/test_ec.py | 2 +- .../library-tests/frameworks/cryptography/test_rsa.py | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/python/ql/test/experimental/library-tests/frameworks/crypto/test_dsa.py b/python/ql/test/experimental/library-tests/frameworks/crypto/test_dsa.py index dd8a9f68d72..9d58b375187 100644 --- a/python/ql/test/experimental/library-tests/frameworks/crypto/test_dsa.py +++ b/python/ql/test/experimental/library-tests/frameworks/crypto/test_dsa.py @@ -6,7 +6,7 @@ from Crypto.Signature import DSS from Crypto.Hash import SHA256 -private_key = DSA.generate(2048) +private_key = DSA.generate(2048) # $ MISSING: PublicKeyGeneration keySize=2048 public_key = private_key.publickey() # ------------------------------------------------------------------------------ diff --git a/python/ql/test/experimental/library-tests/frameworks/crypto/test_ec.py b/python/ql/test/experimental/library-tests/frameworks/crypto/test_ec.py index 0c7d17e8b81..fdf17571232 100644 --- a/python/ql/test/experimental/library-tests/frameworks/crypto/test_ec.py +++ b/python/ql/test/experimental/library-tests/frameworks/crypto/test_ec.py @@ -3,7 +3,7 @@ from Crypto.Signature import DSS from Crypto.Hash import SHA256 -private_key = ECC.generate(curve="P-256") +private_key = ECC.generate(curve="P-256") # $ MISSING: PublicKeyGeneration keySize=256 public_key = private_key.public_key() # ------------------------------------------------------------------------------ diff --git a/python/ql/test/experimental/library-tests/frameworks/crypto/test_rsa.py b/python/ql/test/experimental/library-tests/frameworks/crypto/test_rsa.py index cc384263c96..68cac4d7ad6 100644 --- a/python/ql/test/experimental/library-tests/frameworks/crypto/test_rsa.py +++ b/python/ql/test/experimental/library-tests/frameworks/crypto/test_rsa.py @@ -5,7 +5,7 
@@ from Crypto.Cipher import PKCS1_OAEP from Crypto.Signature import pss from Crypto.Hash import SHA256 -private_key = RSA.generate(2048) +private_key = RSA.generate(2048) # $ MISSING: PublicKeyGeneration keySize=2048 public_key = private_key.publickey() # ------------------------------------------------------------------------------ diff --git a/python/ql/test/experimental/library-tests/frameworks/cryptodome/test_dsa.py b/python/ql/test/experimental/library-tests/frameworks/cryptodome/test_dsa.py index 8c5b4e2e519..044f7c28df9 100644 --- a/python/ql/test/experimental/library-tests/frameworks/cryptodome/test_dsa.py +++ b/python/ql/test/experimental/library-tests/frameworks/cryptodome/test_dsa.py @@ -6,7 +6,7 @@ from Cryptodome.Signature import DSS from Cryptodome.Hash import SHA256 -private_key = DSA.generate(2048) +private_key = DSA.generate(2048) # $ MISSING: PublicKeyGeneration keySize=2048 public_key = private_key.publickey() # ------------------------------------------------------------------------------ diff --git a/python/ql/test/experimental/library-tests/frameworks/cryptodome/test_ec.py b/python/ql/test/experimental/library-tests/frameworks/cryptodome/test_ec.py index 781d1dc68ac..b1728dd9629 100644 --- a/python/ql/test/experimental/library-tests/frameworks/cryptodome/test_ec.py +++ b/python/ql/test/experimental/library-tests/frameworks/cryptodome/test_ec.py @@ -3,7 +3,7 @@ from Cryptodome.Signature import DSS from Cryptodome.Hash import SHA256 -private_key = ECC.generate(curve="P-256") +private_key = ECC.generate(curve="P-256") # $ MISSING: PublicKeyGeneration keySize=256 public_key = private_key.public_key() # ------------------------------------------------------------------------------ diff --git a/python/ql/test/experimental/library-tests/frameworks/cryptodome/test_rsa.py b/python/ql/test/experimental/library-tests/frameworks/cryptodome/test_rsa.py index 807c1dded51..19cea87d600 100644 --- 
a/python/ql/test/experimental/library-tests/frameworks/cryptodome/test_rsa.py +++ b/python/ql/test/experimental/library-tests/frameworks/cryptodome/test_rsa.py @@ -5,7 +5,7 @@ from Cryptodome.Cipher import PKCS1_OAEP from Cryptodome.Signature import pss from Cryptodome.Hash import SHA256 -private_key = RSA.generate(2048) +private_key = RSA.generate(2048) # $ MISSING: PublicKeyGeneration keySize=2048 public_key = private_key.publickey() # ------------------------------------------------------------------------------ diff --git a/python/ql/test/experimental/library-tests/frameworks/cryptography/test_dsa.py b/python/ql/test/experimental/library-tests/frameworks/cryptography/test_dsa.py index 31ca224b52f..af244a044e6 100644 --- a/python/ql/test/experimental/library-tests/frameworks/cryptography/test_dsa.py +++ b/python/ql/test/experimental/library-tests/frameworks/cryptography/test_dsa.py @@ -7,7 +7,7 @@ from cryptography.exceptions import InvalidSignature HASH_ALGORITHM = hashes.SHA256() -private_key = dsa.generate_private_key(key_size=2048) +private_key = dsa.generate_private_key(key_size=2048) # $ MISSING: PublicKeyGeneration keySize=2048 public_key = private_key.public_key() message = b"message" diff --git a/python/ql/test/experimental/library-tests/frameworks/cryptography/test_ec.py b/python/ql/test/experimental/library-tests/frameworks/cryptography/test_ec.py index 9f810b261cb..e62d5f2242b 100644 --- a/python/ql/test/experimental/library-tests/frameworks/cryptography/test_ec.py +++ b/python/ql/test/experimental/library-tests/frameworks/cryptography/test_ec.py @@ -5,7 +5,7 @@ from cryptography.hazmat.primitives import hashes from cryptography.exceptions import InvalidSignature -private_key = ec.generate_private_key(curve=ec.SECP384R1()) +private_key = ec.generate_private_key(curve=ec.SECP384R1()) # $ MISSING: PublicKeyGeneration keySize=384 public_key = private_key.public_key() HASH_ALGORITHM = hashes.SHA256() diff --git 
a/python/ql/test/experimental/library-tests/frameworks/cryptography/test_rsa.py b/python/ql/test/experimental/library-tests/frameworks/cryptography/test_rsa.py index 107b60214a9..8e7553dfa4d 100644 --- a/python/ql/test/experimental/library-tests/frameworks/cryptography/test_rsa.py +++ b/python/ql/test/experimental/library-tests/frameworks/cryptography/test_rsa.py @@ -6,7 +6,7 @@ from cryptography.hazmat.primitives import hashes from cryptography.exceptions import InvalidSignature -private_key = rsa.generate_private_key(public_exponent=65537, key_size=2048) +private_key = rsa.generate_private_key(public_exponent=65537, key_size=2048) # $ MISSING: PublicKeyGeneration keySize=2048 public_key = private_key.public_key() HASH_ALGORITHM = hashes.SHA256() From bd40965afe3dd1b976b6ec2527871fe93d4905f4 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Tue, 2 Feb 2021 16:31:20 +0100 Subject: [PATCH 04/25] Python: Add modeling for `cryptography` PyPI package --- python/ql/src/semmle/python/Frameworks.qll | 1 + .../semmle/python/frameworks/Cryptography.qll | 516 ++++++++++++++++++ .../frameworks/cryptography/test_dsa.py | 2 +- .../frameworks/cryptography/test_ec.py | 2 +- .../frameworks/cryptography/test_rsa.py | 2 +- 5 files changed, 520 insertions(+), 3 deletions(-) create mode 100644 python/ql/src/semmle/python/frameworks/Cryptography.qll diff --git a/python/ql/src/semmle/python/Frameworks.qll b/python/ql/src/semmle/python/Frameworks.qll index 23ffb88641e..7f252d0218e 100644 --- a/python/ql/src/semmle/python/Frameworks.qll +++ b/python/ql/src/semmle/python/Frameworks.qll @@ -2,6 +2,7 @@ * Helper file that imports all framework modeling. 
*/ +private import semmle.python.frameworks.Cryptography private import semmle.python.frameworks.Dill private import semmle.python.frameworks.Django private import semmle.python.frameworks.Fabric diff --git a/python/ql/src/semmle/python/frameworks/Cryptography.qll b/python/ql/src/semmle/python/frameworks/Cryptography.qll new file mode 100644 index 00000000000..3f7ae145af1 --- /dev/null +++ b/python/ql/src/semmle/python/frameworks/Cryptography.qll @@ -0,0 +1,516 @@ +/** + * Provides classes modeling security-relevant aspects of the `cryptography` PyPI package. + * See https://cryptography.io/en/latest/. + */ + +private import python +private import semmle.python.dataflow.new.DataFlow +private import semmle.python.Concepts + +/** + * Provides models for the `cryptography` PyPI package. + * See https://cryptography.io/en/latest/. + */ +private module CryptographyModel { + // --------------------------------------------------------------------------- + // cryptography + // --------------------------------------------------------------------------- + /** Gets a reference to the `cryptography` module. */ + private DataFlow::Node cryptography(DataFlow::TypeTracker t) { + t.start() and + result = DataFlow::importNode("cryptography") + or + exists(DataFlow::TypeTracker t2 | result = cryptography(t2).track(t2, t)) + } + + /** Gets a reference to the `cryptography` module. */ + DataFlow::Node cryptography() { result = cryptography(DataFlow::TypeTracker::end()) } + + /** Provides models for the `cryptography` module. */ + module cryptography { + /** + * Gets a reference to the attribute `attr_name` of the `cryptography` module. + * WARNING: Only holds for a few predefined attributes. + */ + private DataFlow::Node cryptography_attr(DataFlow::TypeTracker t, string attr_name) { + attr_name in ["hazmat"] and + ( + t.start() and + result = DataFlow::importNode("cryptography" + "." 
+ attr_name) + or + t.startInAttr(attr_name) and + result = cryptography() + ) + or + // Due to bad performance when using normal setup with `cryptography_attr(t2, attr_name).track(t2, t)` + // we have inlined that code and forced a join + exists(DataFlow::TypeTracker t2 | + exists(DataFlow::StepSummary summary | + cryptography_attr_first_join(t2, attr_name, result, summary) and + t = t2.append(summary) + ) + ) + } + + pragma[nomagic] + private predicate cryptography_attr_first_join( + DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, DataFlow::StepSummary summary + ) { + DataFlow::StepSummary::step(cryptography_attr(t2, attr_name), res, summary) + } + + /** + * Gets a reference to the attribute `attr_name` of the `cryptography` module. + * WARNING: Only holds for a few predefined attributes. + */ + private DataFlow::Node cryptography_attr(string attr_name) { + result = cryptography_attr(DataFlow::TypeTracker::end(), attr_name) + } + + // ------------------------------------------------------------------------- + // cryptography.hazmat + // ------------------------------------------------------------------------- + /** Gets a reference to the `cryptography.hazmat` module. */ + DataFlow::Node hazmat() { result = cryptography_attr("hazmat") } + + /** Provides models for the `cryptography.hazmat` module */ + module hazmat { + /** + * Gets a reference to the attribute `attr_name` of the `cryptography.hazmat` module. + * WARNING: Only holds for a few predefined attributes. + */ + private DataFlow::Node hazmat_attr(DataFlow::TypeTracker t, string attr_name) { + attr_name in ["primitives"] and + ( + t.start() and + result = DataFlow::importNode("cryptography.hazmat" + "." 
+ attr_name) + or + t.startInAttr(attr_name) and + result = hazmat() + ) + or + // Due to bad performance when using normal setup with `hazmat_attr(t2, attr_name).track(t2, t)` + // we have inlined that code and forced a join + exists(DataFlow::TypeTracker t2 | + exists(DataFlow::StepSummary summary | + hazmat_attr_first_join(t2, attr_name, result, summary) and + t = t2.append(summary) + ) + ) + } + + pragma[nomagic] + private predicate hazmat_attr_first_join( + DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, + DataFlow::StepSummary summary + ) { + DataFlow::StepSummary::step(hazmat_attr(t2, attr_name), res, summary) + } + + /** + * Gets a reference to the attribute `attr_name` of the `cryptography.hazmat` module. + * WARNING: Only holds for a few predefined attributes. + */ + private DataFlow::Node hazmat_attr(string attr_name) { + result = hazmat_attr(DataFlow::TypeTracker::end(), attr_name) + } + + // ------------------------------------------------------------------------- + // cryptography.hazmat.primitives + // ------------------------------------------------------------------------- + /** Gets a reference to the `cryptography.hazmat.primitives` module. */ + DataFlow::Node primitives() { result = hazmat_attr("primitives") } + + /** Provides models for the `cryptography.hazmat.primitives` module */ + module primitives { + /** + * Gets a reference to the attribute `attr_name` of the `cryptography.hazmat.primitives` module. + * WARNING: Only holds for a few predefined attributes. + */ + private DataFlow::Node primitives_attr(DataFlow::TypeTracker t, string attr_name) { + attr_name in ["asymmetric"] and + ( + t.start() and + result = DataFlow::importNode("cryptography.hazmat.primitives" + "." 
+ attr_name) + or + t.startInAttr(attr_name) and + result = primitives() + ) + or + // Due to bad performance when using normal setup with `primitives_attr(t2, attr_name).track(t2, t)` + // we have inlined that code and forced a join + exists(DataFlow::TypeTracker t2 | + exists(DataFlow::StepSummary summary | + primitives_attr_first_join(t2, attr_name, result, summary) and + t = t2.append(summary) + ) + ) + } + + pragma[nomagic] + private predicate primitives_attr_first_join( + DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, + DataFlow::StepSummary summary + ) { + DataFlow::StepSummary::step(primitives_attr(t2, attr_name), res, summary) + } + + /** + * Gets a reference to the attribute `attr_name` of the `cryptography.hazmat.primitives` module. + * WARNING: Only holds for a few predefined attributes. + */ + private DataFlow::Node primitives_attr(string attr_name) { + result = primitives_attr(DataFlow::TypeTracker::end(), attr_name) + } + + // ------------------------------------------------------------------------- + // cryptography.hazmat.primitives.asymmetric + // ------------------------------------------------------------------------- + /** Gets a reference to the `cryptography.hazmat.primitives.asymmetric` module. */ + DataFlow::Node asymmetric() { result = primitives_attr("asymmetric") } + + /** Provides models for the `cryptography.hazmat.primitives.asymmetric` module */ + module asymmetric { + /** + * Gets a reference to the attribute `attr_name` of the `cryptography.hazmat.primitives.asymmetric` module. + * WARNING: Only holds for a few predefined attributes. + */ + private DataFlow::Node asymmetric_attr(DataFlow::TypeTracker t, string attr_name) { + attr_name in ["rsa", "dsa", "ec"] and + ( + t.start() and + result = + DataFlow::importNode("cryptography.hazmat.primitives.asymmetric" + "." 
+ attr_name) + or + t.startInAttr(attr_name) and + result = asymmetric() + ) + or + // Due to bad performance when using normal setup with `asymmetric_attr(t2, attr_name).track(t2, t)` + // we have inlined that code and forced a join + exists(DataFlow::TypeTracker t2 | + exists(DataFlow::StepSummary summary | + asymmetric_attr_first_join(t2, attr_name, result, summary) and + t = t2.append(summary) + ) + ) + } + + pragma[nomagic] + private predicate asymmetric_attr_first_join( + DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, + DataFlow::StepSummary summary + ) { + DataFlow::StepSummary::step(asymmetric_attr(t2, attr_name), res, summary) + } + + /** + * Gets a reference to the attribute `attr_name` of the `cryptography.hazmat.primitives.asymmetric` module. + * WARNING: Only holds for a few predefined attributes. + */ + private DataFlow::Node asymmetric_attr(string attr_name) { + result = asymmetric_attr(DataFlow::TypeTracker::end(), attr_name) + } + + // ------------------------------------------------------------------------- + // cryptography.hazmat.primitives.asymmetric.rsa + // ------------------------------------------------------------------------- + /** Gets a reference to the `cryptography.hazmat.primitives.asymmetric.rsa` module. */ + DataFlow::Node rsa() { result = asymmetric_attr("rsa") } + + /** Provides models for the `cryptography.hazmat.primitives.asymmetric.rsa` module */ + module rsa { + /** + * Gets a reference to the attribute `attr_name` of the `cryptography.hazmat.primitives.asymmetric.rsa` module. + * WARNING: Only holds for a few predefined attributes. + */ + private DataFlow::Node rsa_attr(DataFlow::TypeTracker t, string attr_name) { + attr_name in ["generate_private_key"] and + ( + t.start() and + result = + DataFlow::importNode("cryptography.hazmat.primitives.asymmetric.rsa" + "." 
+ + attr_name) + or + t.startInAttr(attr_name) and + result = rsa() + ) + or + // Due to bad performance when using normal setup with `rsa_attr(t2, attr_name).track(t2, t)` + // we have inlined that code and forced a join + exists(DataFlow::TypeTracker t2 | + exists(DataFlow::StepSummary summary | + rsa_attr_first_join(t2, attr_name, result, summary) and + t = t2.append(summary) + ) + ) + } + + pragma[nomagic] + private predicate rsa_attr_first_join( + DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, + DataFlow::StepSummary summary + ) { + DataFlow::StepSummary::step(rsa_attr(t2, attr_name), res, summary) + } + + /** + * Gets a reference to the attribute `attr_name` of the `cryptography.hazmat.primitives.asymmetric.rsa` module. + * WARNING: Only holds for a few predefined attributes. + */ + private DataFlow::Node rsa_attr(string attr_name) { + result = rsa_attr(DataFlow::TypeTracker::end(), attr_name) + } + + /** Gets a reference to the `cryptography.hazmat.primitives.asymmetric.rsa.generate_private_key` function. */ + DataFlow::Node generate_private_key() { result = rsa_attr("generate_private_key") } + } + + // ------------------------------------------------------------------------- + // cryptography.hazmat.primitives.asymmetric.dsa + // ------------------------------------------------------------------------- + /** Gets a reference to the `cryptography.hazmat.primitives.asymmetric.dsa` module. */ + DataFlow::Node dsa() { result = asymmetric_attr("dsa") } + + /** Provides models for the `cryptography.hazmat.primitives.asymmetric.dsa` module */ + module dsa { + /** + * Gets a reference to the attribute `attr_name` of the `cryptography.hazmat.primitives.asymmetric.dsa` module. + * WARNING: Only holds for a few predefined attributes. 
+ */ + private DataFlow::Node dsa_attr(DataFlow::TypeTracker t, string attr_name) { + attr_name in ["generate_private_key"] and + ( + t.start() and + result = + DataFlow::importNode("cryptography.hazmat.primitives.asymmetric.dsa" + "." + + attr_name) + or + t.startInAttr(attr_name) and + result = dsa() + ) + or + // Due to bad performance when using normal setup with `dsa_attr(t2, attr_name).track(t2, t)` + // we have inlined that code and forced a join + exists(DataFlow::TypeTracker t2 | + exists(DataFlow::StepSummary summary | + dsa_attr_first_join(t2, attr_name, result, summary) and + t = t2.append(summary) + ) + ) + } + + pragma[nomagic] + private predicate dsa_attr_first_join( + DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, + DataFlow::StepSummary summary + ) { + DataFlow::StepSummary::step(dsa_attr(t2, attr_name), res, summary) + } + + /** + * Gets a reference to the attribute `attr_name` of the `cryptography.hazmat.primitives.asymmetric.dsa` module. + * WARNING: Only holds for a few predefined attributes. + */ + private DataFlow::Node dsa_attr(string attr_name) { + result = dsa_attr(DataFlow::TypeTracker::end(), attr_name) + } + + /** Gets a reference to the `cryptography.hazmat.primitives.asymmetric.dsa.generate_private_key` function. */ + DataFlow::Node generate_private_key() { result = dsa_attr("generate_private_key") } + } + + // ------------------------------------------------------------------------- + // cryptography.hazmat.primitives.asymmetric.ec + // ------------------------------------------------------------------------- + /** Gets a reference to the `cryptography.hazmat.primitives.asymmetric.ec` module. */ + DataFlow::Node ec() { result = asymmetric_attr("ec") } + + /** Provides models for the `cryptography.hazmat.primitives.asymmetric.ec` module */ + module ec { + /** + * Gets a reference to the attribute `attr_name` of the `cryptography.hazmat.primitives.asymmetric.ec` module. 
+ * WARNING: Only holds for a few predefined attributes. + */ + private DataFlow::Node ec_attr(DataFlow::TypeTracker t, string attr_name) { + attr_name in [ + "generate_private_key", + // curves + "SECT571R1", "SECT409R1", "SECT283R1", "SECT233R1", "SECT163R2", "SECT571K1", + "SECT409K1", "SECT283K1", "SECT233K1", "SECT163K1", "SECP521R1", "SECP384R1", + "SECP256R1", "SECP256K1", "SECP224R1", "SECP192R1", "BrainpoolP256R1", + "BrainpoolP384R1", "BrainpoolP512R1" + ] and + ( + t.start() and + result = + DataFlow::importNode("cryptography.hazmat.primitives.asymmetric.ec" + "." + + attr_name) + or + t.startInAttr(attr_name) and + result = ec() + ) + or + // Due to bad performance when using normal setup with `ec_attr(t2, attr_name).track(t2, t)` + // we have inlined that code and forced a join + exists(DataFlow::TypeTracker t2 | + exists(DataFlow::StepSummary summary | + ec_attr_first_join(t2, attr_name, result, summary) and + t = t2.append(summary) + ) + ) + } + + pragma[nomagic] + private predicate ec_attr_first_join( + DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, + DataFlow::StepSummary summary + ) { + DataFlow::StepSummary::step(ec_attr(t2, attr_name), res, summary) + } + + /** + * Gets a reference to the attribute `attr_name` of the `cryptography.hazmat.primitives.asymmetric.ec` module. + * WARNING: Only holds for a few predefined attributes. + */ + private DataFlow::Node ec_attr(string attr_name) { + result = ec_attr(DataFlow::TypeTracker::end(), attr_name) + } + + /** Gets a reference to the `cryptography.hazmat.primitives.asymmetric.ec.generate_private_key` function. */ + DataFlow::Node generate_private_key() { result = ec_attr("generate_private_key") } + + /** Gets a predefined curve class with a specific key size (in bits). 
*/ + DataFlow::Node curveClassWithKeySize(int keySize) { + // obtained by manually looking at source code in + // https://github.com/pyca/cryptography/blob/cba69f1922803f4f29a3fde01741890d88b8e217/src/cryptography/hazmat/primitives/asymmetric/ec.py#L208-L300 + result = ec_attr("SECT571R1") and keySize = 570 + or + result = ec_attr("SECT409R1") and keySize = 409 + or + result = ec_attr("SECT283R1") and keySize = 283 + or + result = ec_attr("SECT233R1") and keySize = 233 + or + result = ec_attr("SECT163R2") and keySize = 163 + or + result = ec_attr("SECT571K1") and keySize = 571 + or + result = ec_attr("SECT409K1") and keySize = 409 + or + result = ec_attr("SECT283K1") and keySize = 283 + or + result = ec_attr("SECT233K1") and keySize = 233 + or + result = ec_attr("SECT163K1") and keySize = 163 + or + result = ec_attr("SECP521R1") and keySize = 521 + or + result = ec_attr("SECP384R1") and keySize = 384 + or + result = ec_attr("SECP256R1") and keySize = 256 + or + result = ec_attr("SECP256K1") and keySize = 256 + or + result = ec_attr("SECP224R1") and keySize = 224 + or + result = ec_attr("SECP192R1") and keySize = 192 + or + result = ec_attr("BrainpoolP256R1") and keySize = 256 + or + result = ec_attr("BrainpoolP384R1") and keySize = 384 + or + result = ec_attr("BrainpoolP512R1") and keySize = 512 + } + + /** Gets a predefined curve class instance with a specific key size (in bits). */ + private DataFlow::Node curveClassInstanceWithKeySize( + DataFlow::TypeTracker t, int keySize + ) { + t.start() and + result.asCfgNode().(CallNode).getFunction() = + curveClassWithKeySize(keySize).asCfgNode() + or + exists(DataFlow::TypeTracker t2 | + result = curveClassInstanceWithKeySize(t2, keySize).track(t2, t) + ) + } + + /** Gets a predefined curve class instance with a specific key size (in bits). 
*/ + DataFlow::Node curveClassInstanceWithKeySize(int keySize) { + result = curveClassInstanceWithKeySize(DataFlow::TypeTracker::end(), keySize) + } + } + } + } + } + } + + // --------------------------------------------------------------------------- + /** + * A call to `cryptography.hazmat.primitives.asymmetric.rsa.generate_private_key` + * + * See https://cryptography.io/en/latest/hazmat/primitives/asymmetric/rsa.html#cryptography.hazmat.primitives.asymmetric.rsa.generate_private_key + */ + class CryptographyRSAGeneratePrivateKeyCall extends Cryptography::PublicKey::KeyGeneration::RSARange, + DataFlow::CfgNode { + override CallNode node; + + CryptographyRSAGeneratePrivateKeyCall() { + node.getFunction() = + cryptography::hazmat::primitives::asymmetric::rsa::generate_private_key().asCfgNode() + } + + override DataFlow::Node getKeySizeArg() { + result.asCfgNode() in [node.getArg(1), node.getArgByName("key_size")] + } + } + + /** + * A call to `cryptography.hazmat.primitives.asymmetric.dsa.generate_private_key` + * + * See https://cryptography.io/en/latest/hazmat/primitives/asymmetric/dsa.html#cryptography.hazmat.primitives.asymmetric.dsa.generate_private_key + */ + class CryptographyDSAGeneratePrivateKeyCall extends Cryptography::PublicKey::KeyGeneration::DSARange, + DataFlow::CfgNode { + override CallNode node; + + CryptographyDSAGeneratePrivateKeyCall() { + node.getFunction() = + cryptography::hazmat::primitives::asymmetric::dsa::generate_private_key().asCfgNode() + } + + override DataFlow::Node getKeySizeArg() { + result.asCfgNode() in [node.getArg(0), node.getArgByName("key_size")] + } + } + + /** + * A call to `cryptography.hazmat.primitives.asymmetric.ec.generate_private_key` + * + * See https://cryptography.io/en/latest/hazmat/primitives/asymmetric/ec.html#cryptography.hazmat.primitives.asymmetric.ec.generate_private_key + */ + class CryptographyECGeneratePrivateKeyCall extends Cryptography::PublicKey::KeyGeneration::ECCRange, + DataFlow::CfgNode { + 
override CallNode node; + + CryptographyECGeneratePrivateKeyCall() { + node.getFunction() = + cryptography::hazmat::primitives::asymmetric::ec::generate_private_key().asCfgNode() + } + + /** Gets the argument that specifies the curve to use. */ + DataFlow::Node getCurveArg() { + result.asCfgNode() in [node.getArg(0), node.getArgByName("curve")] + } + + override int getKeySizeWithOrigin(DataFlow::Node origin) { + origin = this.getCurveArg() and + origin = + cryptography::hazmat::primitives::asymmetric::ec::curveClassInstanceWithKeySize(result) + } + + // Note: There is not really a key-size argument, since it's always specified by the curve. + override DataFlow::Node getKeySizeArg() { none() } + } +} diff --git a/python/ql/test/experimental/library-tests/frameworks/cryptography/test_dsa.py b/python/ql/test/experimental/library-tests/frameworks/cryptography/test_dsa.py index af244a044e6..73aa38a246b 100644 --- a/python/ql/test/experimental/library-tests/frameworks/cryptography/test_dsa.py +++ b/python/ql/test/experimental/library-tests/frameworks/cryptography/test_dsa.py @@ -7,7 +7,7 @@ from cryptography.exceptions import InvalidSignature HASH_ALGORITHM = hashes.SHA256() -private_key = dsa.generate_private_key(key_size=2048) # $ MISSING: PublicKeyGeneration keySize=2048 +private_key = dsa.generate_private_key(key_size=2048) # $ PublicKeyGeneration keySize=2048 public_key = private_key.public_key() message = b"message" diff --git a/python/ql/test/experimental/library-tests/frameworks/cryptography/test_ec.py b/python/ql/test/experimental/library-tests/frameworks/cryptography/test_ec.py index e62d5f2242b..0372d7e9dbf 100644 --- a/python/ql/test/experimental/library-tests/frameworks/cryptography/test_ec.py +++ b/python/ql/test/experimental/library-tests/frameworks/cryptography/test_ec.py @@ -5,7 +5,7 @@ from cryptography.hazmat.primitives import hashes from cryptography.exceptions import InvalidSignature -private_key = ec.generate_private_key(curve=ec.SECP384R1()) # $ 
MISSING: PublicKeyGeneration keySize=384 +private_key = ec.generate_private_key(curve=ec.SECP384R1()) # $ PublicKeyGeneration keySize=384 public_key = private_key.public_key() HASH_ALGORITHM = hashes.SHA256() diff --git a/python/ql/test/experimental/library-tests/frameworks/cryptography/test_rsa.py b/python/ql/test/experimental/library-tests/frameworks/cryptography/test_rsa.py index 8e7553dfa4d..ee1d98646a9 100644 --- a/python/ql/test/experimental/library-tests/frameworks/cryptography/test_rsa.py +++ b/python/ql/test/experimental/library-tests/frameworks/cryptography/test_rsa.py @@ -6,7 +6,7 @@ from cryptography.hazmat.primitives import hashes from cryptography.exceptions import InvalidSignature -private_key = rsa.generate_private_key(public_exponent=65537, key_size=2048) # $ MISSING: PublicKeyGeneration keySize=2048 +private_key = rsa.generate_private_key(public_exponent=65537, key_size=2048) # $ PublicKeyGeneration keySize=2048 public_key = private_key.public_key() HASH_ALGORITHM = hashes.SHA256() From 6e4c627209803f79af79a3259520e319bf984bf3 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Tue, 2 Feb 2021 16:34:28 +0100 Subject: [PATCH 05/25] Python: Add modeling for `pycryptodomex` PyPI package --- python/ql/src/semmle/python/Frameworks.qll | 1 + .../semmle/python/frameworks/Cryptodome.qll | 354 ++++++++++++++++++ .../frameworks/cryptodome/test_dsa.py | 2 +- .../frameworks/cryptodome/test_ec.py | 2 +- .../frameworks/cryptodome/test_rsa.py | 2 +- 5 files changed, 358 insertions(+), 3 deletions(-) create mode 100644 python/ql/src/semmle/python/frameworks/Cryptodome.qll diff --git a/python/ql/src/semmle/python/Frameworks.qll b/python/ql/src/semmle/python/Frameworks.qll index 7f252d0218e..a00511ca545 100644 --- a/python/ql/src/semmle/python/Frameworks.qll +++ b/python/ql/src/semmle/python/Frameworks.qll @@ -2,6 +2,7 @@ * Helper file that imports all framework modeling. 
*/ +private import semmle.python.frameworks.Cryptodome private import semmle.python.frameworks.Cryptography private import semmle.python.frameworks.Dill private import semmle.python.frameworks.Django diff --git a/python/ql/src/semmle/python/frameworks/Cryptodome.qll b/python/ql/src/semmle/python/frameworks/Cryptodome.qll new file mode 100644 index 00000000000..e960e1baff6 --- /dev/null +++ b/python/ql/src/semmle/python/frameworks/Cryptodome.qll @@ -0,0 +1,354 @@ +/** + * Provides classes modeling security-relevant aspects of + * - the `pycryptodome` PyPI package (imported as `Crypto`) + * - the `pycryptodomex` PyPI package (imported as `Cryptodome`) + * See https://pycryptodome.readthedocs.io/en/latest/. + */ + +private import python +private import semmle.python.dataflow.new.DataFlow +private import semmle.python.Concepts + +/** + * Provides models for + * - the `pycryptodome` PyPI package (imported as `Crypto`) + * - the `pycryptodomex` PyPI package (imported as `Cryptodome`) + * See https://pycryptodome.readthedocs.io/en/latest/ + */ +private module CryptodomeModel { + // --------------------------------------------------------------------------- + // Cryptodome + // --------------------------------------------------------------------------- + /** Gets a reference to the `Cryptodome` module. */ + private DataFlow::Node cryptodome(DataFlow::TypeTracker t) { + t.start() and + result = DataFlow::importNode("Cryptodome") + or + exists(DataFlow::TypeTracker t2 | result = cryptodome(t2).track(t2, t)) + } + + /** Gets a reference to the `Cryptodome` module. */ + DataFlow::Node cryptodome() { result = cryptodome(DataFlow::TypeTracker::end()) } + + /** Provides models for the `Cryptodome` module. */ + module Cryptodome { + /** + * Gets a reference to the attribute `attr_name` of the `Cryptodome` module. + * WARNING: Only holds for a few predefined attributes. 
+ */ + private DataFlow::Node cryptodome_attr(DataFlow::TypeTracker t, string attr_name) { + attr_name in ["PublicKey"] and + ( + t.start() and + result = DataFlow::importNode("Cryptodome" + "." + attr_name) + or + t.startInAttr(attr_name) and + result = cryptodome() + ) + or + // Due to bad performance when using normal setup with `cryptodome_attr(t2, attr_name).track(t2, t)` + // we have inlined that code and forced a join + exists(DataFlow::TypeTracker t2 | + exists(DataFlow::StepSummary summary | + cryptodome_attr_first_join(t2, attr_name, result, summary) and + t = t2.append(summary) + ) + ) + } + + pragma[nomagic] + private predicate cryptodome_attr_first_join( + DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, DataFlow::StepSummary summary + ) { + DataFlow::StepSummary::step(cryptodome_attr(t2, attr_name), res, summary) + } + + /** + * Gets a reference to the attribute `attr_name` of the `Cryptodome` module. + * WARNING: Only holds for a few predefined attributes. + */ + private DataFlow::Node cryptodome_attr(string attr_name) { + result = cryptodome_attr(DataFlow::TypeTracker::end(), attr_name) + } + + // ------------------------------------------------------------------------- + // Cryptodome.PublicKey + // ------------------------------------------------------------------------- + /** Gets a reference to the `Cryptodome.PublicKey` module. */ + DataFlow::Node publicKey() { result = cryptodome_attr("PublicKey") } + + /** Provides models for the `Cryptodome.PublicKey` module */ + module PublicKey { + /** + * Gets a reference to the attribute `attr_name` of the `Cryptodome.PublicKey` module. + * WARNING: Only holds for a few predefined attributes. + */ + private DataFlow::Node publicKey_attr(DataFlow::TypeTracker t, string attr_name) { + attr_name in ["RSA", "DSA", "ECC"] and + ( + t.start() and + result = DataFlow::importNode("Cryptodome.PublicKey" + "." 
+ attr_name) + or + t.startInAttr(attr_name) and + result = publicKey() + ) + or + // Due to bad performance when using normal setup with `publicKey_attr(t2, attr_name).track(t2, t)` + // we have inlined that code and forced a join + exists(DataFlow::TypeTracker t2 | + exists(DataFlow::StepSummary summary | + publicKey_attr_first_join(t2, attr_name, result, summary) and + t = t2.append(summary) + ) + ) + } + + pragma[nomagic] + private predicate publicKey_attr_first_join( + DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, + DataFlow::StepSummary summary + ) { + DataFlow::StepSummary::step(publicKey_attr(t2, attr_name), res, summary) + } + + /** + * Gets a reference to the attribute `attr_name` of the `Cryptodome.PublicKey` module. + * WARNING: Only holds for a few predefined attributes. + */ + private DataFlow::Node publicKey_attr(string attr_name) { + result = publicKey_attr(DataFlow::TypeTracker::end(), attr_name) + } + + // ------------------------------------------------------------------------- + // Cryptodome.PublicKey.RSA + // ------------------------------------------------------------------------- + /** Gets a reference to the `Cryptodome.PublicKey.RSA` module. */ + DataFlow::Node rsa() { result = publicKey_attr("RSA") } + + /** Provides models for the `Cryptodome.PublicKey.RSA` module */ + module RSA { + /** + * Gets a reference to the attribute `attr_name` of the `Cryptodome.PublicKey.RSA` module. + * WARNING: Only holds for a few predefined attributes. + */ + private DataFlow::Node rsa_attr(DataFlow::TypeTracker t, string attr_name) { + attr_name in ["generate"] and + ( + t.start() and + result = DataFlow::importNode("Cryptodome.PublicKey.RSA" + "." 
+ attr_name) + or + t.startInAttr(attr_name) and + result = rsa() + ) + or + // Due to bad performance when using normal setup with `rsa_attr(t2, attr_name).track(t2, t)` + // we have inlined that code and forced a join + exists(DataFlow::TypeTracker t2 | + exists(DataFlow::StepSummary summary | + rsa_attr_first_join(t2, attr_name, result, summary) and + t = t2.append(summary) + ) + ) + } + + pragma[nomagic] + private predicate rsa_attr_first_join( + DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, + DataFlow::StepSummary summary + ) { + DataFlow::StepSummary::step(rsa_attr(t2, attr_name), res, summary) + } + + /** + * Gets a reference to the attribute `attr_name` of the `Cryptodome.PublicKey.RSA` module. + * WARNING: Only holds for a few predefined attributes. + */ + private DataFlow::Node rsa_attr(string attr_name) { + result = rsa_attr(DataFlow::TypeTracker::end(), attr_name) + } + + /** Gets a reference to the `Cryptodome.PublicKey.RSA.generate` function. */ + DataFlow::Node generate() { result = rsa_attr("generate") } + } + + // ------------------------------------------------------------------------- + // Cryptodome.PublicKey.DSA + // ------------------------------------------------------------------------- + /** Gets a reference to the `Cryptodome.PublicKey.DSA` module. */ + DataFlow::Node dsa() { result = publicKey_attr("DSA") } + + /** Provides models for the `Cryptodome.PublicKey.DSA` module */ + module DSA { + /** + * Gets a reference to the attribute `attr_name` of the `Cryptodome.PublicKey.DSA` module. + * WARNING: Only holds for a few predefined attributes. + */ + private DataFlow::Node dsa_attr(DataFlow::TypeTracker t, string attr_name) { + attr_name in ["generate"] and + ( + t.start() and + result = DataFlow::importNode("Cryptodome.PublicKey.DSA" + "." 
+ attr_name) + or + t.startInAttr(attr_name) and + result = dsa() + ) + or + // Due to bad performance when using normal setup with `dsa_attr(t2, attr_name).track(t2, t)` + // we have inlined that code and forced a join + exists(DataFlow::TypeTracker t2 | + exists(DataFlow::StepSummary summary | + dsa_attr_first_join(t2, attr_name, result, summary) and + t = t2.append(summary) + ) + ) + } + + pragma[nomagic] + private predicate dsa_attr_first_join( + DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, + DataFlow::StepSummary summary + ) { + DataFlow::StepSummary::step(dsa_attr(t2, attr_name), res, summary) + } + + /** + * Gets a reference to the attribute `attr_name` of the `Cryptodome.PublicKey.DSA` module. + * WARNING: Only holds for a few predefined attributes. + */ + private DataFlow::Node dsa_attr(string attr_name) { + result = dsa_attr(DataFlow::TypeTracker::end(), attr_name) + } + + /** Gets a reference to the `Cryptodome.PublicKey.DSA.generate` function. */ + DataFlow::Node generate() { result = dsa_attr("generate") } + } + + // ------------------------------------------------------------------------- + // Cryptodome.PublicKey.ECC + // ------------------------------------------------------------------------- + /** Gets a reference to the `Cryptodome.PublicKey.ECC` module. */ + DataFlow::Node ecc() { result = publicKey_attr("ECC") } + + /** Provides models for the `Cryptodome.PublicKey.ECC` module */ + module ECC { + /** + * Gets a reference to the attribute `attr_name` of the `Cryptodome.PublicKey.ECC` module. + * WARNING: Only holds for a few predefined attributes. + */ + private DataFlow::Node ecc_attr(DataFlow::TypeTracker t, string attr_name) { + attr_name in ["generate"] and + ( + t.start() and + result = DataFlow::importNode("Cryptodome.PublicKey.ECC" + "." 
+ attr_name) + or + t.startInAttr(attr_name) and + result = ecc() + ) + or + // Due to bad performance when using normal setup with `ecc_attr(t2, attr_name).track(t2, t)` + // we have inlined that code and forced a join + exists(DataFlow::TypeTracker t2 | + exists(DataFlow::StepSummary summary | + ecc_attr_first_join(t2, attr_name, result, summary) and + t = t2.append(summary) + ) + ) + } + + pragma[nomagic] + private predicate ecc_attr_first_join( + DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, + DataFlow::StepSummary summary + ) { + DataFlow::StepSummary::step(ecc_attr(t2, attr_name), res, summary) + } + + /** + * Gets a reference to the attribute `attr_name` of the `Cryptodome.PublicKey.ECC` module. + * WARNING: Only holds for a few predefined attributes. + */ + private DataFlow::Node ecc_attr(string attr_name) { + result = ecc_attr(DataFlow::TypeTracker::end(), attr_name) + } + + /** Gets a reference to the `Cryptodome.PublicKey.ECC.generate` function. */ + DataFlow::Node generate() { result = ecc_attr("generate") } + } + } + } + + // --------------------------------------------------------------------------- + /** + * A call to `Cryptodome.PublicKey.RSA.generate` + * + * See https://pycryptodome.readthedocs.io/en/latest/src/public_key/rsa.html#Crypto.PublicKey.RSA.generate + */ + class CryptodomePublicKeyRSAGenerateCall extends Cryptography::PublicKey::KeyGeneration::RSARange, + DataFlow::CfgNode { + override CallNode node; + + CryptodomePublicKeyRSAGenerateCall() { + node.getFunction() = Cryptodome::PublicKey::RSA::generate().asCfgNode() + } + + override DataFlow::Node getKeySizeArg() { + result.asCfgNode() in [node.getArg(0), node.getArgByName("bits")] + } + } + + /** + * A call to `Cryptodome.PublicKey.DSA.generate` + * + * See https://pycryptodome.readthedocs.io/en/latest/src/public_key/dsa.html#Crypto.PublicKey.DSA.generate + */ + class CryptodomePublicKeyDSAGenerateCall extends Cryptography::PublicKey::KeyGeneration::DSARange, + 
DataFlow::CfgNode { + override CallNode node; + + CryptodomePublicKeyDSAGenerateCall() { + node.getFunction() = Cryptodome::PublicKey::DSA::generate().asCfgNode() + } + + override DataFlow::Node getKeySizeArg() { + result.asCfgNode() in [node.getArg(0), node.getArgByName("bits")] + } + } + + /** + * A call to `Cryptodome.PublicKey.ECC.generate` + * + * See https://pycryptodome.readthedocs.io/en/latest/src/public_key/ecc.html#Crypto.PublicKey.ECC.generate + */ + class CryptodomePublicKeyEccGenerateCall extends Cryptography::PublicKey::KeyGeneration::ECCRange, + DataFlow::CfgNode { + override CallNode node; + + CryptodomePublicKeyEccGenerateCall() { + node.getFunction() = Cryptodome::PublicKey::ECC::generate().asCfgNode() + } + + /** Gets the argument that specifies the curve to use (a string). */ + DataFlow::Node getCurveArg() { result.asCfgNode() in [node.getArgByName("curve")] } + + string getCurveWithOrigin(DataFlow::Node origin) { + exists(StrConst str | origin = DataFlow::exprNode(str) | + origin.(DataFlow::LocalSourceNode).flowsTo(this.getCurveArg()) and + result = str.getText() + ) + } + + override int getKeySizeWithOrigin(DataFlow::Node origin) { + exists(string curve | curve = getCurveWithOrigin(origin) | + // using list from https://pycryptodome.readthedocs.io/en/latest/src/public_key/ecc.html + curve in ["NIST P-256", "p256", "P-256", "prime256v1", "secp256r1"] and result = 256 + or + curve in ["NIST P-384", "p384", "P-384", "prime384v1", "secp384r1"] and result = 384 + or + curve in ["NIST P-521", "p521", "P-521", "prime521v1", "secp521r1"] and result = 521 + ) + } + + // Note: There is not really a key-size argument, since it's always specified by the curve. 
+ override DataFlow::Node getKeySizeArg() { none() } + } +} diff --git a/python/ql/test/experimental/library-tests/frameworks/cryptodome/test_dsa.py b/python/ql/test/experimental/library-tests/frameworks/cryptodome/test_dsa.py index 044f7c28df9..a33cf8c0944 100644 --- a/python/ql/test/experimental/library-tests/frameworks/cryptodome/test_dsa.py +++ b/python/ql/test/experimental/library-tests/frameworks/cryptodome/test_dsa.py @@ -6,7 +6,7 @@ from Cryptodome.Signature import DSS from Cryptodome.Hash import SHA256 -private_key = DSA.generate(2048) # $ MISSING: PublicKeyGeneration keySize=2048 +private_key = DSA.generate(2048) # $ PublicKeyGeneration keySize=2048 public_key = private_key.publickey() # ------------------------------------------------------------------------------ diff --git a/python/ql/test/experimental/library-tests/frameworks/cryptodome/test_ec.py b/python/ql/test/experimental/library-tests/frameworks/cryptodome/test_ec.py index b1728dd9629..d3860bbb3b3 100644 --- a/python/ql/test/experimental/library-tests/frameworks/cryptodome/test_ec.py +++ b/python/ql/test/experimental/library-tests/frameworks/cryptodome/test_ec.py @@ -3,7 +3,7 @@ from Cryptodome.Signature import DSS from Cryptodome.Hash import SHA256 -private_key = ECC.generate(curve="P-256") # $ MISSING: PublicKeyGeneration keySize=256 +private_key = ECC.generate(curve="P-256") # $ PublicKeyGeneration keySize=256 public_key = private_key.public_key() # ------------------------------------------------------------------------------ diff --git a/python/ql/test/experimental/library-tests/frameworks/cryptodome/test_rsa.py b/python/ql/test/experimental/library-tests/frameworks/cryptodome/test_rsa.py index 19cea87d600..fd1feccb29b 100644 --- a/python/ql/test/experimental/library-tests/frameworks/cryptodome/test_rsa.py +++ b/python/ql/test/experimental/library-tests/frameworks/cryptodome/test_rsa.py @@ -5,7 +5,7 @@ from Cryptodome.Cipher import PKCS1_OAEP from Cryptodome.Signature import pss from 
Cryptodome.Hash import SHA256 -private_key = RSA.generate(2048) # $ MISSING: PublicKeyGeneration keySize=2048 +private_key = RSA.generate(2048) # $ PublicKeyGeneration keySize=2048 public_key = private_key.publickey() # ------------------------------------------------------------------------------ From d5ff477644a37b289ed95159451d832510ba7cd1 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Tue, 2 Feb 2021 16:42:40 +0100 Subject: [PATCH 06/25] Python: Add modeling for `pycryptodome` PyPI package --- .../semmle/python/frameworks/Cryptodome.qll | 69 ++++++++++--------- .../frameworks/crypto/test_dsa.py | 2 +- .../frameworks/crypto/test_ec.py | 2 +- .../frameworks/crypto/test_rsa.py | 2 +- 4 files changed, 39 insertions(+), 36 deletions(-) diff --git a/python/ql/src/semmle/python/frameworks/Cryptodome.qll b/python/ql/src/semmle/python/frameworks/Cryptodome.qll index e960e1baff6..b598cc76677 100644 --- a/python/ql/src/semmle/python/frameworks/Cryptodome.qll +++ b/python/ql/src/semmle/python/frameworks/Cryptodome.qll @@ -19,28 +19,28 @@ private module CryptodomeModel { // --------------------------------------------------------------------------- // Cryptodome // --------------------------------------------------------------------------- - /** Gets a reference to the `Cryptodome` module. */ + /** Gets a reference to the `Cryptodome`/`Crypto` module. */ private DataFlow::Node cryptodome(DataFlow::TypeTracker t) { t.start() and - result = DataFlow::importNode("Cryptodome") + result = DataFlow::importNode(["Cryptodome", "Crypto"]) or exists(DataFlow::TypeTracker t2 | result = cryptodome(t2).track(t2, t)) } - /** Gets a reference to the `Cryptodome` module. */ + /** Gets a reference to the `Cryptodome`/`Crypto` module. */ DataFlow::Node cryptodome() { result = cryptodome(DataFlow::TypeTracker::end()) } - /** Provides models for the `Cryptodome` module. */ + /** Provides models for the `Cryptodome`/`Crypto` module. 
*/ module Cryptodome { /** - * Gets a reference to the attribute `attr_name` of the `Cryptodome` module. + * Gets a reference to the attribute `attr_name` of the `Cryptodome`/`Crypto` module. * WARNING: Only holds for a few predefined attributes. */ private DataFlow::Node cryptodome_attr(DataFlow::TypeTracker t, string attr_name) { attr_name in ["PublicKey"] and ( t.start() and - result = DataFlow::importNode("Cryptodome" + "." + attr_name) + result = DataFlow::importNode(["Cryptodome", "Crypto"] + "." + attr_name) or t.startInAttr(attr_name) and result = cryptodome() @@ -64,7 +64,7 @@ private module CryptodomeModel { } /** - * Gets a reference to the attribute `attr_name` of the `Cryptodome` module. + * Gets a reference to the attribute `attr_name` of the `Cryptodome`/`Crypto` module. * WARNING: Only holds for a few predefined attributes. */ private DataFlow::Node cryptodome_attr(string attr_name) { @@ -74,20 +74,20 @@ private module CryptodomeModel { // ------------------------------------------------------------------------- // Cryptodome.PublicKey // ------------------------------------------------------------------------- - /** Gets a reference to the `Cryptodome.PublicKey` module. */ + /** Gets a reference to the `Cryptodome.PublicKey`/`Crypto.PublicKey` module. */ DataFlow::Node publicKey() { result = cryptodome_attr("PublicKey") } - /** Provides models for the `Cryptodome.PublicKey` module */ + /** Provides models for the `Cryptodome.PublicKey`/`Crypto.PublicKey` module */ module PublicKey { /** - * Gets a reference to the attribute `attr_name` of the `Cryptodome.PublicKey` module. + * Gets a reference to the attribute `attr_name` of the `Cryptodome.PublicKey`/`Crypto.PublicKey` module. * WARNING: Only holds for a few predefined attributes. */ private DataFlow::Node publicKey_attr(DataFlow::TypeTracker t, string attr_name) { attr_name in ["RSA", "DSA", "ECC"] and ( t.start() and - result = DataFlow::importNode("Cryptodome.PublicKey" + "." 
+ attr_name) + result = DataFlow::importNode(["Cryptodome", "Crypto"] + ".PublicKey" + "." + attr_name) or t.startInAttr(attr_name) and result = publicKey() @@ -112,7 +112,7 @@ private module CryptodomeModel { } /** - * Gets a reference to the attribute `attr_name` of the `Cryptodome.PublicKey` module. + * Gets a reference to the attribute `attr_name` of the `Cryptodome.PublicKey`/`Crypto.PublicKey` module. * WARNING: Only holds for a few predefined attributes. */ private DataFlow::Node publicKey_attr(string attr_name) { @@ -122,20 +122,21 @@ private module CryptodomeModel { // ------------------------------------------------------------------------- // Cryptodome.PublicKey.RSA // ------------------------------------------------------------------------- - /** Gets a reference to the `Cryptodome.PublicKey.RSA` module. */ + /** Gets a reference to the `Cryptodome.PublicKey.RSA`/`Crypto.PublicKey.RSA` module. */ DataFlow::Node rsa() { result = publicKey_attr("RSA") } - /** Provides models for the `Cryptodome.PublicKey.RSA` module */ + /** Provides models for the `Cryptodome.PublicKey.RSA`/`Crypto.PublicKey.RSA` module */ module RSA { /** - * Gets a reference to the attribute `attr_name` of the `Cryptodome.PublicKey.RSA` module. + * Gets a reference to the attribute `attr_name` of the `Cryptodome.PublicKey.RSA`/`Crypto.PublicKey.RSA` module. * WARNING: Only holds for a few predefined attributes. */ private DataFlow::Node rsa_attr(DataFlow::TypeTracker t, string attr_name) { attr_name in ["generate"] and ( t.start() and - result = DataFlow::importNode("Cryptodome.PublicKey.RSA" + "." + attr_name) + result = + DataFlow::importNode(["Cryptodome", "Crypto"] + ".PublicKey.RSA" + "." + attr_name) or t.startInAttr(attr_name) and result = rsa() @@ -160,34 +161,35 @@ private module CryptodomeModel { } /** - * Gets a reference to the attribute `attr_name` of the `Cryptodome.PublicKey.RSA` module. 
+ * Gets a reference to the attribute `attr_name` of the `Cryptodome.PublicKey.RSA`/`Crypto.PublicKey.RSA` module. * WARNING: Only holds for a few predefined attributes. */ private DataFlow::Node rsa_attr(string attr_name) { result = rsa_attr(DataFlow::TypeTracker::end(), attr_name) } - /** Gets a reference to the `Cryptodome.PublicKey.RSA.generate` function. */ + /** Gets a reference to the `Cryptodome.PublicKey.RSA.generate`/`Crypto.PublicKey.RSA.generate` function. */ DataFlow::Node generate() { result = rsa_attr("generate") } } // ------------------------------------------------------------------------- // Cryptodome.PublicKey.DSA // ------------------------------------------------------------------------- - /** Gets a reference to the `Cryptodome.PublicKey.DSA` module. */ + /** Gets a reference to the `Cryptodome.PublicKey.DSA`/`Crypto.PublicKey.DSA` module. */ DataFlow::Node dsa() { result = publicKey_attr("DSA") } - /** Provides models for the `Cryptodome.PublicKey.DSA` module */ + /** Provides models for the `Cryptodome.PublicKey.DSA`/`Crypto.PublicKey.DSA` module */ module DSA { /** - * Gets a reference to the attribute `attr_name` of the `Cryptodome.PublicKey.DSA` module. + * Gets a reference to the attribute `attr_name` of the `Cryptodome.PublicKey.DSA`/`Crypto.PublicKey.DSA` module. * WARNING: Only holds for a few predefined attributes. */ private DataFlow::Node dsa_attr(DataFlow::TypeTracker t, string attr_name) { attr_name in ["generate"] and ( t.start() and - result = DataFlow::importNode("Cryptodome.PublicKey.DSA" + "." + attr_name) + result = + DataFlow::importNode(["Cryptodome", "Crypto"] + ".PublicKey.DSA" + "." + attr_name) or t.startInAttr(attr_name) and result = dsa() @@ -212,34 +214,35 @@ private module CryptodomeModel { } /** - * Gets a reference to the attribute `attr_name` of the `Cryptodome.PublicKey.DSA` module. + * Gets a reference to the attribute `attr_name` of the `Cryptodome.PublicKey.DSA`/`Crypto.PublicKey.DSA` module. 
* WARNING: Only holds for a few predefined attributes. */ private DataFlow::Node dsa_attr(string attr_name) { result = dsa_attr(DataFlow::TypeTracker::end(), attr_name) } - /** Gets a reference to the `Cryptodome.PublicKey.DSA.generate` function. */ + /** Gets a reference to the `Cryptodome.PublicKey.DSA.generate`/`Crypto.PublicKey.DSA.generate` function. */ DataFlow::Node generate() { result = dsa_attr("generate") } } // ------------------------------------------------------------------------- // Cryptodome.PublicKey.ECC // ------------------------------------------------------------------------- - /** Gets a reference to the `Cryptodome.PublicKey.ECC` module. */ + /** Gets a reference to the `Cryptodome.PublicKey.ECC`/`Crypto.PublicKey.ECC` module. */ DataFlow::Node ecc() { result = publicKey_attr("ECC") } - /** Provides models for the `Cryptodome.PublicKey.ECC` module */ + /** Provides models for the `Cryptodome.PublicKey.ECC`/`Crypto.PublicKey.ECC` module */ module ECC { /** - * Gets a reference to the attribute `attr_name` of the `Cryptodome.PublicKey.ECC` module. + * Gets a reference to the attribute `attr_name` of the `Cryptodome.PublicKey.ECC`/`Crypto.PublicKey.ECC` module. * WARNING: Only holds for a few predefined attributes. */ private DataFlow::Node ecc_attr(DataFlow::TypeTracker t, string attr_name) { attr_name in ["generate"] and ( t.start() and - result = DataFlow::importNode("Cryptodome.PublicKey.ECC" + "." + attr_name) + result = + DataFlow::importNode(["Cryptodome", "Crypto"] + ".PublicKey.ECC" + "." + attr_name) or t.startInAttr(attr_name) and result = ecc() @@ -264,14 +267,14 @@ private module CryptodomeModel { } /** - * Gets a reference to the attribute `attr_name` of the `Cryptodome.PublicKey.ECC` module. + * Gets a reference to the attribute `attr_name` of the `Cryptodome.PublicKey.ECC`/`Crypto.PublicKey.ECC` module. * WARNING: Only holds for a few predefined attributes. 
*/ private DataFlow::Node ecc_attr(string attr_name) { result = ecc_attr(DataFlow::TypeTracker::end(), attr_name) } - /** Gets a reference to the `Cryptodome.PublicKey.ECC.generate` function. */ + /** Gets a reference to the `Cryptodome.PublicKey.ECC.generate`/`Crypto.PublicKey.ECC.generate` function. */ DataFlow::Node generate() { result = ecc_attr("generate") } } } @@ -279,7 +282,7 @@ private module CryptodomeModel { // --------------------------------------------------------------------------- /** - * A call to `Cryptodome.PublicKey.RSA.generate` + * A call to `Cryptodome.PublicKey.RSA.generate`/`Crypto.PublicKey.RSA.generate` * * See https://pycryptodome.readthedocs.io/en/latest/src/public_key/rsa.html#Crypto.PublicKey.RSA.generate */ @@ -297,7 +300,7 @@ private module CryptodomeModel { } /** - * A call to `Cryptodome.PublicKey.DSA.generate` + * A call to `Cryptodome.PublicKey.DSA.generate`/`Crypto.PublicKey.DSA.generate` * * See https://pycryptodome.readthedocs.io/en/latest/src/public_key/dsa.html#Crypto.PublicKey.DSA.generate */ @@ -315,7 +318,7 @@ private module CryptodomeModel { } /** - * A call to `Cryptodome.PublicKey.ECC.generate` + * A call to `Cryptodome.PublicKey.ECC.generate`/`Crypto.PublicKey.ECC.generate` * * See https://pycryptodome.readthedocs.io/en/latest/src/public_key/ecc.html#Crypto.PublicKey.ECC.generate */ diff --git a/python/ql/test/experimental/library-tests/frameworks/crypto/test_dsa.py b/python/ql/test/experimental/library-tests/frameworks/crypto/test_dsa.py index 9d58b375187..a6c2c081845 100644 --- a/python/ql/test/experimental/library-tests/frameworks/crypto/test_dsa.py +++ b/python/ql/test/experimental/library-tests/frameworks/crypto/test_dsa.py @@ -6,7 +6,7 @@ from Crypto.Signature import DSS from Crypto.Hash import SHA256 -private_key = DSA.generate(2048) # $ MISSING: PublicKeyGeneration keySize=2048 +private_key = DSA.generate(2048) # $ PublicKeyGeneration keySize=2048 public_key = private_key.publickey() # 
------------------------------------------------------------------------------ diff --git a/python/ql/test/experimental/library-tests/frameworks/crypto/test_ec.py b/python/ql/test/experimental/library-tests/frameworks/crypto/test_ec.py index fdf17571232..2b482b4aa4e 100644 --- a/python/ql/test/experimental/library-tests/frameworks/crypto/test_ec.py +++ b/python/ql/test/experimental/library-tests/frameworks/crypto/test_ec.py @@ -3,7 +3,7 @@ from Crypto.Signature import DSS from Crypto.Hash import SHA256 -private_key = ECC.generate(curve="P-256") # $ MISSING: PublicKeyGeneration keySize=256 +private_key = ECC.generate(curve="P-256") # $ PublicKeyGeneration keySize=256 public_key = private_key.public_key() # ------------------------------------------------------------------------------ diff --git a/python/ql/test/experimental/library-tests/frameworks/crypto/test_rsa.py b/python/ql/test/experimental/library-tests/frameworks/crypto/test_rsa.py index 68cac4d7ad6..d2532e707e1 100644 --- a/python/ql/test/experimental/library-tests/frameworks/crypto/test_rsa.py +++ b/python/ql/test/experimental/library-tests/frameworks/crypto/test_rsa.py @@ -5,7 +5,7 @@ from Crypto.Cipher import PKCS1_OAEP from Crypto.Signature import pss from Crypto.Hash import SHA256 -private_key = RSA.generate(2048) # $ MISSING: PublicKeyGeneration keySize=2048 +private_key = RSA.generate(2048) # $ PublicKeyGeneration keySize=2048 public_key = private_key.publickey() # ------------------------------------------------------------------------------ From 2429c6c4508f3f4b1232aaebc301dc578dbb4875 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Tue, 2 Feb 2021 16:50:42 +0100 Subject: [PATCH 07/25] Python: Rewrite py/weak-crypto-key tests * Removed backend argument that is not required * Added DSA constants (they are just accidentally the same as RSA right now) * Removed FakeWeakEllipticCurve and used a real weak elliptic curve instead --- .../Security/CWE-326/WeakCrypto.expected | 16 ++--
.../Security/CWE-326/weak_crypto.py | 71 ++++++++++--------- 2 files changed, 44 insertions(+), 43 deletions(-) diff --git a/python/ql/test/query-tests/Security/CWE-326/WeakCrypto.expected b/python/ql/test/query-tests/Security/CWE-326/WeakCrypto.expected index e0e656f4bb3..a52d67eaff9 100644 --- a/python/ql/test/query-tests/Security/CWE-326/WeakCrypto.expected +++ b/python/ql/test/query-tests/Security/CWE-326/WeakCrypto.expected @@ -1,8 +1,8 @@ -| weak_crypto.py:67:1:67:30 | ControlFlowNode for dsa_gen_key() | Creation of an DSA key uses $@ bits, which is below 2048 and considered breakable. | weak_crypto.py:12:12:12:15 | ControlFlowNode for IntegerLiteral | 1024 | -| weak_crypto.py:68:1:68:28 | ControlFlowNode for ec_gen_key() | Creation of an ECC key uses $@ bits, which is below 224 and considered breakable. | weak_crypto.py:21:11:21:33 | ControlFlowNode for FakeWeakEllipticCurve() | 160 | -| weak_crypto.py:69:1:69:37 | ControlFlowNode for rsa_gen_key() | Creation of an RSA key uses $@ bits, which is below 2048 and considered breakable. | weak_crypto.py:12:12:12:15 | ControlFlowNode for IntegerLiteral | 1024 | -| weak_crypto.py:71:1:71:39 | ControlFlowNode for dsa_gen_key() | Creation of an DSA key uses $@ bits, which is below 2048 and considered breakable. | weak_crypto.py:12:12:12:15 | ControlFlowNode for IntegerLiteral | 1024 | -| weak_crypto.py:72:1:72:34 | ControlFlowNode for ec_gen_key() | Creation of an ECC key uses $@ bits, which is below 224 and considered breakable. | weak_crypto.py:21:11:21:33 | ControlFlowNode for FakeWeakEllipticCurve() | 160 | -| weak_crypto.py:73:1:73:46 | ControlFlowNode for rsa_gen_key() | Creation of an RSA key uses $@ bits, which is below 2048 and considered breakable. | weak_crypto.py:12:12:12:15 | ControlFlowNode for IntegerLiteral | 1024 | -| weak_crypto.py:75:1:75:22 | ControlFlowNode for Attribute() | Creation of an DSA key uses $@ bits, which is below 2048 and considered breakable. 
| weak_crypto.py:12:12:12:15 | ControlFlowNode for IntegerLiteral | 1024 | -| weak_crypto.py:76:1:76:22 | ControlFlowNode for Attribute() | Creation of an RSA key uses $@ bits, which is below 2048 and considered breakable. | weak_crypto.py:12:12:12:15 | ControlFlowNode for IntegerLiteral | 1024 | +| weak_crypto.py:68:1:68:21 | ControlFlowNode for dsa_gen_key() | Creation of an DSA key uses $@ bits, which is below 2048 and considered breakable. | weak_crypto.py:16:12:16:15 | ControlFlowNode for IntegerLiteral | 1024 | +| weak_crypto.py:69:1:69:19 | ControlFlowNode for ec_gen_key() | Creation of an ECC key uses $@ bits, which is below 224 and considered breakable. | weak_crypto.py:22:11:22:24 | ControlFlowNode for Attribute() | 163 | +| weak_crypto.py:70:1:70:28 | ControlFlowNode for rsa_gen_key() | Creation of an RSA key uses $@ bits, which is below 2048 and considered breakable. | weak_crypto.py:12:12:12:15 | ControlFlowNode for IntegerLiteral | 1024 | +| weak_crypto.py:72:1:72:30 | ControlFlowNode for dsa_gen_key() | Creation of an DSA key uses $@ bits, which is below 2048 and considered breakable. | weak_crypto.py:16:12:16:15 | ControlFlowNode for IntegerLiteral | 1024 | +| weak_crypto.py:73:1:73:25 | ControlFlowNode for ec_gen_key() | Creation of an ECC key uses $@ bits, which is below 224 and considered breakable. | weak_crypto.py:22:11:22:24 | ControlFlowNode for Attribute() | 163 | +| weak_crypto.py:74:1:74:37 | ControlFlowNode for rsa_gen_key() | Creation of an RSA key uses $@ bits, which is below 2048 and considered breakable. | weak_crypto.py:12:12:12:15 | ControlFlowNode for IntegerLiteral | 1024 | +| weak_crypto.py:76:1:76:22 | ControlFlowNode for Attribute() | Creation of an DSA key uses $@ bits, which is below 2048 and considered breakable. 
| weak_crypto.py:16:12:16:15 | ControlFlowNode for IntegerLiteral | 1024 | +| weak_crypto.py:77:1:77:22 | ControlFlowNode for Attribute() | Creation of an RSA key uses $@ bits, which is below 2048 and considered breakable. | weak_crypto.py:12:12:12:15 | ControlFlowNode for IntegerLiteral | 1024 | diff --git a/python/ql/test/query-tests/Security/CWE-326/weak_crypto.py b/python/ql/test/query-tests/Security/CWE-326/weak_crypto.py index 77a123f1617..a64d39bc866 100644 --- a/python/ql/test/query-tests/Security/CWE-326/weak_crypto.py +++ b/python/ql/test/query-tests/Security/CWE-326/weak_crypto.py @@ -1,7 +1,7 @@ from cryptography.hazmat import backends from cryptography.hazmat.primitives.asymmetric import ec, dsa, rsa -#Crypto and Cryptodome have same API +# Crypto and Cryptodome have same API if random(): from Crypto.PublicKey import DSA from Crypto.PublicKey import RSA @@ -12,13 +12,14 @@ else: RSA_WEAK = 1024 RSA_OK = 2048 RSA_STRONG = 3076 + +DSA_WEAK = 1024 +DSA_OK = 2048 +DSA_STRONG = 3076 + BIG = 10000 -class FakeWeakEllipticCurve: - name = "fake" - key_size = 160 - -EC_WEAK = FakeWeakEllipticCurve() +EC_WEAK = ec.SECT163K1() # has key size of 163 EC_OK = ec.SECP224R1() EC_STRONG = ec.SECP384R1() EC_BIG = ec.SECT571R1() @@ -27,50 +28,50 @@ dsa_gen_key = dsa.generate_private_key ec_gen_key = ec.generate_private_key rsa_gen_key = rsa.generate_private_key -default = backends.default_backend() -#Strong and OK keys. -dsa_gen_key(key_size=RSA_OK, backend=default) -dsa_gen_key(key_size=RSA_STRONG, backend=default) -dsa_gen_key(key_size=BIG, backend=default) -ec_gen_key(curve=EC_OK, backend=default) -ec_gen_key(curve=EC_STRONG, backend=default) -ec_gen_key(curve=EC_BIG, backend=default) -rsa_gen_key(public_exponent=65537, key_size=RSA_OK, backend=default) -rsa_gen_key(public_exponent=65537, key_size=RSA_STRONG, backend=default) -rsa_gen_key(public_exponent=65537, key_size=BIG, backend=default) +# Strong and OK keys. 
+ +dsa_gen_key(key_size=DSA_OK) +dsa_gen_key(key_size=DSA_STRONG) +dsa_gen_key(key_size=BIG) +ec_gen_key(curve=EC_OK) +ec_gen_key(curve=EC_STRONG) +ec_gen_key(curve=EC_BIG) +rsa_gen_key(public_exponent=65537, key_size=RSA_OK) +rsa_gen_key(public_exponent=65537, key_size=RSA_STRONG) +rsa_gen_key(public_exponent=65537, key_size=BIG) DSA.generate(bits=RSA_OK) DSA.generate(bits=RSA_STRONG) RSA.generate(bits=RSA_OK) RSA.generate(bits=RSA_STRONG) -dsa_gen_key(RSA_OK, default) -dsa_gen_key(RSA_STRONG, default) -dsa_gen_key(BIG, default) -ec_gen_key(EC_OK, default) -ec_gen_key(EC_STRONG, default) -ec_gen_key(EC_BIG, default) -rsa_gen_key(65537, RSA_OK, default) -rsa_gen_key(65537, RSA_STRONG, default) -rsa_gen_key(65537, BIG, default) +dsa_gen_key(DSA_OK) +dsa_gen_key(DSA_STRONG) +dsa_gen_key(BIG) +ec_gen_key(EC_OK) +ec_gen_key(EC_STRONG) +ec_gen_key(EC_BIG) +rsa_gen_key(65537, RSA_OK) +rsa_gen_key(65537, RSA_STRONG) +rsa_gen_key(65537, BIG) -DSA.generate(RSA_OK) -DSA.generate(RSA_STRONG) +DSA.generate(DSA_OK) +DSA.generate(DSA_STRONG) RSA.generate(RSA_OK) RSA.generate(RSA_STRONG) # Weak keys -dsa_gen_key(RSA_WEAK, default) -ec_gen_key(EC_WEAK, default) -rsa_gen_key(65537, RSA_WEAK, default) +dsa_gen_key(DSA_WEAK) +ec_gen_key(EC_WEAK) +rsa_gen_key(65537, RSA_WEAK) -dsa_gen_key(key_size=RSA_WEAK, default) -ec_gen_key(curve=EC_WEAK, default) -rsa_gen_key(65537, key_size=RSA_WEAK, default) +dsa_gen_key(key_size=DSA_WEAK) +ec_gen_key(curve=EC_WEAK) +rsa_gen_key(65537, key_size=RSA_WEAK) -DSA.generate(RSA_WEAK) +DSA.generate(DSA_WEAK) RSA.generate(RSA_WEAK) From 46ad611d576359a9505c53811ab5d964e4235792 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Tue, 2 Feb 2021 17:10:36 +0100 Subject: [PATCH 08/25] Python: Port py/weak-crypto-key to use type-tracking instead of points-to. 
Looking at query results also made me realize I didn't supply a very good "origin" for ECC in cryptography package, so I improved that :+1: -- maybe that should have been split into multiple commits... too late :( --- .../2021-02-02-port-weak-crypto-key-query.md | 2 + python/ql/src/Security/CWE-326/WeakCrypto.ql | 75 ++--------------- .../CWE-326/WeakCrypto.ql | 81 +++++++++++++++++++ .../semmle/python/frameworks/Cryptography.qll | 21 ++--- .../test/query-tests/Security/CWE-326/options | 1 - 5 files changed, 102 insertions(+), 78 deletions(-) create mode 100644 python/change-notes/2021-02-02-port-weak-crypto-key-query.md create mode 100644 python/ql/src/experimental/Security-old-dataflow/CWE-326/WeakCrypto.ql delete mode 100644 python/ql/test/query-tests/Security/CWE-326/options diff --git a/python/change-notes/2021-02-02-port-weak-crypto-key-query.md b/python/change-notes/2021-02-02-port-weak-crypto-key-query.md new file mode 100644 index 00000000000..20a2e2d1bdf --- /dev/null +++ b/python/change-notes/2021-02-02-port-weak-crypto-key-query.md @@ -0,0 +1,2 @@ +lgtm,codescanning +* Ported _Use of weak cryptographic key_ (`py/weak-crypto-key`) query to use new type-tracking approach instead of points-to. This might result in some difference in results being found, but overall this should result in a more robust and accurate analysis.
diff --git a/python/ql/src/Security/CWE-326/WeakCrypto.ql b/python/ql/src/Security/CWE-326/WeakCrypto.ql index 27c1fcce429..5fb3494e3e6 100644 --- a/python/ql/src/Security/CWE-326/WeakCrypto.ql +++ b/python/ql/src/Security/CWE-326/WeakCrypto.ql @@ -10,72 +10,13 @@ */ import python +import semmle.python.Concepts +import semmle.python.dataflow.new.DataFlow -int minimumSecureKeySize(string algo) { - algo = "DSA" and result = 2048 - or - algo = "RSA" and result = 2048 - or - algo = "ECC" and result = 224 -} - -predicate dsaRsaKeySizeArg(FunctionValue func, string algorithm, string arg) { - exists(ModuleValue mod | func = mod.attr(_) | - algorithm = "DSA" and - ( - mod = Module::named("cryptography.hazmat.primitives.asymmetric.dsa") and arg = "key_size" - or - mod = Module::named("Crypto.PublicKey.DSA") and arg = "bits" - or - mod = Module::named("Cryptodome.PublicKey.DSA") and arg = "bits" - ) - or - algorithm = "RSA" and - ( - mod = Module::named("cryptography.hazmat.primitives.asymmetric.rsa") and arg = "key_size" - or - mod = Module::named("Crypto.PublicKey.RSA") and arg = "bits" - or - mod = Module::named("Cryptodome.PublicKey.RSA") and arg = "bits" - ) - ) -} - -predicate ecKeySizeArg(FunctionValue func, string arg) { - exists(ModuleValue mod | func = mod.attr(_) | - mod = Module::named("cryptography.hazmat.primitives.asymmetric.ec") and arg = "curve" - ) -} - -int keySizeFromCurve(ClassValue curveClass) { - result = curveClass.declaredAttribute("key_size").(NumericValue).getIntValue() -} - -predicate algorithmAndKeysizeForCall( - CallNode call, string algorithm, int keySize, ControlFlowNode keyOrigin -) { - exists(FunctionValue func, string argname, ControlFlowNode arg | - arg = func.getNamedArgumentForCall(call, argname) - | - exists(NumericValue key | - arg.pointsTo(key, keyOrigin) and - dsaRsaKeySizeArg(func, algorithm, argname) and - keySize = key.getIntValue() - ) - or - exists(Value curveClassInstance | - algorithm = "ECC" and - ecKeySizeArg(func, argname) 
and - arg.pointsTo(_, curveClassInstance, keyOrigin) and - keySize = keySizeFromCurve(curveClassInstance.getClass()) - ) - ) -} - -from CallNode call, string algo, int keySize, ControlFlowNode origin +from Cryptography::PublicKey::KeyGeneration keyGen, int keySize, DataFlow::Node origin where - algorithmAndKeysizeForCall(call, algo, keySize, origin) and - keySize < minimumSecureKeySize(algo) -select call, - "Creation of an " + algo + " key uses $@ bits, which is below " + minimumSecureKeySize(algo) + - " and considered breakable.", origin, keySize.toString() + keySize = keyGen.getKeySizeWithOrigin(origin) and + keySize < keyGen.minimumSecureKeySize() +select keyGen, + "Creation of an " + keyGen.getName() + " key uses $@ bits, which is below " + + keyGen.minimumSecureKeySize() + " and considered breakable.", origin, keySize.toString() diff --git a/python/ql/src/experimental/Security-old-dataflow/CWE-326/WeakCrypto.ql b/python/ql/src/experimental/Security-old-dataflow/CWE-326/WeakCrypto.ql new file mode 100644 index 00000000000..27c1fcce429 --- /dev/null +++ b/python/ql/src/experimental/Security-old-dataflow/CWE-326/WeakCrypto.ql @@ -0,0 +1,81 @@ +/** + * @name Use of weak cryptographic key + * @description Use of a cryptographic key that is too small may allow the encryption to be broken. 
+ * @kind problem + * @problem.severity error + * @precision high + * @id py/weak-crypto-key + * @tags security + * external/cwe/cwe-326 + */ + +import python + +int minimumSecureKeySize(string algo) { + algo = "DSA" and result = 2048 + or + algo = "RSA" and result = 2048 + or + algo = "ECC" and result = 224 +} + +predicate dsaRsaKeySizeArg(FunctionValue func, string algorithm, string arg) { + exists(ModuleValue mod | func = mod.attr(_) | + algorithm = "DSA" and + ( + mod = Module::named("cryptography.hazmat.primitives.asymmetric.dsa") and arg = "key_size" + or + mod = Module::named("Crypto.PublicKey.DSA") and arg = "bits" + or + mod = Module::named("Cryptodome.PublicKey.DSA") and arg = "bits" + ) + or + algorithm = "RSA" and + ( + mod = Module::named("cryptography.hazmat.primitives.asymmetric.rsa") and arg = "key_size" + or + mod = Module::named("Crypto.PublicKey.RSA") and arg = "bits" + or + mod = Module::named("Cryptodome.PublicKey.RSA") and arg = "bits" + ) + ) +} + +predicate ecKeySizeArg(FunctionValue func, string arg) { + exists(ModuleValue mod | func = mod.attr(_) | + mod = Module::named("cryptography.hazmat.primitives.asymmetric.ec") and arg = "curve" + ) +} + +int keySizeFromCurve(ClassValue curveClass) { + result = curveClass.declaredAttribute("key_size").(NumericValue).getIntValue() +} + +predicate algorithmAndKeysizeForCall( + CallNode call, string algorithm, int keySize, ControlFlowNode keyOrigin +) { + exists(FunctionValue func, string argname, ControlFlowNode arg | + arg = func.getNamedArgumentForCall(call, argname) + | + exists(NumericValue key | + arg.pointsTo(key, keyOrigin) and + dsaRsaKeySizeArg(func, algorithm, argname) and + keySize = key.getIntValue() + ) + or + exists(Value curveClassInstance | + algorithm = "ECC" and + ecKeySizeArg(func, argname) and + arg.pointsTo(_, curveClassInstance, keyOrigin) and + keySize = keySizeFromCurve(curveClassInstance.getClass()) + ) + ) +} + +from CallNode call, string algo, int keySize, ControlFlowNode 
origin +where + algorithmAndKeysizeForCall(call, algo, keySize, origin) and + keySize < minimumSecureKeySize(algo) +select call, + "Creation of an " + algo + " key uses $@ bits, which is below " + minimumSecureKeySize(algo) + + " and considered breakable.", origin, keySize.toString() diff --git a/python/ql/src/semmle/python/frameworks/Cryptography.qll b/python/ql/src/semmle/python/frameworks/Cryptography.qll index 3f7ae145af1..cd37c8662b4 100644 --- a/python/ql/src/semmle/python/frameworks/Cryptography.qll +++ b/python/ql/src/semmle/python/frameworks/Cryptography.qll @@ -423,22 +423,23 @@ private module CryptographyModel { result = ec_attr("BrainpoolP512R1") and keySize = 512 } - /** Gets a predefined curve class instance with a specific key size (in bits). */ + /** Gets a reference to a predefined curve class instance with a specific key size (in bits), as well as the origin of the class. */ private DataFlow::Node curveClassInstanceWithKeySize( - DataFlow::TypeTracker t, int keySize + DataFlow::TypeTracker t, int keySize, DataFlow::Node origin ) { t.start() and result.asCfgNode().(CallNode).getFunction() = - curveClassWithKeySize(keySize).asCfgNode() + curveClassWithKeySize(keySize).asCfgNode() and + origin = result or exists(DataFlow::TypeTracker t2 | - result = curveClassInstanceWithKeySize(t2, keySize).track(t2, t) + result = curveClassInstanceWithKeySize(t2, keySize, origin).track(t2, t) ) } - /** Gets a predefined curve class instance with a specific key size (in bits). */ - DataFlow::Node curveClassInstanceWithKeySize(int keySize) { - result = curveClassInstanceWithKeySize(DataFlow::TypeTracker::end(), keySize) + /** Gets a reference to a predefined curve class instance with a specific key size (in bits), as well as the origin of the class. 
*/ + DataFlow::Node curveClassInstanceWithKeySize(int keySize, DataFlow::Node origin) { + result = curveClassInstanceWithKeySize(DataFlow::TypeTracker::end(), keySize, origin) } } } @@ -505,9 +506,9 @@ private module CryptographyModel { } override int getKeySizeWithOrigin(DataFlow::Node origin) { - origin = this.getCurveArg() and - origin = - cryptography::hazmat::primitives::asymmetric::ec::curveClassInstanceWithKeySize(result) + this.getCurveArg() = + cryptography::hazmat::primitives::asymmetric::ec::curveClassInstanceWithKeySize(result, + origin) } // Note: There is not really a key-size argument, since it's always specified by the curve. diff --git a/python/ql/test/query-tests/Security/CWE-326/options b/python/ql/test/query-tests/Security/CWE-326/options deleted file mode 100644 index 492768b3481..00000000000 --- a/python/ql/test/query-tests/Security/CWE-326/options +++ /dev/null @@ -1 +0,0 @@ -semmle-extractor-options: -p ../lib/ --max-import-depth=3 From 0e9a54e9a98fef3a209b5490bff10f35c10b4ffd Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Tue, 2 Feb 2021 17:14:56 +0100 Subject: [PATCH 09/25] Python: Rename WeakCrypto to WeakCryptoKey Since WeakCrypto always makes me think that it's about all weak crypto (like using MD5, or completely broken ciphers such as ARC4 or DES) and not just about weak key generation.
--- python/change-notes/2021-02-02-port-weak-crypto-key-query.md | 1 + .../Security/CWE-326/{WeakCrypto.qhelp => WeakCryptoKey.qhelp} | 0 .../ql/src/Security/CWE-326/{WeakCrypto.ql => WeakCryptoKey.ql} | 0 python/ql/test/query-tests/Security/CWE-326/WeakCrypto.qlref | 1 - .../CWE-326/{WeakCrypto.expected => WeakCryptoKey.expected} | 0 python/ql/test/query-tests/Security/CWE-326/WeakCryptoKey.qlref | 1 + 6 files changed, 2 insertions(+), 1 deletion(-) rename python/ql/src/Security/CWE-326/{WeakCrypto.qhelp => WeakCryptoKey.qhelp} (100%) rename python/ql/src/Security/CWE-326/{WeakCrypto.ql => WeakCryptoKey.ql} (100%) delete mode 100644 python/ql/test/query-tests/Security/CWE-326/WeakCrypto.qlref rename python/ql/test/query-tests/Security/CWE-326/{WeakCrypto.expected => WeakCryptoKey.expected} (100%) create mode 100644 python/ql/test/query-tests/Security/CWE-326/WeakCryptoKey.qlref diff --git a/python/change-notes/2021-02-02-port-weak-crypto-key-query.md b/python/change-notes/2021-02-02-port-weak-crypto-key-query.md index 20a2e2d1bdf..93897c586e8 100644 --- a/python/change-notes/2021-02-02-port-weak-crypto-key-query.md +++ b/python/change-notes/2021-02-02-port-weak-crypto-key-query.md @@ -1,2 +1,3 @@ lgtm,codescanning * Ported _Use of weak cryptographic key_ (`py/weak-crypto-key`) query to use new type-tracking approach instead of points-to. This might result in some difference in results being found, but overall this should result in a more robust and accurate analysis. +* Renamed the query file for _Use of weak cryptographic key_ (`py/weak-crypto-key`) from `WeakCrypto.ql` to `WeakCryptoKey.ql` (in the `python/ql/src/Security/CWE-326/` folder), which could impact custom query suites that include/exclude this query by using its path.
diff --git a/python/ql/src/Security/CWE-326/WeakCrypto.qhelp b/python/ql/src/Security/CWE-326/WeakCryptoKey.qhelp similarity index 100% rename from python/ql/src/Security/CWE-326/WeakCrypto.qhelp rename to python/ql/src/Security/CWE-326/WeakCryptoKey.qhelp diff --git a/python/ql/src/Security/CWE-326/WeakCrypto.ql b/python/ql/src/Security/CWE-326/WeakCryptoKey.ql similarity index 100% rename from python/ql/src/Security/CWE-326/WeakCrypto.ql rename to python/ql/src/Security/CWE-326/WeakCryptoKey.ql diff --git a/python/ql/test/query-tests/Security/CWE-326/WeakCrypto.qlref b/python/ql/test/query-tests/Security/CWE-326/WeakCrypto.qlref deleted file mode 100644 index 75676139ac3..00000000000 --- a/python/ql/test/query-tests/Security/CWE-326/WeakCrypto.qlref +++ /dev/null @@ -1 +0,0 @@ -Security/CWE-326/WeakCrypto.ql diff --git a/python/ql/test/query-tests/Security/CWE-326/WeakCrypto.expected b/python/ql/test/query-tests/Security/CWE-326/WeakCryptoKey.expected similarity index 100% rename from python/ql/test/query-tests/Security/CWE-326/WeakCrypto.expected rename to python/ql/test/query-tests/Security/CWE-326/WeakCryptoKey.expected diff --git a/python/ql/test/query-tests/Security/CWE-326/WeakCryptoKey.qlref b/python/ql/test/query-tests/Security/CWE-326/WeakCryptoKey.qlref new file mode 100644 index 00000000000..70a66eef06e --- /dev/null +++ b/python/ql/test/query-tests/Security/CWE-326/WeakCryptoKey.qlref @@ -0,0 +1 @@ +Security/CWE-326/WeakCryptoKey.ql From 32d0790500f234e52a1a8eceae3813052cf076c4 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Wed, 3 Feb 2021 13:16:56 +0100 Subject: [PATCH 10/25] Python: Use camelCase for RSA/DSA/ECC after asking around, this seems to be the right approach --- python/ql/src/semmle/python/Concepts.qll | 6 +++--- .../ql/src/semmle/python/frameworks/Cryptodome.qll | 10 +++++----- .../ql/src/semmle/python/frameworks/Cryptography.qll | 12 ++++++------ 3 files changed, 14 insertions(+), 14 deletions(-) diff --git 
a/python/ql/src/semmle/python/Concepts.qll b/python/ql/src/semmle/python/Concepts.qll index 1f04b077834..989003a1e8a 100644 --- a/python/ql/src/semmle/python/Concepts.qll +++ b/python/ql/src/semmle/python/Concepts.qll @@ -591,21 +591,21 @@ module Cryptography { } /** A data-flow node that generates a new RSA key-pair. */ - abstract class RSARange extends Range { + abstract class RsaRange extends Range { override string getName() { result = "RSA" } override int minimumSecureKeySize() { result = 2048 } } /** A data-flow node that generates a new DSA key-pair. */ - abstract class DSARange extends Range { + abstract class DsaRange extends Range { override string getName() { result = "DSA" } override int minimumSecureKeySize() { result = 2048 } } /** A data-flow node that generates a new ECC key-pair. */ - abstract class ECCRange extends Range { + abstract class EccRange extends Range { override string getName() { result = "ECC" } override int minimumSecureKeySize() { result = 224 } diff --git a/python/ql/src/semmle/python/frameworks/Cryptodome.qll b/python/ql/src/semmle/python/frameworks/Cryptodome.qll index b598cc76677..cd8f589bcd6 100644 --- a/python/ql/src/semmle/python/frameworks/Cryptodome.qll +++ b/python/ql/src/semmle/python/frameworks/Cryptodome.qll @@ -286,11 +286,11 @@ private module CryptodomeModel { * * See https://pycryptodome.readthedocs.io/en/latest/src/public_key/rsa.html#Crypto.PublicKey.RSA.generate */ - class CryptodomePublicKeyRSAGenerateCall extends Cryptography::PublicKey::KeyGeneration::RSARange, + class CryptodomePublicKeyRsaGenerateCall extends Cryptography::PublicKey::KeyGeneration::RsaRange, DataFlow::CfgNode { override CallNode node; - CryptodomePublicKeyRSAGenerateCall() { + CryptodomePublicKeyRsaGenerateCall() { node.getFunction() = Cryptodome::PublicKey::RSA::generate().asCfgNode() } @@ -304,11 +304,11 @@ private module CryptodomeModel { * * See 
https://pycryptodome.readthedocs.io/en/latest/src/public_key/dsa.html#Crypto.PublicKey.DSA.generate */ - class CryptodomePublicKeyDSAGenerateCall extends Cryptography::PublicKey::KeyGeneration::DSARange, + class CryptodomePublicKeyDsaGenerateCall extends Cryptography::PublicKey::KeyGeneration::DsaRange, DataFlow::CfgNode { override CallNode node; - CryptodomePublicKeyDSAGenerateCall() { + CryptodomePublicKeyDsaGenerateCall() { node.getFunction() = Cryptodome::PublicKey::DSA::generate().asCfgNode() } @@ -322,7 +322,7 @@ private module CryptodomeModel { * * See https://pycryptodome.readthedocs.io/en/latest/src/public_key/ecc.html#Crypto.PublicKey.ECC.generate */ - class CryptodomePublicKeyEccGenerateCall extends Cryptography::PublicKey::KeyGeneration::ECCRange, + class CryptodomePublicKeyEccGenerateCall extends Cryptography::PublicKey::KeyGeneration::EccRange, DataFlow::CfgNode { override CallNode node; diff --git a/python/ql/src/semmle/python/frameworks/Cryptography.qll b/python/ql/src/semmle/python/frameworks/Cryptography.qll index cd37c8662b4..94f3d5c230a 100644 --- a/python/ql/src/semmle/python/frameworks/Cryptography.qll +++ b/python/ql/src/semmle/python/frameworks/Cryptography.qll @@ -453,11 +453,11 @@ private module CryptographyModel { * * See https://cryptography.io/en/latest/hazmat/primitives/asymmetric/rsa.html#cryptography.hazmat.primitives.asymmetric.rsa.generate_private_key */ - class CryptographyRSAGeneratePrivateKeyCall extends Cryptography::PublicKey::KeyGeneration::RSARange, + class CryptographyRsaGeneratePrivateKeyCall extends Cryptography::PublicKey::KeyGeneration::RsaRange, DataFlow::CfgNode { override CallNode node; - CryptographyRSAGeneratePrivateKeyCall() { + CryptographyRsaGeneratePrivateKeyCall() { node.getFunction() = cryptography::hazmat::primitives::asymmetric::rsa::generate_private_key().asCfgNode() } @@ -472,11 +472,11 @@ private module CryptographyModel { * * See 
https://cryptography.io/en/latest/hazmat/primitives/asymmetric/dsa.html#cryptography.hazmat.primitives.asymmetric.dsa.generate_private_key */ - class CryptographyDSAGeneratePrivateKeyCall extends Cryptography::PublicKey::KeyGeneration::DSARange, + class CryptographyDsaGeneratePrivateKeyCall extends Cryptography::PublicKey::KeyGeneration::DsaRange, DataFlow::CfgNode { override CallNode node; - CryptographyDSAGeneratePrivateKeyCall() { + CryptographyDsaGeneratePrivateKeyCall() { node.getFunction() = cryptography::hazmat::primitives::asymmetric::dsa::generate_private_key().asCfgNode() } @@ -491,11 +491,11 @@ private module CryptographyModel { * * See https://cryptography.io/en/latest/hazmat/primitives/asymmetric/ec.html#cryptography.hazmat.primitives.asymmetric.ec.generate_private_key */ - class CryptographyECGeneratePrivateKeyCall extends Cryptography::PublicKey::KeyGeneration::ECCRange, + class CryptographyEcGeneratePrivateKeyCall extends Cryptography::PublicKey::KeyGeneration::EccRange, DataFlow::CfgNode { override CallNode node; - CryptographyECGeneratePrivateKeyCall() { + CryptographyEcGeneratePrivateKeyCall() { node.getFunction() = cryptography::hazmat::primitives::asymmetric::ec::generate_private_key().asCfgNode() } From 8d3170bcb465133ee272c559256e7a2fbe3e60c9 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Tue, 16 Feb 2021 11:07:58 +0100 Subject: [PATCH 11/25] Python: Fix bad join in crypto models --- .../src/semmle/python/frameworks/Cryptography.qll | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/python/ql/src/semmle/python/frameworks/Cryptography.qll b/python/ql/src/semmle/python/frameworks/Cryptography.qll index 94f3d5c230a..02da624c31f 100644 --- a/python/ql/src/semmle/python/frameworks/Cryptography.qll +++ b/python/ql/src/semmle/python/frameworks/Cryptography.qll @@ -432,11 +432,24 @@ private module CryptographyModel { curveClassWithKeySize(keySize).asCfgNode() and origin = result or + // Due to bad performance 
when using normal setup with we have inlined that code and forced a join exists(DataFlow::TypeTracker t2 | - result = curveClassInstanceWithKeySize(t2, keySize, origin).track(t2, t) + exists(DataFlow::StepSummary summary | + curveClassInstanceWithKeySize_first_join(t2, keySize, origin, result, summary) and + t = t2.append(summary) + ) ) } + pragma[nomagic] + private predicate curveClassInstanceWithKeySize_first_join( + DataFlow::TypeTracker t2, int keySize, DataFlow::Node origin, DataFlow::Node res, + DataFlow::StepSummary summary + ) { + DataFlow::StepSummary::step(curveClassInstanceWithKeySize(t2, keySize, origin), res, + summary) + } + /** Gets a reference to a predefined curve class instance with a specific key size (in bits), as well as the origin of the class. */ DataFlow::Node curveClassInstanceWithKeySize(int keySize, DataFlow::Node origin) { result = curveClassInstanceWithKeySize(DataFlow::TypeTracker::end(), keySize, origin) From bfbaa8527230229887f7200c82dfe530e0b6ef23 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Tue, 16 Feb 2021 15:47:39 +0100 Subject: [PATCH 12/25] Python: Add test of public_key method with cryptodome Added in 3.10 release https://github.com/Legrandin/pycryptodome/blob/master/Changelog.rst#3100-6-february-2021 --- .../experimental/library-tests/frameworks/crypto/test_rsa.py | 3 +++ .../library-tests/frameworks/cryptodome/test_rsa.py | 3 +++ 2 files changed, 6 insertions(+) diff --git a/python/ql/test/experimental/library-tests/frameworks/crypto/test_rsa.py b/python/ql/test/experimental/library-tests/frameworks/crypto/test_rsa.py index d2532e707e1..7d463e4f384 100644 --- a/python/ql/test/experimental/library-tests/frameworks/crypto/test_rsa.py +++ b/python/ql/test/experimental/library-tests/frameworks/crypto/test_rsa.py @@ -6,7 +6,10 @@ from Crypto.Signature import pss from Crypto.Hash import SHA256 private_key = RSA.generate(2048) # $ PublicKeyGeneration keySize=2048 + +# These 2 methods do the same public_key = 
private_key.publickey() +public_key = private_key.public_key() # ------------------------------------------------------------------------------ # encrypt/decrypt diff --git a/python/ql/test/experimental/library-tests/frameworks/cryptodome/test_rsa.py b/python/ql/test/experimental/library-tests/frameworks/cryptodome/test_rsa.py index fd1feccb29b..cee261e5ebe 100644 --- a/python/ql/test/experimental/library-tests/frameworks/cryptodome/test_rsa.py +++ b/python/ql/test/experimental/library-tests/frameworks/cryptodome/test_rsa.py @@ -6,7 +6,10 @@ from Cryptodome.Signature import pss from Cryptodome.Hash import SHA256 private_key = RSA.generate(2048) # $ PublicKeyGeneration keySize=2048 + +# These 2 methods do the same public_key = private_key.publickey() +public_key = private_key.public_key() # ------------------------------------------------------------------------------ # encrypt/decrypt From 1eabfbd0e4a776c479445969b6420aa1b467a464 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Wed, 17 Feb 2021 14:12:04 +0100 Subject: [PATCH 13/25] Python: Port cryptography models to use API graphs (mostly) --- .../semmle/python/frameworks/Cryptography.qll | 558 ++++-------------- 1 file changed, 106 insertions(+), 452 deletions(-) diff --git a/python/ql/src/semmle/python/frameworks/Cryptography.qll b/python/ql/src/semmle/python/frameworks/Cryptography.qll index 02da624c31f..1c1e309fcce 100644 --- a/python/ql/src/semmle/python/frameworks/Cryptography.qll +++ b/python/ql/src/semmle/python/frameworks/Cryptography.qll @@ -6,457 +6,103 @@ private import python private import semmle.python.dataflow.new.DataFlow private import semmle.python.Concepts +private import semmle.python.ApiGraphs /** * Provides models for the `cryptography` PyPI package. * See https://cryptography.io/en/latest/. 
*/ private module CryptographyModel { - // --------------------------------------------------------------------------- - // cryptography - // --------------------------------------------------------------------------- - /** Gets a reference to the `cryptography` module. */ - private DataFlow::Node cryptography(DataFlow::TypeTracker t) { - t.start() and - result = DataFlow::importNode("cryptography") - or - exists(DataFlow::TypeTracker t2 | result = cryptography(t2).track(t2, t)) - } - - /** Gets a reference to the `cryptography` module. */ - DataFlow::Node cryptography() { result = cryptography(DataFlow::TypeTracker::end()) } - - /** Provides models for the `cryptography` module. */ - module cryptography { + /** + * Provides helper predicates for the elliptic curve cryptography parts in + * `cryptography.hazmat.primitives.asymmetric.ec`. + */ + module Ecc { /** - * Gets a reference to the attribute `attr_name` of the `cryptography` module. - * WARNING: Only holds for a few predefined attributes. + * Gets a predefined curve class from + * `cryptography.hazmat.primitives.asymmetric.ec` with a specific key size (in bits). */ - private DataFlow::Node cryptography_attr(DataFlow::TypeTracker t, string attr_name) { - attr_name in ["hazmat"] and - ( - t.start() and - result = DataFlow::importNode("cryptography" + "."
+ attr_name) + private DataFlow::Node curveClassWithKeySize(int keySize) { + exists(string curveName | + result = + API::moduleImport("cryptography") + .getMember("hazmat") + .getMember("primitives") + .getMember("asymmetric") + .getMember("ec") + .getMember(curveName) + .getAUse() + | + // obtained by manually looking at source code in + // https://github.com/pyca/cryptography/blob/cba69f1922803f4f29a3fde01741890d88b8e217/src/cryptography/hazmat/primitives/asymmetric/ec.py#L208-L300 + curveName = "SECT571R1" and keySize = 570 or - t.startInAttr(attr_name) and - result = cryptography() + curveName = "SECT409R1" and keySize = 409 + or + curveName = "SECT283R1" and keySize = 283 + or + curveName = "SECT233R1" and keySize = 233 + or + curveName = "SECT163R2" and keySize = 163 + or + curveName = "SECT571K1" and keySize = 571 + or + curveName = "SECT409K1" and keySize = 409 + or + curveName = "SECT283K1" and keySize = 283 + or + curveName = "SECT233K1" and keySize = 233 + or + curveName = "SECT163K1" and keySize = 163 + or + curveName = "SECP521R1" and keySize = 521 + or + curveName = "SECP384R1" and keySize = 384 + or + curveName = "SECP256R1" and keySize = 256 + or + curveName = "SECP256K1" and keySize = 256 + or + curveName = "SECP224R1" and keySize = 224 + or + curveName = "SECP192R1" and keySize = 192 + or + curveName = "BrainpoolP256R1" and keySize = 256 + or + curveName = "BrainpoolP384R1" and keySize = 384 + or + curveName = "BrainpoolP512R1" and keySize = 512 ) + } + + /** Gets a reference to a predefined curve class instance with a specific key size (in bits), as well as the origin of the class. 
*/ + private DataFlow::Node curveClassInstanceWithKeySize( + DataFlow::TypeTracker t, int keySize, DataFlow::Node origin + ) { + t.start() and + result.asCfgNode().(CallNode).getFunction() = curveClassWithKeySize(keySize).asCfgNode() and + origin = result or - // Due to bad performance when using normal setup with `cryptography_attr(t2, attr_name).track(t2, t)` - // we have inlined that code and forced a join + // Due to bad performance when using normal setup with we have inlined that code and forced a join exists(DataFlow::TypeTracker t2 | exists(DataFlow::StepSummary summary | - cryptography_attr_first_join(t2, attr_name, result, summary) and + curveClassInstanceWithKeySize_first_join(t2, keySize, origin, result, summary) and t = t2.append(summary) ) ) } pragma[nomagic] - private predicate cryptography_attr_first_join( - DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, DataFlow::StepSummary summary + private predicate curveClassInstanceWithKeySize_first_join( + DataFlow::TypeTracker t2, int keySize, DataFlow::Node origin, DataFlow::Node res, + DataFlow::StepSummary summary ) { - DataFlow::StepSummary::step(cryptography_attr(t2, attr_name), res, summary) + DataFlow::StepSummary::step(curveClassInstanceWithKeySize(t2, keySize, origin), res, summary) } - /** - * Gets a reference to the attribute `attr_name` of the `cryptography` module. - * WARNING: Only holds for a few predefined attributes. - */ - private DataFlow::Node cryptography_attr(string attr_name) { - result = cryptography_attr(DataFlow::TypeTracker::end(), attr_name) - } - - // ------------------------------------------------------------------------- - // cryptography.hazmat - // ------------------------------------------------------------------------- - /** Gets a reference to the `cryptography.hazmat` module. 
*/ - DataFlow::Node hazmat() { result = cryptography_attr("hazmat") } - - /** Provides models for the `cryptography.hazmat` module */ - module hazmat { - /** - * Gets a reference to the attribute `attr_name` of the `cryptography.hazmat` module. - * WARNING: Only holds for a few predefined attributes. - */ - private DataFlow::Node hazmat_attr(DataFlow::TypeTracker t, string attr_name) { - attr_name in ["primitives"] and - ( - t.start() and - result = DataFlow::importNode("cryptography.hazmat" + "." + attr_name) - or - t.startInAttr(attr_name) and - result = hazmat() - ) - or - // Due to bad performance when using normal setup with `hazmat_attr(t2, attr_name).track(t2, t)` - // we have inlined that code and forced a join - exists(DataFlow::TypeTracker t2 | - exists(DataFlow::StepSummary summary | - hazmat_attr_first_join(t2, attr_name, result, summary) and - t = t2.append(summary) - ) - ) - } - - pragma[nomagic] - private predicate hazmat_attr_first_join( - DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, - DataFlow::StepSummary summary - ) { - DataFlow::StepSummary::step(hazmat_attr(t2, attr_name), res, summary) - } - - /** - * Gets a reference to the attribute `attr_name` of the `cryptography.hazmat` module. - * WARNING: Only holds for a few predefined attributes. - */ - private DataFlow::Node hazmat_attr(string attr_name) { - result = hazmat_attr(DataFlow::TypeTracker::end(), attr_name) - } - - // ------------------------------------------------------------------------- - // cryptography.hazmat.primitives - // ------------------------------------------------------------------------- - /** Gets a reference to the `cryptography.hazmat.primitives` module. */ - DataFlow::Node primitives() { result = hazmat_attr("primitives") } - - /** Provides models for the `cryptography.hazmat.primitives` module */ - module primitives { - /** - * Gets a reference to the attribute `attr_name` of the `cryptography.hazmat.primitives` module. 
- * WARNING: Only holds for a few predefined attributes. - */ - private DataFlow::Node primitives_attr(DataFlow::TypeTracker t, string attr_name) { - attr_name in ["asymmetric"] and - ( - t.start() and - result = DataFlow::importNode("cryptography.hazmat.primitives" + "." + attr_name) - or - t.startInAttr(attr_name) and - result = primitives() - ) - or - // Due to bad performance when using normal setup with `primitives_attr(t2, attr_name).track(t2, t)` - // we have inlined that code and forced a join - exists(DataFlow::TypeTracker t2 | - exists(DataFlow::StepSummary summary | - primitives_attr_first_join(t2, attr_name, result, summary) and - t = t2.append(summary) - ) - ) - } - - pragma[nomagic] - private predicate primitives_attr_first_join( - DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, - DataFlow::StepSummary summary - ) { - DataFlow::StepSummary::step(primitives_attr(t2, attr_name), res, summary) - } - - /** - * Gets a reference to the attribute `attr_name` of the `cryptography.hazmat.primitives` module. - * WARNING: Only holds for a few predefined attributes. - */ - private DataFlow::Node primitives_attr(string attr_name) { - result = primitives_attr(DataFlow::TypeTracker::end(), attr_name) - } - - // ------------------------------------------------------------------------- - // cryptography.hazmat.primitives.asymmetric - // ------------------------------------------------------------------------- - /** Gets a reference to the `cryptography.hazmat.primitives.asymmetric` module. */ - DataFlow::Node asymmetric() { result = primitives_attr("asymmetric") } - - /** Provides models for the `cryptography.hazmat.primitives.asymmetric` module */ - module asymmetric { - /** - * Gets a reference to the attribute `attr_name` of the `cryptography.hazmat.primitives.asymmetric` module. - * WARNING: Only holds for a few predefined attributes. 
- */ - private DataFlow::Node asymmetric_attr(DataFlow::TypeTracker t, string attr_name) { - attr_name in ["rsa", "dsa", "ec"] and - ( - t.start() and - result = - DataFlow::importNode("cryptography.hazmat.primitives.asymmetric" + "." + attr_name) - or - t.startInAttr(attr_name) and - result = asymmetric() - ) - or - // Due to bad performance when using normal setup with `asymmetric_attr(t2, attr_name).track(t2, t)` - // we have inlined that code and forced a join - exists(DataFlow::TypeTracker t2 | - exists(DataFlow::StepSummary summary | - asymmetric_attr_first_join(t2, attr_name, result, summary) and - t = t2.append(summary) - ) - ) - } - - pragma[nomagic] - private predicate asymmetric_attr_first_join( - DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, - DataFlow::StepSummary summary - ) { - DataFlow::StepSummary::step(asymmetric_attr(t2, attr_name), res, summary) - } - - /** - * Gets a reference to the attribute `attr_name` of the `cryptography.hazmat.primitives.asymmetric` module. - * WARNING: Only holds for a few predefined attributes. - */ - private DataFlow::Node asymmetric_attr(string attr_name) { - result = asymmetric_attr(DataFlow::TypeTracker::end(), attr_name) - } - - // ------------------------------------------------------------------------- - // cryptography.hazmat.primitives.asymmetric.rsa - // ------------------------------------------------------------------------- - /** Gets a reference to the `cryptography.hazmat.primitives.asymmetric.rsa` module. */ - DataFlow::Node rsa() { result = asymmetric_attr("rsa") } - - /** Provides models for the `cryptography.hazmat.primitives.asymmetric.rsa` module */ - module rsa { - /** - * Gets a reference to the attribute `attr_name` of the `cryptography.hazmat.primitives.asymmetric.rsa` module. - * WARNING: Only holds for a few predefined attributes. 
- */ - private DataFlow::Node rsa_attr(DataFlow::TypeTracker t, string attr_name) { - attr_name in ["generate_private_key"] and - ( - t.start() and - result = - DataFlow::importNode("cryptography.hazmat.primitives.asymmetric.rsa" + "." + - attr_name) - or - t.startInAttr(attr_name) and - result = rsa() - ) - or - // Due to bad performance when using normal setup with `rsa_attr(t2, attr_name).track(t2, t)` - // we have inlined that code and forced a join - exists(DataFlow::TypeTracker t2 | - exists(DataFlow::StepSummary summary | - rsa_attr_first_join(t2, attr_name, result, summary) and - t = t2.append(summary) - ) - ) - } - - pragma[nomagic] - private predicate rsa_attr_first_join( - DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, - DataFlow::StepSummary summary - ) { - DataFlow::StepSummary::step(rsa_attr(t2, attr_name), res, summary) - } - - /** - * Gets a reference to the attribute `attr_name` of the `cryptography.hazmat.primitives.asymmetric.rsa` module. - * WARNING: Only holds for a few predefined attributes. - */ - private DataFlow::Node rsa_attr(string attr_name) { - result = rsa_attr(DataFlow::TypeTracker::end(), attr_name) - } - - /** Gets a reference to the `cryptography.hazmat.primitives.asymmetric.rsa.generate_private_key` function. */ - DataFlow::Node generate_private_key() { result = rsa_attr("generate_private_key") } - } - - // ------------------------------------------------------------------------- - // cryptography.hazmat.primitives.asymmetric.dsa - // ------------------------------------------------------------------------- - /** Gets a reference to the `cryptography.hazmat.primitives.asymmetric.dsa` module. */ - DataFlow::Node dsa() { result = asymmetric_attr("dsa") } - - /** Provides models for the `cryptography.hazmat.primitives.asymmetric.dsa` module */ - module dsa { - /** - * Gets a reference to the attribute `attr_name` of the `cryptography.hazmat.primitives.asymmetric.dsa` module. 
- * WARNING: Only holds for a few predefined attributes. - */ - private DataFlow::Node dsa_attr(DataFlow::TypeTracker t, string attr_name) { - attr_name in ["generate_private_key"] and - ( - t.start() and - result = - DataFlow::importNode("cryptography.hazmat.primitives.asymmetric.dsa" + "." + - attr_name) - or - t.startInAttr(attr_name) and - result = dsa() - ) - or - // Due to bad performance when using normal setup with `dsa_attr(t2, attr_name).track(t2, t)` - // we have inlined that code and forced a join - exists(DataFlow::TypeTracker t2 | - exists(DataFlow::StepSummary summary | - dsa_attr_first_join(t2, attr_name, result, summary) and - t = t2.append(summary) - ) - ) - } - - pragma[nomagic] - private predicate dsa_attr_first_join( - DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, - DataFlow::StepSummary summary - ) { - DataFlow::StepSummary::step(dsa_attr(t2, attr_name), res, summary) - } - - /** - * Gets a reference to the attribute `attr_name` of the `cryptography.hazmat.primitives.asymmetric.dsa` module. - * WARNING: Only holds for a few predefined attributes. - */ - private DataFlow::Node dsa_attr(string attr_name) { - result = dsa_attr(DataFlow::TypeTracker::end(), attr_name) - } - - /** Gets a reference to the `cryptography.hazmat.primitives.asymmetric.dsa.generate_private_key` function. */ - DataFlow::Node generate_private_key() { result = dsa_attr("generate_private_key") } - } - - // ------------------------------------------------------------------------- - // cryptography.hazmat.primitives.asymmetric.ec - // ------------------------------------------------------------------------- - /** Gets a reference to the `cryptography.hazmat.primitives.asymmetric.ec` module. 
*/ - DataFlow::Node ec() { result = asymmetric_attr("ec") } - - /** Provides models for the `cryptography.hazmat.primitives.asymmetric.ec` module */ - module ec { - /** - * Gets a reference to the attribute `attr_name` of the `cryptography.hazmat.primitives.asymmetric.ec` module. - * WARNING: Only holds for a few predefined attributes. - */ - private DataFlow::Node ec_attr(DataFlow::TypeTracker t, string attr_name) { - attr_name in [ - "generate_private_key", - // curves - "SECT571R1", "SECT409R1", "SECT283R1", "SECT233R1", "SECT163R2", "SECT571K1", - "SECT409K1", "SECT283K1", "SECT233K1", "SECT163K1", "SECP521R1", "SECP384R1", - "SECP256R1", "SECP256K1", "SECP224R1", "SECP192R1", "BrainpoolP256R1", - "BrainpoolP384R1", "BrainpoolP512R1" - ] and - ( - t.start() and - result = - DataFlow::importNode("cryptography.hazmat.primitives.asymmetric.ec" + "." + - attr_name) - or - t.startInAttr(attr_name) and - result = ec() - ) - or - // Due to bad performance when using normal setup with `ec_attr(t2, attr_name).track(t2, t)` - // we have inlined that code and forced a join - exists(DataFlow::TypeTracker t2 | - exists(DataFlow::StepSummary summary | - ec_attr_first_join(t2, attr_name, result, summary) and - t = t2.append(summary) - ) - ) - } - - pragma[nomagic] - private predicate ec_attr_first_join( - DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, - DataFlow::StepSummary summary - ) { - DataFlow::StepSummary::step(ec_attr(t2, attr_name), res, summary) - } - - /** - * Gets a reference to the attribute `attr_name` of the `cryptography.hazmat.primitives.asymmetric.ec` module. - * WARNING: Only holds for a few predefined attributes. - */ - private DataFlow::Node ec_attr(string attr_name) { - result = ec_attr(DataFlow::TypeTracker::end(), attr_name) - } - - /** Gets a reference to the `cryptography.hazmat.primitives.asymmetric.ec.generate_private_key` function. 
*/ - DataFlow::Node generate_private_key() { result = ec_attr("generate_private_key") } - - /** Gets a predefined curve class with a specific key size (in bits). */ - DataFlow::Node curveClassWithKeySize(int keySize) { - // obtained by manually looking at source code in - // https://github.com/pyca/cryptography/blob/cba69f1922803f4f29a3fde01741890d88b8e217/src/cryptography/hazmat/primitives/asymmetric/ec.py#L208-L300 - result = ec_attr("SECT571R1") and keySize = 570 - or - result = ec_attr("SECT409R1") and keySize = 409 - or - result = ec_attr("SECT283R1") and keySize = 283 - or - result = ec_attr("SECT233R1") and keySize = 233 - or - result = ec_attr("SECT163R2") and keySize = 163 - or - result = ec_attr("SECT571K1") and keySize = 571 - or - result = ec_attr("SECT409K1") and keySize = 409 - or - result = ec_attr("SECT283K1") and keySize = 283 - or - result = ec_attr("SECT233K1") and keySize = 233 - or - result = ec_attr("SECT163K1") and keySize = 163 - or - result = ec_attr("SECP521R1") and keySize = 521 - or - result = ec_attr("SECP384R1") and keySize = 384 - or - result = ec_attr("SECP256R1") and keySize = 256 - or - result = ec_attr("SECP256K1") and keySize = 256 - or - result = ec_attr("SECP224R1") and keySize = 224 - or - result = ec_attr("SECP192R1") and keySize = 192 - or - result = ec_attr("BrainpoolP256R1") and keySize = 256 - or - result = ec_attr("BrainpoolP384R1") and keySize = 384 - or - result = ec_attr("BrainpoolP512R1") and keySize = 512 - } - - /** Gets a reference to a predefined curve class instance with a specific key size (in bits), as well as the origin of the class. 
*/ - private DataFlow::Node curveClassInstanceWithKeySize( - DataFlow::TypeTracker t, int keySize, DataFlow::Node origin - ) { - t.start() and - result.asCfgNode().(CallNode).getFunction() = - curveClassWithKeySize(keySize).asCfgNode() and - origin = result - or - // Due to bad performance when using normal setup with we have inlined that code and forced a join - exists(DataFlow::TypeTracker t2 | - exists(DataFlow::StepSummary summary | - curveClassInstanceWithKeySize_first_join(t2, keySize, origin, result, summary) and - t = t2.append(summary) - ) - ) - } - - pragma[nomagic] - private predicate curveClassInstanceWithKeySize_first_join( - DataFlow::TypeTracker t2, int keySize, DataFlow::Node origin, DataFlow::Node res, - DataFlow::StepSummary summary - ) { - DataFlow::StepSummary::step(curveClassInstanceWithKeySize(t2, keySize, origin), res, - summary) - } - - /** Gets a reference to a predefined curve class instance with a specific key size (in bits), as well as the origin of the class. */ - DataFlow::Node curveClassInstanceWithKeySize(int keySize, DataFlow::Node origin) { - result = curveClassInstanceWithKeySize(DataFlow::TypeTracker::end(), keySize, origin) - } - } - } - } + /** Gets a reference to a predefined curve class instance with a specific key size (in bits), as well as the origin of the class. 
*/ + DataFlow::Node curveClassInstanceWithKeySize(int keySize, DataFlow::Node origin) { + result = curveClassInstanceWithKeySize(DataFlow::TypeTracker::end(), keySize, origin) } } @@ -467,16 +113,20 @@ private module CryptographyModel { * See https://cryptography.io/en/latest/hazmat/primitives/asymmetric/rsa.html#cryptography.hazmat.primitives.asymmetric.rsa.generate_private_key */ class CryptographyRsaGeneratePrivateKeyCall extends Cryptography::PublicKey::KeyGeneration::RsaRange, - DataFlow::CfgNode { - override CallNode node; - + DataFlow::CallCfgNode { CryptographyRsaGeneratePrivateKeyCall() { - node.getFunction() = - cryptography::hazmat::primitives::asymmetric::rsa::generate_private_key().asCfgNode() + this = + API::moduleImport("cryptography") + .getMember("hazmat") + .getMember("primitives") + .getMember("asymmetric") + .getMember("rsa") + .getMember("generate_private_key") + .getACall() } override DataFlow::Node getKeySizeArg() { - result.asCfgNode() in [node.getArg(1), node.getArgByName("key_size")] + result in [this.getArg(1), this.getArgByName("key_size")] } } @@ -486,16 +136,20 @@ private module CryptographyModel { * See https://cryptography.io/en/latest/hazmat/primitives/asymmetric/dsa.html#cryptography.hazmat.primitives.asymmetric.dsa.generate_private_key */ class CryptographyDsaGeneratePrivateKeyCall extends Cryptography::PublicKey::KeyGeneration::DsaRange, - DataFlow::CfgNode { - override CallNode node; - + DataFlow::CallCfgNode { CryptographyDsaGeneratePrivateKeyCall() { - node.getFunction() = - cryptography::hazmat::primitives::asymmetric::dsa::generate_private_key().asCfgNode() + this = + API::moduleImport("cryptography") + .getMember("hazmat") + .getMember("primitives") + .getMember("asymmetric") + .getMember("dsa") + .getMember("generate_private_key") + .getACall() } override DataFlow::Node getKeySizeArg() { - result.asCfgNode() in [node.getArg(0), node.getArgByName("key_size")] + result in [this.getArg(0), this.getArgByName("key_size")] } } 
@@ -505,23 +159,23 @@ private module CryptographyModel { * See https://cryptography.io/en/latest/hazmat/primitives/asymmetric/ec.html#cryptography.hazmat.primitives.asymmetric.ec.generate_private_key */ class CryptographyEcGeneratePrivateKeyCall extends Cryptography::PublicKey::KeyGeneration::EccRange, - DataFlow::CfgNode { - override CallNode node; - + DataFlow::CallCfgNode { CryptographyEcGeneratePrivateKeyCall() { - node.getFunction() = - cryptography::hazmat::primitives::asymmetric::ec::generate_private_key().asCfgNode() + this = + API::moduleImport("cryptography") + .getMember("hazmat") + .getMember("primitives") + .getMember("asymmetric") + .getMember("ec") + .getMember("generate_private_key") + .getACall() } /** Gets the argument that specifies the curve to use. */ - DataFlow::Node getCurveArg() { - result.asCfgNode() in [node.getArg(0), node.getArgByName("curve")] - } + DataFlow::Node getCurveArg() { result in [this.getArg(0), this.getArgByName("curve")] } override int getKeySizeWithOrigin(DataFlow::Node origin) { - this.getCurveArg() = - cryptography::hazmat::primitives::asymmetric::ec::curveClassInstanceWithKeySize(result, - origin) + this.getCurveArg() = Ecc::curveClassInstanceWithKeySize(result, origin) } // Note: There is not really a key-size argument, since it's always specified by the curve. 
From 2a8f720bc6e20250b618b0e5e86f876919996834 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Wed, 17 Feb 2021 14:31:33 +0100 Subject: [PATCH 14/25] Python: Port cryptodome models to use API graphs --- .../semmle/python/frameworks/Cryptodome.qll | 307 ++---------------- 1 file changed, 27 insertions(+), 280 deletions(-) diff --git a/python/ql/src/semmle/python/frameworks/Cryptodome.qll b/python/ql/src/semmle/python/frameworks/Cryptodome.qll index cd8f589bcd6..bd28da1067d 100644 --- a/python/ql/src/semmle/python/frameworks/Cryptodome.qll +++ b/python/ql/src/semmle/python/frameworks/Cryptodome.qll @@ -8,6 +8,7 @@ private import python private import semmle.python.dataflow.new.DataFlow private import semmle.python.Concepts +private import semmle.python.ApiGraphs /** * Provides models for @@ -16,270 +17,6 @@ private import semmle.python.Concepts * See https://pycryptodome.readthedocs.io/en/latest/ */ private module CryptodomeModel { - // --------------------------------------------------------------------------- - // Cryptodome - // --------------------------------------------------------------------------- - /** Gets a reference to the `Cryptodome`/`Crypto` module. */ - private DataFlow::Node cryptodome(DataFlow::TypeTracker t) { - t.start() and - result = DataFlow::importNode(["Cryptodome", "Crypto"]) - or - exists(DataFlow::TypeTracker t2 | result = cryptodome(t2).track(t2, t)) - } - - /** Gets a reference to the `Cryptodome`/`Crypto` module. */ - DataFlow::Node cryptodome() { result = cryptodome(DataFlow::TypeTracker::end()) } - - /** Provides models for the `Cryptodome`/`Crypto` module. */ - module Cryptodome { - /** - * Gets a reference to the attribute `attr_name` of the `Cryptodome`/`Crypto` module. - * WARNING: Only holds for a few predefined attributes. 
- */ - private DataFlow::Node cryptodome_attr(DataFlow::TypeTracker t, string attr_name) { - attr_name in ["PublicKey"] and - ( - t.start() and - result = DataFlow::importNode(["Cryptodome", "Crypto"] + "." + attr_name) - or - t.startInAttr(attr_name) and - result = cryptodome() - ) - or - // Due to bad performance when using normal setup with `cryptodome_attr(t2, attr_name).track(t2, t)` - // we have inlined that code and forced a join - exists(DataFlow::TypeTracker t2 | - exists(DataFlow::StepSummary summary | - cryptodome_attr_first_join(t2, attr_name, result, summary) and - t = t2.append(summary) - ) - ) - } - - pragma[nomagic] - private predicate cryptodome_attr_first_join( - DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, DataFlow::StepSummary summary - ) { - DataFlow::StepSummary::step(cryptodome_attr(t2, attr_name), res, summary) - } - - /** - * Gets a reference to the attribute `attr_name` of the `Cryptodome`/`Crypto` module. - * WARNING: Only holds for a few predefined attributes. - */ - private DataFlow::Node cryptodome_attr(string attr_name) { - result = cryptodome_attr(DataFlow::TypeTracker::end(), attr_name) - } - - // ------------------------------------------------------------------------- - // Cryptodome.PublicKey - // ------------------------------------------------------------------------- - /** Gets a reference to the `Cryptodome.PublicKey`/`Crypto.PublicKey` module. */ - DataFlow::Node publicKey() { result = cryptodome_attr("PublicKey") } - - /** Provides models for the `Cryptodome.PublicKey`/`Crypto.PublicKey` module */ - module PublicKey { - /** - * Gets a reference to the attribute `attr_name` of the `Cryptodome.PublicKey`/`Crypto.PublicKey` module. - * WARNING: Only holds for a few predefined attributes. 
- */ - private DataFlow::Node publicKey_attr(DataFlow::TypeTracker t, string attr_name) { - attr_name in ["RSA", "DSA", "ECC"] and - ( - t.start() and - result = DataFlow::importNode(["Cryptodome", "Crypto"] + ".PublicKey" + "." + attr_name) - or - t.startInAttr(attr_name) and - result = publicKey() - ) - or - // Due to bad performance when using normal setup with `publicKey_attr(t2, attr_name).track(t2, t)` - // we have inlined that code and forced a join - exists(DataFlow::TypeTracker t2 | - exists(DataFlow::StepSummary summary | - publicKey_attr_first_join(t2, attr_name, result, summary) and - t = t2.append(summary) - ) - ) - } - - pragma[nomagic] - private predicate publicKey_attr_first_join( - DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, - DataFlow::StepSummary summary - ) { - DataFlow::StepSummary::step(publicKey_attr(t2, attr_name), res, summary) - } - - /** - * Gets a reference to the attribute `attr_name` of the `Cryptodome.PublicKey`/`Crypto.PublicKey` module. - * WARNING: Only holds for a few predefined attributes. - */ - private DataFlow::Node publicKey_attr(string attr_name) { - result = publicKey_attr(DataFlow::TypeTracker::end(), attr_name) - } - - // ------------------------------------------------------------------------- - // Cryptodome.PublicKey.RSA - // ------------------------------------------------------------------------- - /** Gets a reference to the `Cryptodome.PublicKey.RSA`/`Crypto.PublicKey.RSA` module. */ - DataFlow::Node rsa() { result = publicKey_attr("RSA") } - - /** Provides models for the `Cryptodome.PublicKey.RSA`/`Crypto.PublicKey.RSA` module */ - module RSA { - /** - * Gets a reference to the attribute `attr_name` of the `Cryptodome.PublicKey.RSA`/`Crypto.PublicKey.RSA` module. - * WARNING: Only holds for a few predefined attributes. 
- */ - private DataFlow::Node rsa_attr(DataFlow::TypeTracker t, string attr_name) { - attr_name in ["generate"] and - ( - t.start() and - result = - DataFlow::importNode(["Cryptodome", "Crypto"] + ".PublicKey.RSA" + "." + attr_name) - or - t.startInAttr(attr_name) and - result = rsa() - ) - or - // Due to bad performance when using normal setup with `rsa_attr(t2, attr_name).track(t2, t)` - // we have inlined that code and forced a join - exists(DataFlow::TypeTracker t2 | - exists(DataFlow::StepSummary summary | - rsa_attr_first_join(t2, attr_name, result, summary) and - t = t2.append(summary) - ) - ) - } - - pragma[nomagic] - private predicate rsa_attr_first_join( - DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, - DataFlow::StepSummary summary - ) { - DataFlow::StepSummary::step(rsa_attr(t2, attr_name), res, summary) - } - - /** - * Gets a reference to the attribute `attr_name` of the `Cryptodome.PublicKey.RSA`/`Crypto.PublicKey.RSA` module. - * WARNING: Only holds for a few predefined attributes. - */ - private DataFlow::Node rsa_attr(string attr_name) { - result = rsa_attr(DataFlow::TypeTracker::end(), attr_name) - } - - /** Gets a reference to the `Cryptodome.PublicKey.RSA.generate`/`Crypto.PublicKey.RSA.generate` function. */ - DataFlow::Node generate() { result = rsa_attr("generate") } - } - - // ------------------------------------------------------------------------- - // Cryptodome.PublicKey.DSA - // ------------------------------------------------------------------------- - /** Gets a reference to the `Cryptodome.PublicKey.DSA`/`Crypto.PublicKey.DSA` module. */ - DataFlow::Node dsa() { result = publicKey_attr("DSA") } - - /** Provides models for the `Cryptodome.PublicKey.DSA`/`Crypto.PublicKey.DSA` module */ - module DSA { - /** - * Gets a reference to the attribute `attr_name` of the `Cryptodome.PublicKey.DSA`/`Crypto.PublicKey.DSA` module. - * WARNING: Only holds for a few predefined attributes. 
- */ - private DataFlow::Node dsa_attr(DataFlow::TypeTracker t, string attr_name) { - attr_name in ["generate"] and - ( - t.start() and - result = - DataFlow::importNode(["Cryptodome", "Crypto"] + ".PublicKey.DSA" + "." + attr_name) - or - t.startInAttr(attr_name) and - result = dsa() - ) - or - // Due to bad performance when using normal setup with `dsa_attr(t2, attr_name).track(t2, t)` - // we have inlined that code and forced a join - exists(DataFlow::TypeTracker t2 | - exists(DataFlow::StepSummary summary | - dsa_attr_first_join(t2, attr_name, result, summary) and - t = t2.append(summary) - ) - ) - } - - pragma[nomagic] - private predicate dsa_attr_first_join( - DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, - DataFlow::StepSummary summary - ) { - DataFlow::StepSummary::step(dsa_attr(t2, attr_name), res, summary) - } - - /** - * Gets a reference to the attribute `attr_name` of the `Cryptodome.PublicKey.DSA`/`Crypto.PublicKey.DSA` module. - * WARNING: Only holds for a few predefined attributes. - */ - private DataFlow::Node dsa_attr(string attr_name) { - result = dsa_attr(DataFlow::TypeTracker::end(), attr_name) - } - - /** Gets a reference to the `Cryptodome.PublicKey.DSA.generate`/`Crypto.PublicKey.DSA.generate` function. */ - DataFlow::Node generate() { result = dsa_attr("generate") } - } - - // ------------------------------------------------------------------------- - // Cryptodome.PublicKey.ECC - // ------------------------------------------------------------------------- - /** Gets a reference to the `Cryptodome.PublicKey.ECC`/`Crypto.PublicKey.ECC` module. */ - DataFlow::Node ecc() { result = publicKey_attr("ECC") } - - /** Provides models for the `Cryptodome.PublicKey.ECC`/`Crypto.PublicKey.ECC` module */ - module ECC { - /** - * Gets a reference to the attribute `attr_name` of the `Cryptodome.PublicKey.ECC`/`Crypto.PublicKey.ECC` module. - * WARNING: Only holds for a few predefined attributes. 
- */ - private DataFlow::Node ecc_attr(DataFlow::TypeTracker t, string attr_name) { - attr_name in ["generate"] and - ( - t.start() and - result = - DataFlow::importNode(["Cryptodome", "Crypto"] + ".PublicKey.ECC" + "." + attr_name) - or - t.startInAttr(attr_name) and - result = ecc() - ) - or - // Due to bad performance when using normal setup with `ecc_attr(t2, attr_name).track(t2, t)` - // we have inlined that code and forced a join - exists(DataFlow::TypeTracker t2 | - exists(DataFlow::StepSummary summary | - ecc_attr_first_join(t2, attr_name, result, summary) and - t = t2.append(summary) - ) - ) - } - - pragma[nomagic] - private predicate ecc_attr_first_join( - DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, - DataFlow::StepSummary summary - ) { - DataFlow::StepSummary::step(ecc_attr(t2, attr_name), res, summary) - } - - /** - * Gets a reference to the attribute `attr_name` of the `Cryptodome.PublicKey.ECC`/`Crypto.PublicKey.ECC` module. - * WARNING: Only holds for a few predefined attributes. - */ - private DataFlow::Node ecc_attr(string attr_name) { - result = ecc_attr(DataFlow::TypeTracker::end(), attr_name) - } - - /** Gets a reference to the `Cryptodome.PublicKey.ECC.generate`/`Crypto.PublicKey.ECC.generate` function. 
*/ - DataFlow::Node generate() { result = ecc_attr("generate") } - } - } - } - // --------------------------------------------------------------------------- /** * A call to `Cryptodome.PublicKey.RSA.generate`/`Crypto.PublicKey.RSA.generate` @@ -287,15 +24,18 @@ private module CryptodomeModel { * See https://pycryptodome.readthedocs.io/en/latest/src/public_key/rsa.html#Crypto.PublicKey.RSA.generate */ class CryptodomePublicKeyRsaGenerateCall extends Cryptography::PublicKey::KeyGeneration::RsaRange, - DataFlow::CfgNode { - override CallNode node; - + DataFlow::CallCfgNode { CryptodomePublicKeyRsaGenerateCall() { - node.getFunction() = Cryptodome::PublicKey::RSA::generate().asCfgNode() + this = + API::moduleImport(["Crypto", "Cryptodome"]) + .getMember("PublicKey") + .getMember("RSA") + .getMember("generate") + .getACall() } override DataFlow::Node getKeySizeArg() { - result.asCfgNode() in [node.getArg(0), node.getArgByName("bits")] + result in [this.getArg(0), this.getArgByName("bits")] } } @@ -305,15 +45,18 @@ private module CryptodomeModel { * See https://pycryptodome.readthedocs.io/en/latest/src/public_key/dsa.html#Crypto.PublicKey.DSA.generate */ class CryptodomePublicKeyDsaGenerateCall extends Cryptography::PublicKey::KeyGeneration::DsaRange, - DataFlow::CfgNode { - override CallNode node; - + DataFlow::CallCfgNode { CryptodomePublicKeyDsaGenerateCall() { - node.getFunction() = Cryptodome::PublicKey::DSA::generate().asCfgNode() + this = + API::moduleImport(["Crypto", "Cryptodome"]) + .getMember("PublicKey") + .getMember("DSA") + .getMember("generate") + .getACall() } override DataFlow::Node getKeySizeArg() { - result.asCfgNode() in [node.getArg(0), node.getArgByName("bits")] + result in [this.getArg(0), this.getArgByName("bits")] } } @@ -323,16 +66,20 @@ private module CryptodomeModel { * See https://pycryptodome.readthedocs.io/en/latest/src/public_key/ecc.html#Crypto.PublicKey.ECC.generate */ class CryptodomePublicKeyEccGenerateCall extends 
Cryptography::PublicKey::KeyGeneration::EccRange, - DataFlow::CfgNode { - override CallNode node; - + DataFlow::CallCfgNode { CryptodomePublicKeyEccGenerateCall() { - node.getFunction() = Cryptodome::PublicKey::ECC::generate().asCfgNode() + this = + API::moduleImport(["Crypto", "Cryptodome"]) + .getMember("PublicKey") + .getMember("ECC") + .getMember("generate") + .getACall() } /** Gets the argument that specifies the curve to use (a string). */ - DataFlow::Node getCurveArg() { result.asCfgNode() in [node.getArgByName("curve")] } + DataFlow::Node getCurveArg() { result in [this.getArgByName("curve")] } + /** Gets the name of the curve to use, as well as the origin that explains how we obtained this name. */ string getCurveWithOrigin(DataFlow::Node origin) { exists(StrConst str | origin = DataFlow::exprNode(str) | origin.(DataFlow::LocalSourceNode).flowsTo(this.getCurveArg()) and @@ -341,7 +88,7 @@ private module CryptodomeModel { } override int getKeySizeWithOrigin(DataFlow::Node origin) { - exists(string curve | curve = getCurveWithOrigin(origin) | + exists(string curve | curve = this.getCurveWithOrigin(origin) | // using list from https://pycryptodome.readthedocs.io/en/latest/src/public_key/ecc.html curve in ["NIST P-256", "p256", "P-256", "prime256v1", "secp256r1"] and result = 256 or From 37f0d5a28a21cc8090b4d4b9ed8fd56d09231f1f Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Wed, 17 Feb 2021 14:35:02 +0100 Subject: [PATCH 15/25] Python: Make KeyGeneration range member overrides final This was the result of an internal discussion we had about this some time ago.
--- python/ql/src/semmle/python/Concepts.qll | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/python/ql/src/semmle/python/Concepts.qll b/python/ql/src/semmle/python/Concepts.qll index 989003a1e8a..3720c087898 100644 --- a/python/ql/src/semmle/python/Concepts.qll +++ b/python/ql/src/semmle/python/Concepts.qll @@ -592,23 +592,23 @@ module Cryptography { /** A data-flow node that generates a new RSA key-pair. */ abstract class RsaRange extends Range { - override string getName() { result = "RSA" } + final override string getName() { result = "RSA" } - override int minimumSecureKeySize() { result = 2048 } + final override int minimumSecureKeySize() { result = 2048 } } /** A data-flow node that generates a new DSA key-pair. */ abstract class DsaRange extends Range { - override string getName() { result = "DSA" } + final override string getName() { result = "DSA" } - override int minimumSecureKeySize() { result = 2048 } + final override int minimumSecureKeySize() { result = 2048 } } /** A data-flow node that generates a new ECC key-pair. 
*/ abstract class EccRange extends Range { - override string getName() { result = "ECC" } + final override string getName() { result = "ECC" } - override int minimumSecureKeySize() { result = 224 } + final override int minimumSecureKeySize() { result = 224 } } } } From a6583345ba3c2a21b8a919127746aa8cc10a310f Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Fri, 19 Feb 2021 13:56:09 +0100 Subject: [PATCH 16/25] Python: Add weak crypto key example through function call We used to handle this, but no more :( Adding this example was inspired by looking at results differences --- .../query-tests/Security/CWE-326/weak_crypto.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/python/ql/test/query-tests/Security/CWE-326/weak_crypto.py b/python/ql/test/query-tests/Security/CWE-326/weak_crypto.py index a64d39bc866..01016a187ee 100644 --- a/python/ql/test/query-tests/Security/CWE-326/weak_crypto.py +++ b/python/ql/test/query-tests/Security/CWE-326/weak_crypto.py @@ -75,3 +75,16 @@ rsa_gen_key(65537, key_size=RSA_WEAK) DSA.generate(DSA_WEAK) RSA.generate(RSA_WEAK) + +# ------------------------------------------------------------------------------ + +# Through function calls + +def make_new_rsa_key_weak(bits): + return RSA.generate(bits) # NOT OK +make_new_rsa_key_weak(RSA_WEAK) + + +def make_new_rsa_key_strong(bits): + return RSA.generate(bits) # OK +make_new_rsa_key_strong(RSA_STRONG) From dfa223ac6afdba22a167de5aeed8d3178e38fde7 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Fri, 19 Feb 2021 14:25:38 +0100 Subject: [PATCH 17/25] Python: Better IntegerLiteral tracking for weak crypto key --- python/ql/src/semmle/python/Concepts.qll | 20 +++++++++++++++++-- .../Security/CWE-326/WeakCryptoKey.expected | 1 + 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/python/ql/src/semmle/python/Concepts.qll b/python/ql/src/semmle/python/Concepts.qll index 3720c087898..4f0d4ed55d9 100644 --- a/python/ql/src/semmle/python/Concepts.qll +++ 
b/python/ql/src/semmle/python/Concepts.qll @@ -562,6 +562,21 @@ module Cryptography { /** Provides classes for modeling new key-pair generation APIs. */ module KeyGeneration { + /** + * A data-flow configuration for tracking integer literals. + */ + private class IntegerLiteralTrackerConfiguration extends DataFlow::Configuration { + IntegerLiteralTrackerConfiguration() { this = "IntegerLiteralTrackerConfiguration" } + + override predicate isSource(DataFlow::Node source) { + source = DataFlow::exprNode(any(IntegerLiteral size)) + } + + override predicate isSink(DataFlow::Node sink) { + sink = any(KeyGeneration::Range kg).getKeySizeArg() + } + } + /** * A data-flow node that generates a new key-pair for use with public-key cryptography. * @@ -580,8 +595,9 @@ module Cryptography { * explains how we obtained this specific key size. */ int getKeySizeWithOrigin(DataFlow::Node origin) { - exists(IntegerLiteral size | origin = DataFlow::exprNode(size) | - origin.(DataFlow::LocalSourceNode).flowsTo(this.getKeySizeArg()) and + exists(IntegerLiteral size, IntegerLiteralTrackerConfiguration config | + origin.asExpr() = size and + config.hasFlow(origin, this.getKeySizeArg()) and result = size.getValue() ) } diff --git a/python/ql/test/query-tests/Security/CWE-326/WeakCryptoKey.expected b/python/ql/test/query-tests/Security/CWE-326/WeakCryptoKey.expected index a52d67eaff9..05d759d6f70 100644 --- a/python/ql/test/query-tests/Security/CWE-326/WeakCryptoKey.expected +++ b/python/ql/test/query-tests/Security/CWE-326/WeakCryptoKey.expected @@ -6,3 +6,4 @@ | weak_crypto.py:74:1:74:37 | ControlFlowNode for rsa_gen_key() | Creation of an RSA key uses $@ bits, which is below 2048 and considered breakable. | weak_crypto.py:12:12:12:15 | ControlFlowNode for IntegerLiteral | 1024 | | weak_crypto.py:76:1:76:22 | ControlFlowNode for Attribute() | Creation of an DSA key uses $@ bits, which is below 2048 and considered breakable. 
| weak_crypto.py:16:12:16:15 | ControlFlowNode for IntegerLiteral | 1024 | | weak_crypto.py:77:1:77:22 | ControlFlowNode for Attribute() | Creation of an RSA key uses $@ bits, which is below 2048 and considered breakable. | weak_crypto.py:12:12:12:15 | ControlFlowNode for IntegerLiteral | 1024 | +| weak_crypto.py:84:12:84:29 | ControlFlowNode for Attribute() | Creation of an RSA key uses $@ bits, which is below 2048 and considered breakable. | weak_crypto.py:12:12:12:15 | ControlFlowNode for IntegerLiteral | 1024 | From bfc8ead6673e8ab9aaf203d50372f71cd346b427 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Fri, 19 Feb 2021 14:35:47 +0100 Subject: [PATCH 18/25] Python: Add example of test-code with weak crypto key --- .../query-tests/Security/CWE-326/WeakCryptoKey.expected | 2 ++ .../ql/test/query-tests/Security/CWE-326/test_example.py | 9 +++++++++ .../ql/test/query-tests/Security/CWE-326/weak_crypto.py | 5 +++++ 3 files changed, 16 insertions(+) create mode 100644 python/ql/test/query-tests/Security/CWE-326/test_example.py diff --git a/python/ql/test/query-tests/Security/CWE-326/WeakCryptoKey.expected b/python/ql/test/query-tests/Security/CWE-326/WeakCryptoKey.expected index 05d759d6f70..6da7c7c1b2f 100644 --- a/python/ql/test/query-tests/Security/CWE-326/WeakCryptoKey.expected +++ b/python/ql/test/query-tests/Security/CWE-326/WeakCryptoKey.expected @@ -1,3 +1,4 @@ +| test_example.py:7:5:7:22 | ControlFlowNode for Attribute() | Creation of an RSA key uses $@ bits, which is below 2048 and considered breakable. | test_example.py:7:18:7:21 | ControlFlowNode for IntegerLiteral | 1024 | | weak_crypto.py:68:1:68:21 | ControlFlowNode for dsa_gen_key() | Creation of an DSA key uses $@ bits, which is below 2048 and considered breakable. | weak_crypto.py:16:12:16:15 | ControlFlowNode for IntegerLiteral | 1024 | | weak_crypto.py:69:1:69:19 | ControlFlowNode for ec_gen_key() | Creation of an ECC key uses $@ bits, which is below 224 and considered breakable. 
| weak_crypto.py:22:11:22:24 | ControlFlowNode for Attribute() | 163 | | weak_crypto.py:70:1:70:28 | ControlFlowNode for rsa_gen_key() | Creation of an RSA key uses $@ bits, which is below 2048 and considered breakable. | weak_crypto.py:12:12:12:15 | ControlFlowNode for IntegerLiteral | 1024 | @@ -7,3 +8,4 @@ | weak_crypto.py:76:1:76:22 | ControlFlowNode for Attribute() | Creation of an DSA key uses $@ bits, which is below 2048 and considered breakable. | weak_crypto.py:16:12:16:15 | ControlFlowNode for IntegerLiteral | 1024 | | weak_crypto.py:77:1:77:22 | ControlFlowNode for Attribute() | Creation of an RSA key uses $@ bits, which is below 2048 and considered breakable. | weak_crypto.py:12:12:12:15 | ControlFlowNode for IntegerLiteral | 1024 | | weak_crypto.py:84:12:84:29 | ControlFlowNode for Attribute() | Creation of an RSA key uses $@ bits, which is below 2048 and considered breakable. | weak_crypto.py:12:12:12:15 | ControlFlowNode for IntegerLiteral | 1024 | +| weak_crypto.py:95:12:95:29 | ControlFlowNode for Attribute() | Creation of an RSA key uses $@ bits, which is below 2048 and considered breakable. 
| test_example.py:9:23:9:26 | ControlFlowNode for IntegerLiteral | 1024 | diff --git a/python/ql/test/query-tests/Security/CWE-326/test_example.py b/python/ql/test/query-tests/Security/CWE-326/test_example.py new file mode 100644 index 00000000000..e0237deca81 --- /dev/null +++ b/python/ql/test/query-tests/Security/CWE-326/test_example.py @@ -0,0 +1,9 @@ +from Cryptodome.PublicKey import RSA + +from weak_crypto import only_used_by_test + +def test_example(): + # This is technically not ok, but since it's in a test, we don't want to alert on it + RSA.generate(1024) + + only_used_by_test(1024) diff --git a/python/ql/test/query-tests/Security/CWE-326/weak_crypto.py b/python/ql/test/query-tests/Security/CWE-326/weak_crypto.py index 01016a187ee..de533254cfe 100644 --- a/python/ql/test/query-tests/Security/CWE-326/weak_crypto.py +++ b/python/ql/test/query-tests/Security/CWE-326/weak_crypto.py @@ -88,3 +88,8 @@ make_new_rsa_key_weak(RSA_WEAK) def make_new_rsa_key_strong(bits): return RSA.generate(bits) # OK make_new_rsa_key_strong(RSA_STRONG) + + +def only_used_by_test(bits): + # Although this call will technically not be ok, since it's only used in a test, we don't want to alert on it. + return RSA.generate(bits) From d084261a793339f8aff504cc25d29e0aeea5aa1d Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Fri, 19 Feb 2021 14:37:10 +0100 Subject: [PATCH 19/25] Python: Ignore weak key-sizes from test-code in weak-crypto-key From looking at old results on LGTM.com, this was quite common (and those alerts don't really provide value). 
--- python/ql/src/Security/CWE-326/WeakCryptoKey.ql | 4 +++- .../test/query-tests/Security/CWE-326/WeakCryptoKey.expected | 2 -- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/python/ql/src/Security/CWE-326/WeakCryptoKey.ql b/python/ql/src/Security/CWE-326/WeakCryptoKey.ql index 5fb3494e3e6..67f94640506 100644 --- a/python/ql/src/Security/CWE-326/WeakCryptoKey.ql +++ b/python/ql/src/Security/CWE-326/WeakCryptoKey.ql @@ -12,11 +12,13 @@ import python import semmle.python.Concepts import semmle.python.dataflow.new.DataFlow +import semmle.python.filters.Tests from Cryptography::PublicKey::KeyGeneration keyGen, int keySize, DataFlow::Node origin where keySize = keyGen.getKeySizeWithOrigin(origin) and - keySize < keyGen.minimumSecureKeySize() + keySize < keyGen.minimumSecureKeySize() and + not origin.getScope().getScope*() instanceof TestScope select keyGen, "Creation of an " + keyGen.getName() + " key uses $@ bits, which is below " + keyGen.minimumSecureKeySize() + " and considered breakable.", origin, keySize.toString() diff --git a/python/ql/test/query-tests/Security/CWE-326/WeakCryptoKey.expected b/python/ql/test/query-tests/Security/CWE-326/WeakCryptoKey.expected index 6da7c7c1b2f..05d759d6f70 100644 --- a/python/ql/test/query-tests/Security/CWE-326/WeakCryptoKey.expected +++ b/python/ql/test/query-tests/Security/CWE-326/WeakCryptoKey.expected @@ -1,4 +1,3 @@ -| test_example.py:7:5:7:22 | ControlFlowNode for Attribute() | Creation of an RSA key uses $@ bits, which is below 2048 and considered breakable. | test_example.py:7:18:7:21 | ControlFlowNode for IntegerLiteral | 1024 | | weak_crypto.py:68:1:68:21 | ControlFlowNode for dsa_gen_key() | Creation of an DSA key uses $@ bits, which is below 2048 and considered breakable. | weak_crypto.py:16:12:16:15 | ControlFlowNode for IntegerLiteral | 1024 | | weak_crypto.py:69:1:69:19 | ControlFlowNode for ec_gen_key() | Creation of an ECC key uses $@ bits, which is below 224 and considered breakable. 
| weak_crypto.py:22:11:22:24 | ControlFlowNode for Attribute() | 163 | | weak_crypto.py:70:1:70:28 | ControlFlowNode for rsa_gen_key() | Creation of an RSA key uses $@ bits, which is below 2048 and considered breakable. | weak_crypto.py:12:12:12:15 | ControlFlowNode for IntegerLiteral | 1024 | @@ -8,4 +7,3 @@ | weak_crypto.py:76:1:76:22 | ControlFlowNode for Attribute() | Creation of an DSA key uses $@ bits, which is below 2048 and considered breakable. | weak_crypto.py:16:12:16:15 | ControlFlowNode for IntegerLiteral | 1024 | | weak_crypto.py:77:1:77:22 | ControlFlowNode for Attribute() | Creation of an RSA key uses $@ bits, which is below 2048 and considered breakable. | weak_crypto.py:12:12:12:15 | ControlFlowNode for IntegerLiteral | 1024 | | weak_crypto.py:84:12:84:29 | ControlFlowNode for Attribute() | Creation of an RSA key uses $@ bits, which is below 2048 and considered breakable. | weak_crypto.py:12:12:12:15 | ControlFlowNode for IntegerLiteral | 1024 | -| weak_crypto.py:95:12:95:29 | ControlFlowNode for Attribute() | Creation of an RSA key uses $@ bits, which is below 2048 and considered breakable. 
| test_example.py:9:23:9:26 | ControlFlowNode for IntegerLiteral | 1024 | From 40c592ab8535c88c3e22ec8531d7eb5b98d26ffa Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Fri, 19 Feb 2021 15:29:23 +0100 Subject: [PATCH 20/25] Python: Introduce DataFlowOnlyInternalUse to avoid re-evaluation --- config/identical-files.json | 3 +- python/ql/src/semmle/python/Concepts.qll | 3 +- .../dataflow/new/DataFlowOnlyInternalUse.qll | 40 + .../internal/DataFlowImplOnlyInternalUse.qll | 4153 +++++++++++++++++ 4 files changed, 4197 insertions(+), 2 deletions(-) create mode 100644 python/ql/src/semmle/python/dataflow/new/DataFlowOnlyInternalUse.qll create mode 100644 python/ql/src/semmle/python/dataflow/new/internal/DataFlowImplOnlyInternalUse.qll diff --git a/config/identical-files.json b/config/identical-files.json index d68dabba861..5b84fab45cc 100644 --- a/config/identical-files.json +++ b/config/identical-files.json @@ -22,7 +22,8 @@ "python/ql/src/semmle/python/dataflow/new/internal/DataFlowImpl.qll", "python/ql/src/semmle/python/dataflow/new/internal/DataFlowImpl2.qll", "python/ql/src/semmle/python/dataflow/new/internal/DataFlowImpl3.qll", - "python/ql/src/semmle/python/dataflow/new/internal/DataFlowImpl4.qll" + "python/ql/src/semmle/python/dataflow/new/internal/DataFlowImpl4.qll", + "python/ql/src/semmle/python/dataflow/new/internal/DataFlowImplOnlyInternalUse.qll" ], "DataFlow Java/C++/C#/Python Common": [ "java/ql/src/semmle/code/java/dataflow/internal/DataFlowImplCommon.qll", diff --git a/python/ql/src/semmle/python/Concepts.qll b/python/ql/src/semmle/python/Concepts.qll index 4f0d4ed55d9..ed0a19d197a 100644 --- a/python/ql/src/semmle/python/Concepts.qll +++ b/python/ql/src/semmle/python/Concepts.qll @@ -6,6 +6,7 @@ import python private import semmle.python.dataflow.new.DataFlow +private import semmle.python.dataflow.new.DataFlowOnlyInternalUse private import semmle.python.dataflow.new.RemoteFlowSources private import semmle.python.dataflow.new.TaintTracking 
private import semmle.python.Frameworks @@ -565,7 +566,7 @@ module Cryptography { /** * A data-flow configuration for tracking integer literals. */ - private class IntegerLiteralTrackerConfiguration extends DataFlow::Configuration { + private class IntegerLiteralTrackerConfiguration extends DataFlowOnlyInternalUse::Configuration { IntegerLiteralTrackerConfiguration() { this = "IntegerLiteralTrackerConfiguration" } override predicate isSource(DataFlow::Node source) { diff --git a/python/ql/src/semmle/python/dataflow/new/DataFlowOnlyInternalUse.qll b/python/ql/src/semmle/python/dataflow/new/DataFlowOnlyInternalUse.qll new file mode 100644 index 00000000000..8768f25a534 --- /dev/null +++ b/python/ql/src/semmle/python/dataflow/new/DataFlowOnlyInternalUse.qll @@ -0,0 +1,40 @@ +/** + * INTERNAL: Do not use. + * + * This copy exists to allow internal non-query usage of global data-flow analyses. If + * we used the same copy as was used in multiple queries (A, B, C), then all internal + * non-query configurations would have to be re-evaluated for _each_ query, which is + * expensive. By having a separate copy, we avoid this re-evaluation. + * + * Provides a library for local (intra-procedural) and global (inter-procedural) + * data flow analysis: deciding whether data can flow from a _source_ to a + * _sink_. + * + * Unless configured otherwise, _flow_ means that the exact value of + * the source may reach the sink. We do not track flow across pointer + * dereferences or array indexing. To track these types of flow, where the + * exact value may not be preserved, import + * `semmle.python.dataflow.new.TaintTracking`. + * + * To use global (interprocedural) data flow, extend the class + * `DataFlow::Configuration` as documented on that class. To use local + * (intraprocedural) data flow, call `DataFlow::localFlow` or + * `DataFlow::localFlowStep` with arguments of type `DataFlow::Node`. + */ + +private import python + +/** + * INTERNAL: Do not use. 
+ * + * This copy exists to allow internal non-query usage of global data-flow analyses. If + * we used the same copy as was used in multiple queries (A, B, C), then all internal + * non-query configurations would have to be re-evaluated for _each_ query, which is + * expensive. By having a separate copy, we avoid this re-evaluation. + * + * Provides classes for performing local (intra-procedural) and + * global (inter-procedural) data flow analyses. + */ +module DataFlowOnlyInternalUse { + import semmle.python.dataflow.new.internal.DataFlowImplOnlyInternalUse +} diff --git a/python/ql/src/semmle/python/dataflow/new/internal/DataFlowImplOnlyInternalUse.qll b/python/ql/src/semmle/python/dataflow/new/internal/DataFlowImplOnlyInternalUse.qll new file mode 100644 index 00000000000..59cc8d529a7 --- /dev/null +++ b/python/ql/src/semmle/python/dataflow/new/internal/DataFlowImplOnlyInternalUse.qll @@ -0,0 +1,4153 @@ +/** + * Provides an implementation of global (interprocedural) data flow. This file + * re-exports the local (intraprocedural) data flow analysis from + * `DataFlowImplSpecific::Public` and adds a global analysis, mainly exposed + * through the `Configuration` class. This file exists in several identical + * copies, allowing queries to use multiple `Configuration` classes that depend + * on each other without introducing mutual recursion among those configurations. + */ + +private import DataFlowImplCommon +private import DataFlowImplSpecific::Private +import DataFlowImplSpecific::Public + +/** + * A configuration of interprocedural data flow analysis. This defines + * sources, sinks, and any other configurable aspect of the analysis. Each + * use of the global data flow library must define its own unique extension + * of this abstract class. To create a configuration, extend this class with + * a subclass whose characteristic predicate is a unique singleton string. 
+ * For example, write + * + * ```ql + * class MyAnalysisConfiguration extends DataFlow::Configuration { + * MyAnalysisConfiguration() { this = "MyAnalysisConfiguration" } + * // Override `isSource` and `isSink`. + * // Optionally override `isBarrier`. + * // Optionally override `isAdditionalFlowStep`. + * } + * ``` + * Conceptually, this defines a graph where the nodes are `DataFlow::Node`s and + * the edges are those data-flow steps that preserve the value of the node + * along with any additional edges defined by `isAdditionalFlowStep`. + * Specifying nodes in `isBarrier` will remove those nodes from the graph, and + * specifying nodes in `isBarrierIn` and/or `isBarrierOut` will remove in-going + * and/or out-going edges from those nodes, respectively. + * + * Then, to query whether there is flow between some `source` and `sink`, + * write + * + * ```ql + * exists(MyAnalysisConfiguration cfg | cfg.hasFlow(source, sink)) + * ``` + * + * Multiple configurations can coexist, but two classes extending + * `DataFlow::Configuration` should never depend on each other. One of them + * should instead depend on a `DataFlow2::Configuration`, a + * `DataFlow3::Configuration`, or a `DataFlow4::Configuration`. + */ +abstract class Configuration extends string { + bindingset[this] + Configuration() { any() } + + /** + * Holds if `source` is a relevant data flow source. + */ + abstract predicate isSource(Node source); + + /** + * Holds if `sink` is a relevant data flow sink. + */ + abstract predicate isSink(Node sink); + + /** + * Holds if data flow through `node` is prohibited. This completely removes + * `node` from the data flow graph. + */ + predicate isBarrier(Node node) { none() } + + /** Holds if data flow into `node` is prohibited. */ + predicate isBarrierIn(Node node) { none() } + + /** Holds if data flow out of `node` is prohibited. */ + predicate isBarrierOut(Node node) { none() } + + /** Holds if data flow through nodes guarded by `guard` is prohibited. 
*/ + predicate isBarrierGuard(BarrierGuard guard) { none() } + + /** + * Holds if the additional flow step from `node1` to `node2` must be taken + * into account in the analysis. + */ + predicate isAdditionalFlowStep(Node node1, Node node2) { none() } + + /** + * Gets the virtual dispatch branching limit when calculating field flow. + * This can be overridden to a smaller value to improve performance (a + * value of 0 disables field flow), or a larger value to get more results. + */ + int fieldFlowBranchLimit() { result = 2 } + + /** + * Holds if data may flow from `source` to `sink` for this configuration. + */ + predicate hasFlow(Node source, Node sink) { flowsTo(source, sink, this) } + + /** + * Holds if data may flow from `source` to `sink` for this configuration. + * + * The corresponding paths are generated from the end-points and the graph + * included in the module `PathGraph`. + */ + predicate hasFlowPath(PathNode source, PathNode sink) { flowsTo(source, sink, _, _, this) } + + /** + * Holds if data may flow from some source to `sink` for this configuration. + */ + predicate hasFlowTo(Node sink) { hasFlow(_, sink) } + + /** + * Holds if data may flow from some source to `sink` for this configuration. + */ + predicate hasFlowToExpr(DataFlowExpr sink) { hasFlowTo(exprNode(sink)) } + + /** + * Gets the exploration limit for `hasPartialFlow` and `hasPartialFlowRev` + * measured in approximate number of interprocedural steps. + */ + int explorationLimit() { none() } + + /** + * Holds if there is a partial data flow path from `source` to `node`. The + * approximate distance between `node` and the closest source is `dist` and + * is restricted to be less than or equal to `explorationLimit()`. This + * predicate completely disregards sink definitions. + * + * This predicate is intended for data-flow exploration and debugging and may + * perform poorly if the number of sources is too big and/or the exploration + * limit is set too high without using barriers. 
+ * + * This predicate is disabled (has no results) by default. Override + * `explorationLimit()` with a suitable number to enable this predicate. + * + * To use this in a `path-problem` query, import the module `PartialPathGraph`. + */ + final predicate hasPartialFlow(PartialPathNode source, PartialPathNode node, int dist) { + partialFlow(source, node, this) and + dist = node.getSourceDistance() + } + + /** + * Holds if there is a partial data flow path from `node` to `sink`. The + * approximate distance between `node` and the closest sink is `dist` and + * is restricted to be less than or equal to `explorationLimit()`. This + * predicate completely disregards source definitions. + * + * This predicate is intended for data-flow exploration and debugging and may + * perform poorly if the number of sinks is too big and/or the exploration + * limit is set too high without using barriers. + * + * This predicate is disabled (has no results) by default. Override + * `explorationLimit()` with a suitable number to enable this predicate. + * + * To use this in a `path-problem` query, import the module `PartialPathGraph`. + * + * Note that reverse flow has slightly lower precision than the corresponding + * forward flow, as reverse flow disregards type pruning among other features. + */ + final predicate hasPartialFlowRev(PartialPathNode node, PartialPathNode sink, int dist) { + revPartialFlow(node, sink, this) and + dist = node.getSinkDistance() + } +} + +/** + * This class exists to prevent mutual recursion between the user-overridden + * member predicates of `Configuration` and the rest of the data-flow library. + * Good performance cannot be guaranteed in the presence of such recursion, so + * it should be replaced by using more than one copy of the data flow library. 
+ */ +abstract private class ConfigurationRecursionPrevention extends Configuration { + bindingset[this] + ConfigurationRecursionPrevention() { any() } + + override predicate hasFlow(Node source, Node sink) { + strictcount(Node n | this.isSource(n)) < 0 + or + strictcount(Node n | this.isSink(n)) < 0 + or + strictcount(Node n1, Node n2 | this.isAdditionalFlowStep(n1, n2)) < 0 + or + super.hasFlow(source, sink) + } +} + +private predicate inBarrier(Node node, Configuration config) { + config.isBarrierIn(node) and + config.isSource(node) +} + +private predicate outBarrier(Node node, Configuration config) { + config.isBarrierOut(node) and + config.isSink(node) +} + +private predicate fullBarrier(Node node, Configuration config) { + config.isBarrier(node) + or + config.isBarrierIn(node) and + not config.isSource(node) + or + config.isBarrierOut(node) and + not config.isSink(node) + or + exists(BarrierGuard g | + config.isBarrierGuard(g) and + node = g.getAGuardedNode() + ) +} + +private class AdditionalFlowStepSource extends Node { + AdditionalFlowStepSource() { any(Configuration c).isAdditionalFlowStep(this, _) } +} + +pragma[noinline] +private predicate isAdditionalFlowStep( + AdditionalFlowStepSource node1, Node node2, DataFlowCallable callable1, Configuration config +) { + config.isAdditionalFlowStep(node1, node2) and + callable1 = node1.getEnclosingCallable() +} + +/** + * Holds if data can flow in one local step from `node1` to `node2`. + */ +private predicate localFlowStep(Node node1, Node node2, Configuration config) { + simpleLocalFlowStep(node1, node2) and + not outBarrier(node1, config) and + not inBarrier(node2, config) and + not fullBarrier(node1, config) and + not fullBarrier(node2, config) +} + +/** + * Holds if the additional step from `node1` to `node2` does not jump between callables. 
+ */ +private predicate additionalLocalFlowStep(Node node1, Node node2, Configuration config) { + isAdditionalFlowStep(node1, node2, node2.getEnclosingCallable(), config) and + not outBarrier(node1, config) and + not inBarrier(node2, config) and + not fullBarrier(node1, config) and + not fullBarrier(node2, config) +} + +/** + * Holds if data can flow from `node1` to `node2` in a way that discards call contexts. + */ +private predicate jumpStep(Node node1, Node node2, Configuration config) { + jumpStep(node1, node2) and + not outBarrier(node1, config) and + not inBarrier(node2, config) and + not fullBarrier(node1, config) and + not fullBarrier(node2, config) +} + +/** + * Holds if the additional step from `node1` to `node2` jumps between callables. + */ +private predicate additionalJumpStep(Node node1, Node node2, Configuration config) { + exists(DataFlowCallable callable1 | + isAdditionalFlowStep(node1, node2, callable1, config) and + node2.getEnclosingCallable() != callable1 and + not outBarrier(node1, config) and + not inBarrier(node2, config) and + not fullBarrier(node1, config) and + not fullBarrier(node2, config) + ) +} + +/** + * Holds if field flow should be used for the given configuration. + */ +private predicate useFieldFlow(Configuration config) { config.fieldFlowBranchLimit() >= 1 } + +private module Stage1 { + class ApApprox = Unit; + + class Ap = Unit; + + class ApOption = Unit; + + class Cc = boolean; + + /* Begin: Stage 1 logic. */ + /** + * Holds if `node` is reachable from a source in the configuration `config`. + * + * The Boolean `cc` records whether the node is reached through an + * argument in a call. 
+ */ + predicate fwdFlow(Node node, Cc cc, Configuration config) { + not fullBarrier(node, config) and + ( + config.isSource(node) and + cc = false + or + exists(Node mid | + fwdFlow(mid, cc, config) and + localFlowStep(mid, node, config) + ) + or + exists(Node mid | + fwdFlow(mid, cc, config) and + additionalLocalFlowStep(mid, node, config) + ) + or + exists(Node mid | + fwdFlow(mid, _, config) and + jumpStep(mid, node, config) and + cc = false + ) + or + exists(Node mid | + fwdFlow(mid, _, config) and + additionalJumpStep(mid, node, config) and + cc = false + ) + or + // store + exists(Node mid | + useFieldFlow(config) and + fwdFlow(mid, cc, config) and + store(mid, _, node, _) and + not outBarrier(mid, config) + ) + or + // read + exists(Content c | + fwdFlowRead(c, node, cc, config) and + fwdFlowConsCand(c, config) and + not inBarrier(node, config) + ) + or + // flow into a callable + exists(Node arg | + fwdFlow(arg, _, config) and + viableParamArg(_, node, arg) and + cc = true + ) + or + // flow out of a callable + exists(DataFlowCall call | + fwdFlowOut(call, node, false, config) and + cc = false + or + fwdFlowOutFromArg(call, node, config) and + fwdFlowIsEntered(call, cc, config) + ) + ) + } + + private predicate fwdFlow(Node node, Configuration config) { fwdFlow(node, _, config) } + + pragma[nomagic] + private predicate fwdFlowRead(Content c, Node node, Cc cc, Configuration config) { + exists(Node mid | + fwdFlow(mid, cc, config) and + read(mid, c, node) + ) + } + + /** + * Holds if `c` is the target of a store in the flow covered by `fwdFlow`. 
+ */ + pragma[nomagic] + private predicate fwdFlowConsCand(Content c, Configuration config) { + exists(Node mid, Node node, TypedContent tc | + not fullBarrier(node, config) and + useFieldFlow(config) and + fwdFlow(mid, _, config) and + store(mid, tc, node, _) and + c = tc.getContent() + ) + } + + pragma[nomagic] + private predicate fwdFlowReturnPosition(ReturnPosition pos, Cc cc, Configuration config) { + exists(ReturnNodeExt ret | + fwdFlow(ret, cc, config) and + getReturnPosition(ret) = pos + ) + } + + pragma[nomagic] + private predicate fwdFlowOut(DataFlowCall call, Node out, Cc cc, Configuration config) { + exists(ReturnPosition pos | + fwdFlowReturnPosition(pos, cc, config) and + viableReturnPosOut(call, pos, out) + ) + } + + pragma[nomagic] + private predicate fwdFlowOutFromArg(DataFlowCall call, Node out, Configuration config) { + fwdFlowOut(call, out, true, config) + } + + /** + * Holds if an argument to `call` is reached in the flow covered by `fwdFlow`. + */ + pragma[nomagic] + private predicate fwdFlowIsEntered(DataFlowCall call, Cc cc, Configuration config) { + exists(ArgumentNode arg | + fwdFlow(arg, cc, config) and + viableParamArg(call, _, arg) + ) + } + + /** + * Holds if `node` is part of a path from a source to a sink in the + * configuration `config`. + * + * The Boolean `toReturn` records whether the node must be returned from + * the enclosing callable in order to reach a sink. 
+ */ + pragma[nomagic] + predicate revFlow(Node node, boolean toReturn, Configuration config) { + revFlow0(node, toReturn, config) and + fwdFlow(node, config) + } + + pragma[nomagic] + private predicate revFlow0(Node node, boolean toReturn, Configuration config) { + fwdFlow(node, config) and + config.isSink(node) and + toReturn = false + or + exists(Node mid | + localFlowStep(node, mid, config) and + revFlow(mid, toReturn, config) + ) + or + exists(Node mid | + additionalLocalFlowStep(node, mid, config) and + revFlow(mid, toReturn, config) + ) + or + exists(Node mid | + jumpStep(node, mid, config) and + revFlow(mid, _, config) and + toReturn = false + ) + or + exists(Node mid | + additionalJumpStep(node, mid, config) and + revFlow(mid, _, config) and + toReturn = false + ) + or + // store + exists(Content c | + revFlowStore(c, node, toReturn, config) and + revFlowConsCand(c, config) + ) + or + // read + exists(Node mid, Content c | + read(node, c, mid) and + fwdFlowConsCand(c, unbind(config)) and + revFlow(mid, toReturn, config) + ) + or + // flow into a callable + exists(DataFlowCall call | + revFlowIn(call, node, false, config) and + toReturn = false + or + revFlowInToReturn(call, node, config) and + revFlowIsReturned(call, toReturn, config) + ) + or + // flow out of a callable + exists(ReturnPosition pos | + revFlowOut(pos, config) and + getReturnPosition(node) = pos and + toReturn = true + ) + } + + /** + * Holds if `c` is the target of a read in the flow covered by `revFlow`. 
+ */ + pragma[nomagic] + private predicate revFlowConsCand(Content c, Configuration config) { + exists(Node mid, Node node | + fwdFlow(node, unbind(config)) and + read(node, c, mid) and + fwdFlowConsCand(c, unbind(config)) and + revFlow(mid, _, config) + ) + } + + pragma[nomagic] + private predicate revFlowStore(Content c, Node node, boolean toReturn, Configuration config) { + exists(Node mid, TypedContent tc | + revFlow(mid, toReturn, config) and + fwdFlowConsCand(c, unbind(config)) and + store(node, tc, mid, _) and + c = tc.getContent() + ) + } + + /** + * Holds if `c` is the target of both a read and a store in the flow covered + * by `revFlow`. + */ + private predicate revFlowIsReadAndStored(Content c, Configuration conf) { + revFlowConsCand(c, conf) and + revFlowStore(c, _, _, conf) + } + + pragma[nomagic] + predicate viableReturnPosOutNodeCandFwd1( + DataFlowCall call, ReturnPosition pos, Node out, Configuration config + ) { + fwdFlowReturnPosition(pos, _, config) and + viableReturnPosOut(call, pos, out) + } + + pragma[nomagic] + private predicate revFlowOut(ReturnPosition pos, Configuration config) { + exists(DataFlowCall call, Node out | + revFlow(out, _, config) and + viableReturnPosOutNodeCandFwd1(call, pos, out, config) + ) + } + + pragma[nomagic] + predicate viableParamArgNodeCandFwd1( + DataFlowCall call, ParameterNode p, ArgumentNode arg, Configuration config + ) { + viableParamArg(call, p, arg) and + fwdFlow(arg, config) + } + + pragma[nomagic] + private predicate revFlowIn( + DataFlowCall call, ArgumentNode arg, boolean toReturn, Configuration config + ) { + exists(ParameterNode p | + revFlow(p, toReturn, config) and + viableParamArgNodeCandFwd1(call, p, arg, config) + ) + } + + pragma[nomagic] + private predicate revFlowInToReturn(DataFlowCall call, ArgumentNode arg, Configuration config) { + revFlowIn(call, arg, true, config) + } + + /** + * Holds if an output from `call` is reached in the flow covered by `revFlow`. 
+ */ + pragma[nomagic] + private predicate revFlowIsReturned(DataFlowCall call, boolean toReturn, Configuration config) { + exists(Node out | + revFlow(out, toReturn, config) and + fwdFlowOutFromArg(call, out, config) + ) + } + + pragma[nomagic] + predicate storeStepCand( + Node node1, Ap ap1, TypedContent tc, Node node2, DataFlowType contentType, Configuration config + ) { + exists(Content c | + revFlowIsReadAndStored(c, config) and + revFlow(node2, unbind(config)) and + store(node1, tc, node2, contentType) and + c = tc.getContent() and + exists(ap1) + ) + } + + pragma[nomagic] + predicate readStepCand(Node n1, Content c, Node n2, Configuration config) { + revFlowIsReadAndStored(c, config) and + revFlow(n2, unbind(config)) and + read(n1, c, n2) + } + + pragma[nomagic] + predicate revFlow(Node node, Configuration config) { revFlow(node, _, config) } + + predicate revFlow(Node node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config) { + revFlow(node, toReturn, config) and exists(returnAp) and exists(ap) + } + + private predicate throughFlowNodeCand(Node node, Configuration config) { + revFlow(node, true, config) and + fwdFlow(node, true, config) and + not inBarrier(node, config) and + not outBarrier(node, config) + } + + /** Holds if flow may return from `callable`. */ + pragma[nomagic] + private predicate returnFlowCallableNodeCand( + DataFlowCallable callable, ReturnKindExt kind, Configuration config + ) { + exists(ReturnNodeExt ret | + throughFlowNodeCand(ret, config) and + callable = ret.getEnclosingCallable() and + kind = ret.getKind() + ) + } + + /** + * Holds if flow may enter through `p` and reach a return node making `p` a + * candidate for the origin of a summary. 
+ */ + predicate parameterMayFlowThrough(ParameterNode p, DataFlowCallable c, Ap ap, Configuration config) { + exists(ReturnKindExt kind | + throughFlowNodeCand(p, config) and + returnFlowCallableNodeCand(c, kind, config) and + p.getEnclosingCallable() = c and + exists(ap) and + // we don't expect a parameter to return stored in itself + not exists(int pos | + kind.(ParamUpdateReturnKind).getPosition() = pos and p.isParameterOf(_, pos) + ) + ) + } + + predicate stats(boolean fwd, int nodes, int fields, int conscand, int tuples, Configuration config) { + fwd = true and + nodes = count(Node node | fwdFlow(node, config)) and + fields = count(Content f0 | fwdFlowConsCand(f0, config)) and + conscand = -1 and + tuples = count(Node n, boolean b | fwdFlow(n, b, config)) + or + fwd = false and + nodes = count(Node node | revFlow(node, _, config)) and + fields = count(Content f0 | revFlowConsCand(f0, config)) and + conscand = -1 and + tuples = count(Node n, boolean b | revFlow(n, b, config)) + } + /* End: Stage 1 logic. */ +} + +bindingset[result, b] +private boolean unbindBool(boolean b) { result != b.booleanNot() } + +pragma[noinline] +private predicate localFlowStepNodeCand1(Node node1, Node node2, Configuration config) { + Stage1::revFlow(node2, config) and + localFlowStep(node1, node2, config) +} + +pragma[noinline] +private predicate additionalLocalFlowStepNodeCand1(Node node1, Node node2, Configuration config) { + Stage1::revFlow(node2, config) and + additionalLocalFlowStep(node1, node2, config) +} + +pragma[nomagic] +private predicate viableReturnPosOutNodeCand1( + DataFlowCall call, ReturnPosition pos, Node out, Configuration config +) { + Stage1::revFlow(out, config) and + Stage1::viableReturnPosOutNodeCandFwd1(call, pos, out, config) +} + +/** + * Holds if data can flow out of `call` from `ret` to `out`, either + * through a `ReturnNode` or through an argument that has been mutated, and + * that this step is part of a path from a source to a sink. 
+ */
+pragma[nomagic]
+private predicate flowOutOfCallNodeCand1(
+  DataFlowCall call, ReturnNodeExt ret, Node out, Configuration config
+) {
+  viableReturnPosOutNodeCand1(call, getReturnPosition(ret), out, config) and
+  Stage1::revFlow(ret, config) and
+  not outBarrier(ret, config) and
+  not inBarrier(out, config)
+}
+
+/**
+ * Holds if `Stage1::viableParamArgNodeCandFwd1` relates `arg` to `p` at `call`
+ * and `arg` is in `Stage1::revFlow`.
+ */
+pragma[nomagic]
+private predicate viableParamArgNodeCand1(
+  DataFlowCall call, ParameterNode p, ArgumentNode arg, Configuration config
+) {
+  Stage1::viableParamArgNodeCandFwd1(call, p, arg, config) and
+  Stage1::revFlow(arg, config)
+}
+
+/**
+ * Holds if data can flow into `call` and that this step is part of a
+ * path from a source to a sink.
+ */
+pragma[nomagic]
+private predicate flowIntoCallNodeCand1(
+  DataFlowCall call, ArgumentNode arg, ParameterNode p, Configuration config
+) {
+  viableParamArgNodeCand1(call, p, arg, config) and
+  Stage1::revFlow(p, config) and
+  not outBarrier(arg, config) and
+  not inBarrier(p, config)
+}
+
+/**
+ * Gets the amount of forward branching on the origin of a cross-call path
+ * edge in the graph of paths between sources and sinks that ignores call
+ * contexts.
+ */
+private int branch(Node n1, Configuration conf) {
+  result =
+    strictcount(Node n |
+      flowOutOfCallNodeCand1(_, n1, n, conf) or flowIntoCallNodeCand1(_, n1, n, conf)
+    )
+}
+
+/**
+ * Gets the amount of backward branching on the target of a cross-call path
+ * edge in the graph of paths between sources and sinks that ignores call
+ * contexts.
+ */
+private int join(Node n2, Configuration conf) {
+  result =
+    strictcount(Node n |
+      flowOutOfCallNodeCand1(_, n, n2, conf) or flowIntoCallNodeCand1(_, n, n2, conf)
+    )
+}
+
+/**
+ * Holds if data can flow out of `call` from `ret` to `out`, either
+ * through a `ReturnNode` or through an argument that has been mutated, and
+ * that this step is part of a path from a source to a sink. The
+ * `allowsFieldFlow` flag indicates whether the branching is within the limit
+ * specified by the configuration.
+ */
+pragma[nomagic]
+private predicate flowOutOfCallNodeCand1(
+  DataFlowCall call, ReturnNodeExt ret, Node out, boolean allowsFieldFlow, Configuration config
+) {
+  flowOutOfCallNodeCand1(call, ret, out, config) and
+  exists(int b, int j |
+    b = branch(ret, config) and
+    j = join(out, config) and
+    // the smaller of the two branching factors is compared against the limit
+    if b.minimum(j) <= config.fieldFlowBranchLimit()
+    then allowsFieldFlow = true
+    else allowsFieldFlow = false
+  )
+}
+
+/**
+ * Holds if data can flow into `call` and that this step is part of a
+ * path from a source to a sink. The `allowsFieldFlow` flag indicates whether
+ * the branching is within the limit specified by the configuration.
+ */
+pragma[nomagic]
+private predicate flowIntoCallNodeCand1(
+  DataFlowCall call, ArgumentNode arg, ParameterNode p, boolean allowsFieldFlow,
+  Configuration config
+) {
+  flowIntoCallNodeCand1(call, arg, p, config) and
+  exists(int b, int j |
+    b = branch(arg, config) and
+    j = join(p, config) and
+    // the smaller of the two branching factors is compared against the limit
+    if b.minimum(j) <= config.fieldFlowBranchLimit()
+    then allowsFieldFlow = true
+    else allowsFieldFlow = false
+  )
+}
+
+/**
+ * Pruning stage that refines `Stage1` (bound here as `PrevStage`). Access
+ * paths are approximated by a boolean (`ApNil` is `false`; `apCons` always
+ * yields `true`) and call contexts by a boolean (`CcCall` is `true`,
+ * `CcNoCall` is `false`).
+ */
+private module Stage2 {
+  module PrevStage = Stage1;
+
+  class ApApprox = PrevStage::Ap;
+
+  class Ap = boolean;
+
+  class ApNil extends Ap {
+    ApNil() { this = false }
+  }
+
+  // No constraint is placed on the approximation in this stage.
+  bindingset[result, ap]
+  private ApApprox getApprox(Ap ap) { any() }
+
+  private ApNil getApNil(Node node) { any() }
+
+  // Any cons yields the non-nil access path `true`.
+  bindingset[tc, tail]
+  private Ap apCons(TypedContent tc, Ap tail) { result = true and exists(tc) and exists(tail) }
+
+  // A non-nil access path (`true`) may have any content as its head.
+  pragma[inline]
+  private Content getHeadContent(Ap ap) { exists(result) and ap = true }
+
+  class ApOption = BooleanOption;
+
+  ApOption apNone() { result = TBooleanNone() }
+
+  ApOption apSome(Ap ap) { result = TBooleanSome(ap) }
+
+  class Cc = boolean;
+
+  class CcCall extends Cc {
+    CcCall() { this = true }
+
+    /** Holds if this call context may be `call`. */
+    predicate matchesCall(DataFlowCall call) { any() }
+  }
+
+  class CcNoCall extends Cc {
+    CcNoCall() { this = false }
+  }
+
+  Cc ccNone() { result = false }
+
+  private class LocalCc = Unit;
+
+  bindingset[call, c, outercc]
+  private CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) { any() }
+
+  bindingset[call, c]
+  private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call) { any() }
+
+  bindingset[innercc, inner, call]
+  private predicate checkCallContextReturn(Cc innercc, DataFlowCallable inner, DataFlowCall call) {
+    any()
+  }
+
+  bindingset[node, cc, config]
+  private LocalCc getLocalCc(Node node, Cc cc, Configuration config) { any() }
+
+  /**
+   * Holds if `localFlowStepNodeCand1` (`preservesValue = true`) or
+   * `additionalLocalFlowStepNodeCand1` (`preservesValue = false`) relates
+   * `node1` to `node2`. `ap` and `lcc` are left unconstrained.
+   */
+  private predicate localStep(
+    Node node1, Node node2, boolean preservesValue, ApNil ap, Configuration config, LocalCc lcc
+  ) {
+    (
+      preservesValue = true and
+      localFlowStepNodeCand1(node1, node2, config)
+      or
+      preservesValue = false and
+      additionalLocalFlowStepNodeCand1(node1, node2, config)
+    ) and
+    exists(ap) and
+    exists(lcc)
+  }
+
+  private predicate flowOutOfCall = flowOutOfCallNodeCand1/5;
+
+  private predicate flowIntoCall = flowIntoCallNodeCand1/5;
+
+  // Stores are not type checked in this stage.
+  bindingset[ap, contentType]
+  private predicate typecheckStore(Ap ap, DataFlowType contentType) { any() }
+
+  /* Begin: Stage 2 logic. */
+  /** Holds if the previous stage's `revFlow` contains `node` with access path `apa`. */
+  private predicate flowCand(Node node, ApApprox apa, Configuration config) {
+    PrevStage::revFlow(node, _, _, apa, config)
+  }
+
+  /**
+   * Holds if `node` is reachable with access path `ap` from a source in the
+   * configuration `config`.
+   *
+   * The call context `cc` records whether the node is reached through an
+   * argument in a call, and if so, `argAp` records the access path of that
+   * argument.
+   */
+  pragma[nomagic]
+  predicate fwdFlow(Node node, Cc cc, ApOption argAp, Ap ap, Configuration config) {
+    // source
+    flowCand(node, _, config) and
+    config.isSource(node) and
+    cc = ccNone() and
+    argAp = apNone() and
+    ap = getApNil(node)
+    or
+    // local step
+    exists(Node mid, Ap ap0, LocalCc localCc |
+      fwdFlow(mid, cc, argAp, ap0, config) and
+      localCc = getLocalCc(mid, cc, config)
+    |
+      localStep(mid, node, true, _, config, localCc) and
+      ap = ap0
+      or
+      localStep(mid, node, false, ap, config, localCc) and
+      ap0 instanceof ApNil
+    )
+    or
+    // jump step
+    exists(Node mid |
+      fwdFlow(mid, _, _, ap, config) and
+      flowCand(node, _, unbind(config)) and
+      jumpStep(mid, node, config) and
+      cc = ccNone() and
+      argAp = apNone()
+    )
+    or
+    // additional jump step
+    exists(Node mid, ApNil nil |
+      fwdFlow(mid, _, _, nil, config) and
+      flowCand(node, _, unbind(config)) and
+      additionalJumpStep(mid, node, config) and
+      cc = ccNone() and
+      argAp = apNone() and
+      ap = getApNil(node)
+    )
+    or
+    // store
+    exists(TypedContent tc, Ap ap0 |
+      fwdFlowStore(_, ap0, tc, node, cc, argAp, config) and
+      ap = apCons(tc, ap0)
+    )
+    or
+    // read
+    exists(Ap ap0, Content c |
+      fwdFlowRead(ap0, c, _, node, cc, argAp, config) and
+      fwdFlowConsCand(ap0, c, ap, config)
+    )
+    or
+    // flow into a callable
+    exists(ApApprox apa |
+      fwdFlowIn(_, node, _, cc, _, ap, config) and
+      apa = getApprox(ap) and
+      if PrevStage::parameterMayFlowThrough(node, _, apa, config)
+      then argAp = apSome(ap)
+      else argAp = apNone()
+    )
+    or
+    // flow out of a callable
+    exists(DataFlowCall call |
+      fwdFlowOut(call, node, any(CcNoCall innercc), cc, argAp, ap, config)
+      or
+      exists(Ap argAp0 |
+        fwdFlowOutFromArg(call, node, argAp0, ap, config) and
+        fwdFlowIsEntered(call, cc, argAp, argAp0, config)
+      )
+    )
+  }
+
+  /**
+   * Holds if forward flow reaches `node1` with access path `ap1` in context
+   * `cc`/`argAp`, and the previous stage has a store step candidate storing
+   * `tc` from `node1` into `node2` that passes `typecheckStore`.
+   */
+  pragma[nomagic]
+  private predicate fwdFlowStore(
+    Node node1, Ap ap1, TypedContent tc, Node node2, Cc cc, ApOption argAp, Configuration config
+  ) {
+    exists(DataFlowType contentType |
+      fwdFlow(node1, cc, argAp, ap1, config) and
+      PrevStage::storeStepCand(node1, getApprox(ap1), tc, node2, contentType, config) and
+      typecheckStore(ap1, contentType)
+    )
+  }
+
+  /**
+   * Holds if forward flow with access path `tail` reaches a store of `c`
+   * resulting in access path `cons`.
+   */
+  pragma[nomagic]
+  private predicate fwdFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) {
+    exists(TypedContent tc |
+      fwdFlowStore(_, tail, tc, _, _, _, config) and
+      tc.getContent() = c and
+      cons = apCons(tc, tail)
+    )
+  }
+
+  /**
+   * Holds if forward flow reaches `node1` with access path `ap` whose head
+   * content is `c`, and the previous stage has a read step candidate reading
+   * `c` from `node1` into `node2`.
+   */
+  pragma[nomagic]
+  private predicate fwdFlowRead(
+    Ap ap, Content c, Node node1, Node node2, Cc cc, ApOption argAp, Configuration config
+  ) {
+    fwdFlow(node1, cc, argAp, ap, config) and
+    PrevStage::readStepCand(node1, c, node2, config) and
+    getHeadContent(ap) = c
+  }
+
+  /**
+   * Holds if forward flow reaches an argument of `call` with access path `ap`
+   * in call context `outercc` and can step into parameter `p`, entering call
+   * context `innercc`. A non-nil `ap` requires that the call edge allows
+   * field flow.
+   */
+  pragma[nomagic]
+  private predicate fwdFlowIn(
+    DataFlowCall call, ParameterNode p, Cc outercc, Cc innercc, ApOption argAp, Ap ap,
+    Configuration config
+  ) {
+    exists(ArgumentNode arg, boolean allowsFieldFlow |
+      fwdFlow(arg, outercc, argAp, ap, config) and
+      flowIntoCall(call, arg, p, allowsFieldFlow, config) and
+      innercc = getCallContextCall(call, p.getEnclosingCallable(), outercc)
+    |
+      ap instanceof ApNil or allowsFieldFlow = true
+    )
+  }
+
+  /**
+   * Holds if flow may exit from `call` at `out` with access path `ap`. The
+   * inner call context is `innercc`, but `ccOut` is just the call context
+   * based on the return step. In the case of through-flow `ccOut` is discarded
+   * and replaced by the outer call context as tracked by `fwdFlowIsEntered`.
+   */
+  pragma[nomagic]
+  private predicate fwdFlowOut(
+    DataFlowCall call, Node out, Cc innercc, Cc ccOut, ApOption argAp, Ap ap, Configuration config
+  ) {
+    exists(ReturnNodeExt ret, boolean allowsFieldFlow, DataFlowCallable inner |
+      fwdFlow(ret, innercc, argAp, ap, config) and
+      flowOutOfCall(call, ret, out, allowsFieldFlow, config) and
+      inner = ret.getEnclosingCallable() and
+      checkCallContextReturn(innercc, inner, call) and
+      ccOut = getCallContextReturn(inner, call)
+    |
+      ap instanceof ApNil or allowsFieldFlow = true
+    )
+  }
+
+  /**
+   * Holds if flow may exit `call` at `out` with access path `ap`, where the
+   * inner call context is a `CcCall` and `argAp` is the recorded access path
+   * of the argument.
+   */
+  pragma[nomagic]
+  private predicate fwdFlowOutFromArg(
+    DataFlowCall call, Node out, Ap argAp, Ap ap, Configuration config
+  ) {
+    fwdFlowOut(call, out, any(CcCall ccc), _, apSome(argAp), ap, config)
+  }
+
+  /**
+   * Holds if an argument to `call` is reached in the flow covered by `fwdFlow`
+   * and data might flow through the target callable and back out at `call`.
+   */
+  pragma[nomagic]
+  private predicate fwdFlowIsEntered(
+    DataFlowCall call, Cc cc, ApOption argAp, Ap ap, Configuration config
+  ) {
+    exists(ParameterNode p |
+      fwdFlowIn(call, p, cc, _, argAp, ap, config) and
+      PrevStage::parameterMayFlowThrough(p, _, getApprox(ap), config)
+    )
+  }
+
+  /**
+   * Holds if forward flow takes the store of `tc` from `node1` (access path
+   * `ap1`) to `node2` (access path `ap2`), and forward flow also has a read of
+   * `tc`'s content from access path `ap2`.
+   */
+  pragma[nomagic]
+  private predicate storeStepFwd(
+    Node node1, Ap ap1, TypedContent tc, Node node2, Ap ap2, Configuration config
+  ) {
+    fwdFlowStore(node1, ap1, tc, node2, _, _, config) and
+    ap2 = apCons(tc, ap1) and
+    fwdFlowRead(ap2, tc.getContent(), _, _, _, _, config)
+  }
+
+  /**
+   * Holds if forward flow has a read of `c` from `n1` (access path `ap1`) to
+   * `n2`, where `ap2` is a tail that some forward store of `c` extends to
+   * `ap1`.
+   */
+  private predicate readStepFwd(Node n1, Ap ap1, Content c, Node n2, Ap ap2, Configuration config) {
+    fwdFlowRead(ap1, c, n1, n2, _, _, config) and
+    fwdFlowConsCand(ap1, c, ap2, config)
+  }
+
+  /**
+   * Holds if `node` with access path `ap` is part of a path from a source to a
+   * sink in the configuration `config`.
+   *
+   * The Boolean `toReturn` records whether the node must be returned from the
+   * enclosing callable in order to reach a sink, and if so, `returnAp` records
+   * the access path of the returned value.
+   */
+  pragma[nomagic]
+  predicate revFlow(Node node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config) {
+    revFlow0(node, toReturn, returnAp, ap, config) and
+    fwdFlow(node, _, _, ap, config)
+  }
+
+  /**
+   * Holds for the individual reverse-flow cases; `revFlow` intersects this
+   * with `fwdFlow`.
+   */
+  pragma[nomagic]
+  private predicate revFlow0(
+    Node node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config
+  ) {
+    // sink
+    fwdFlow(node, _, _, ap, config) and
+    config.isSink(node) and
+    toReturn = false and
+    returnAp = apNone() and
+    ap instanceof ApNil
+    or
+    // value-preserving local step
+    exists(Node mid |
+      localStep(node, mid, true, _, config, _) and
+      revFlow(mid, toReturn, returnAp, ap, config)
+    )
+    or
+    // additional local step
+    exists(Node mid, ApNil nil |
+      fwdFlow(node, _, _, ap, config) and
+      localStep(node, mid, false, _, config, _) and
+      revFlow(mid, toReturn, returnAp, nil, config) and
+      ap instanceof ApNil
+    )
+    or
+    // jump step
+    exists(Node mid |
+      jumpStep(node, mid, config) and
+      revFlow(mid, _, _, ap, config) and
+      toReturn = false and
+      returnAp = apNone()
+    )
+    or
+    // additional jump step
+    exists(Node mid, ApNil nil |
+      fwdFlow(node, _, _, ap, config) and
+      additionalJumpStep(node, mid, config) and
+      revFlow(mid, _, _, nil, config) and
+      toReturn = false and
+      returnAp = apNone() and
+      ap instanceof ApNil
+    )
+    or
+    // store
+    exists(Ap ap0, Content c |
+      revFlowStore(ap0, c, ap, node, _, _, toReturn, returnAp, config) and
+      revFlowConsCand(ap0, c, ap, config)
+    )
+    or
+    // read
+    exists(Node mid, Ap ap0 |
+      revFlow(mid, toReturn, returnAp, ap0, config) and
+      readStepFwd(node, ap, _, mid, ap0, config)
+    )
+    or
+    // flow into a callable
+    exists(DataFlowCall call |
+      revFlowIn(call, node, toReturn, returnAp, ap, config) and
+      toReturn = false
+      or
+      exists(Ap returnAp0 |
+        revFlowInToReturn(call, node, returnAp0, ap, config) and
+        revFlowIsReturned(call, toReturn, returnAp, returnAp0, config)
+      )
+    )
+    or
+    // flow out of a callable
+    revFlowOut(_, node, _, _, ap, config) and
+    toReturn = true and
+    if fwdFlow(node, any(CcCall ccc), apSome(_), ap, config)
+    then returnAp = apSome(ap)
+    else returnAp = apNone()
+  }
+
+  /**
+   * Holds if reverse flow reaches `mid` with access path `ap0`, and
+   * `storeStepFwd` relates `node` (access path `ap`) to `mid` via `tc`, whose
+   * content is `c`.
+   */
+  pragma[nomagic]
+  private predicate revFlowStore(
+    Ap ap0, Content c, Ap ap, Node node, TypedContent tc, Node mid, boolean toReturn,
+    ApOption returnAp, Configuration config
+  ) {
+    revFlow(mid, toReturn, returnAp, ap0, config) and
+    storeStepFwd(node, ap, tc, mid, ap0, config) and
+    tc.getContent() = c
+  }
+
+  /**
+   * Holds if reverse flow with access path `tail` reaches a read of `c`
+   * resulting in access path `cons`.
+   */
+  pragma[nomagic]
+  private predicate revFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) {
+    exists(Node mid |
+      revFlow(mid, _, _, tail, config) and
+      readStepFwd(_, cons, c, mid, tail, config)
+    )
+  }
+
+  /**
+   * Holds if reverse flow reaches some node with access path `ap` to which
+   * `ret` can flow out of `call`. A non-nil `ap` requires that the call edge
+   * allows field flow.
+   */
+  pragma[nomagic]
+  private predicate revFlowOut(
+    DataFlowCall call, ReturnNodeExt ret, boolean toReturn, ApOption returnAp, Ap ap,
+    Configuration config
+  ) {
+    exists(Node out, boolean allowsFieldFlow |
+      revFlow(out, toReturn, returnAp, ap, config) and
+      flowOutOfCall(call, ret, out, allowsFieldFlow, config)
+    |
+      ap instanceof ApNil or allowsFieldFlow = true
+    )
+  }
+
+  /**
+   * Holds if reverse flow reaches a parameter with access path `ap` into which
+   * `arg` flows at `call`. A non-nil `ap` requires that the call edge allows
+   * field flow.
+   */
+  pragma[nomagic]
+  private predicate revFlowIn(
+    DataFlowCall call, ArgumentNode arg, boolean toReturn, ApOption returnAp, Ap ap,
+    Configuration config
+  ) {
+    exists(ParameterNode p, boolean allowsFieldFlow |
+      revFlow(p, toReturn, returnAp, ap, config) and
+      flowIntoCall(call, arg, p, allowsFieldFlow, config)
+    |
+      ap instanceof ApNil or allowsFieldFlow = true
+    )
+  }
+
+  /**
+   * Holds if `revFlowIn` holds for `arg` with `toReturn = true` and a recorded
+   * return access path `returnAp`.
+   */
+  pragma[nomagic]
+  private predicate revFlowInToReturn(
+    DataFlowCall call, ArgumentNode arg, Ap returnAp, Ap ap, Configuration config
+  ) {
+    revFlowIn(call, arg, true, apSome(returnAp), ap, config)
+  }
+
+  /**
+   * Holds if an output from `call` is reached in the flow covered by `revFlow`
+   * and data might flow through the target callable resulting in reverse flow
+   * reaching an argument of `call`.
+   */
+  pragma[nomagic]
+  private predicate revFlowIsReturned(
+    DataFlowCall call, boolean toReturn, ApOption returnAp, Ap ap, Configuration config
+  ) {
+    exists(ReturnNodeExt ret, CcCall ccc |
+      revFlowOut(call, ret, toReturn, returnAp, ap, config) and
+      fwdFlow(ret, ccc, apSome(_), ap, config) and
+      ccc.matchesCall(call)
+    )
+  }
+
+  /**
+   * Holds if `store(node1, tc, node2, contentType)` is a store step that is
+   * part of reverse flow, with `ap1` being the access path before the store.
+   */
+  pragma[nomagic]
+  predicate storeStepCand(
+    Node node1, Ap ap1, TypedContent tc, Node node2, DataFlowType contentType, Configuration config
+  ) {
+    exists(Ap ap2, Content c |
+      store(node1, tc, node2, contentType) and
+      revFlowStore(ap2, c, ap1, node1, tc, node2, _, _, config) and
+      revFlowConsCand(ap2, c, ap1, config)
+    )
+  }
+
+  /**
+   * Holds if the read of `c` from `node1` to `node2` is part of reverse flow
+   * and is matched by a store of `c` in reverse flow.
+   */
+  predicate readStepCand(Node node1, Content c, Node node2, Configuration config) {
+    exists(Ap ap1, Ap ap2 |
+      revFlow(node2, _, _, ap2, config) and
+      readStepFwd(node1, ap1, c, node2, ap2, config) and
+      revFlowStore(ap1, c, /*unbind*/ unbindBool(ap2), _, _, _, _, _, unbind(config))
+    )
+  }
+
+  /** Holds if `node` is in `revFlow` for some access path and return state. */
+  predicate revFlow(Node node, Configuration config) { revFlow(node, _, _, _, config) }
+
+  /** Holds if `storeStepFwd` has a store of `tc` onto access path `ap`. */
+  private predicate fwdConsCand(TypedContent tc, Ap ap, Configuration config) {
+    storeStepFwd(_, ap, tc, _, _, config)
+  }
+
+  /** Holds if `storeStepCand` has a store of `tc` onto access path `ap`. */
+  predicate consCand(TypedContent tc, Ap ap, Configuration config) {
+    storeStepCand(_, ap, tc, _, _, config)
+  }
+
+  /**
+   * Holds if reverse flow reaches parameter `p` of callable `c` with access
+   * path `ap`, where the value must be returned with access path `ap0`.
+   */
+  pragma[noinline]
+  private predicate parameterFlow(
+    ParameterNode p, Ap ap, Ap ap0, DataFlowCallable c, Configuration config
+  ) {
+    revFlow(p, true, apSome(ap0), ap, config) and
+    c = p.getEnclosingCallable()
+  }
+
+  /**
+   * Holds if parameter `p` of `c` with access path `ap` may flow to a return
+   * node of `c`, excluding the case where the return kind is an update of the
+   * parameter at `p`'s own position.
+   */
+  predicate parameterMayFlowThrough(ParameterNode p, DataFlowCallable c, Ap ap, Configuration config) {
+    exists(ReturnNodeExt ret, Ap ap0, ReturnKindExt kind, int pos |
+      parameterFlow(p, ap, ap0, c, config) and
+      c = ret.getEnclosingCallable() and
+      revFlow(ret, true, apSome(_), ap0, config) and
+      fwdFlow(ret, any(CcCall ccc), apSome(ap), ap0, config) and
+      kind = ret.getKind() and
+      p.isParameterOf(_, pos) and
+      // we don't expect a parameter to return stored in itself
+      not kind.(ParamUpdateReturnKind).getPosition() = pos
+    )
+  }
+
+  /**
+   * Holds if `nodes`, `fields`, `conscand`, and `tuples` are the numbers of
+   * nodes, stored contents, content/access-path pairs, and full tuples in
+   * forward flow (`fwd = true`) or reverse flow (`fwd = false`).
+   */
+  predicate stats(boolean fwd, int nodes, int fields, int conscand, int tuples, Configuration config) {
+    fwd = true and
+    nodes = count(Node node | fwdFlow(node, _, _, _, config)) and
+    fields = count(TypedContent f0 | fwdConsCand(f0, _, config)) and
+    conscand = count(TypedContent f0, Ap ap | fwdConsCand(f0, ap, config)) and
+    tuples = count(Node n, Cc cc, ApOption argAp, Ap ap | fwdFlow(n, cc, argAp, ap, config))
+    or
+    fwd = false and
+    nodes = count(Node node | revFlow(node, _, _, _, config)) and
+    fields = count(TypedContent f0 | consCand(f0, _, config)) and
+    conscand = count(TypedContent f0, Ap ap | consCand(f0, ap, config)) and
+    tuples = count(Node n, boolean b, ApOption retAp, Ap ap | revFlow(n, b, retAp, ap, config))
+  }
+  /* End: Stage 2 logic. */
+}
+
+/**
+ * Holds if `flowOutOfCallNodeCand1` holds for this step and both `node1` and
+ * `node2` are in `Stage2::revFlow`.
+ */
+pragma[nomagic]
+private predicate flowOutOfCallNodeCand2(
+  DataFlowCall call, ReturnNodeExt node1, Node node2, boolean allowsFieldFlow, Configuration config
+) {
+  flowOutOfCallNodeCand1(call, node1, node2, allowsFieldFlow, config) and
+  Stage2::revFlow(node2, config) and
+  Stage2::revFlow(node1, unbind(config))
+}
+
+/**
+ * Holds if `flowIntoCallNodeCand1` holds for this step and both `node1` and
+ * `node2` are in `Stage2::revFlow`.
+ */
+pragma[nomagic]
+private predicate flowIntoCallNodeCand2(
+  DataFlowCall call, ArgumentNode node1, ParameterNode node2, boolean allowsFieldFlow,
+  Configuration config
+) {
+  flowIntoCallNodeCand1(call, node1, node2, allowsFieldFlow, config) and
+  Stage2::revFlow(node2, config) and
+  Stage2::revFlow(node1, unbind(config))
+}
+
+private module LocalFlowBigStep {
+  /**
+   * A node where some checking is required, and hence the big-step relation
+   * is not allowed to step over.
+   */
+  private class FlowCheckNode extends Node {
+    FlowCheckNode() {
+      this instanceof CastNode or
+      clearsContent(this, _)
+    }
+  }
+
+  /**
+   * Holds if `node` can be the first node in a maximal subsequence of local
+   * flow steps in a dataflow path.
+   */
+  predicate localFlowEntry(Node node, Configuration config) {
+    Stage2::revFlow(node, config) and
+    (
+      config.isSource(node) or
+      jumpStep(_, node, config) or
+      additionalJumpStep(_, node, config) or
+      node instanceof ParameterNode or
+      node instanceof OutNodeExt or
+      store(_, _, node, _) or
+      read(_, _, node) or
+      node instanceof FlowCheckNode
+    )
+  }
+
+  /**
+   * Holds if `node` can be the last node in a maximal subsequence of local
+   * flow steps in a dataflow path.
+   */
+  private predicate localFlowExit(Node node, Configuration config) {
+    // `node` has a non-local successor that is in `Stage2::revFlow`
+    exists(Node next | Stage2::revFlow(next, config) |
+      jumpStep(node, next, config) or
+      additionalJumpStep(node, next, config) or
+      flowIntoCallNodeCand1(_, node, next, config) or
+      flowOutOfCallNodeCand1(_, node, next, config) or
+      store(node, _, next, _) or
+      read(node, _, next)
+    )
+    or
+    node instanceof FlowCheckNode
+    or
+    config.isSink(node)
+  }
+
+  /**
+   * Holds if `additionalLocalFlowStepNodeCand1(node1, node2, config)` holds
+   * and both nodes are in `Stage2::revFlow` with the nil access path `false`.
+   */
+  pragma[noinline]
+  private predicate additionalLocalFlowStepNodeCand2(Node node1, Node node2, Configuration config) {
+    additionalLocalFlowStepNodeCand1(node1, node2, config) and
+    Stage2::revFlow(node1, _, _, false, config) and
+    Stage2::revFlow(node2, _, _, false, unbind(config))
+  }
+
+  /**
+   * Holds if the local path from `node1` to `node2` is a prefix of a maximal
+   * subsequence of local flow steps in a dataflow path.
+   *
+   * This is the transitive closure of `[additional]localFlowStep` beginning
+   * at `localFlowEntry`.
+   */
+  pragma[nomagic]
+  private predicate localFlowStepPlus(
+    Node node1, Node node2, boolean preservesValue, DataFlowType t, Configuration config,
+    LocalCallContext cc
+  ) {
+    not isUnreachableInCall(node2, cc.(LocalCallContextSpecificCall).getCall()) and
+    (
+      // base case: a single step out of a local flow entry
+      localFlowEntry(node1, config) and
+      (
+        localFlowStepNodeCand1(node1, node2, config) and
+        preservesValue = true and
+        t = getNodeType(node1)
+        or
+        additionalLocalFlowStepNodeCand2(node1, node2, config) and
+        preservesValue = false and
+        t = getNodeType(node2)
+      ) and
+      node1 != node2 and
+      cc.relevantFor(node1.getEnclosingCallable()) and
+      not isUnreachableInCall(node1, cc.(LocalCallContextSpecificCall).getCall()) and
+      Stage2::revFlow(node2, unbind(config))
+      or
+      // extend by a value-preserving step; `preservesValue` and `t` carry over
+      exists(Node mid |
+        localFlowStepPlus(node1, mid, preservesValue, t, config, cc) and
+        localFlowStepNodeCand1(mid, node2, config) and
+        not mid instanceof FlowCheckNode and
+        Stage2::revFlow(node2, unbind(config))
+      )
+      or
+      // extend by an additional step; the type becomes that of `node2`
+      exists(Node mid |
+        localFlowStepPlus(node1, mid, _, _, config, cc) and
+        additionalLocalFlowStepNodeCand2(mid, node2, config) and
+        not mid instanceof FlowCheckNode and
+        preservesValue = false and
+        t = getNodeType(node2) and
+        Stage2::revFlow(node2, unbind(config))
+      )
+    )
+  }
+
+  /**
+   * Holds if `node1` can step to `node2` in one or more local steps and this
+   * path can occur as a maximal subsequence of local steps in a dataflow path.
+   */
+  pragma[nomagic]
+  predicate localFlowBigStep(
+    Node node1, Node node2, boolean preservesValue, AccessPathFrontNil apf, Configuration config,
+    LocalCallContext callContext
+  ) {
+    localFlowStepPlus(node1, node2, preservesValue, apf.getType(), config, callContext) and
+    localFlowExit(node2, config)
+  }
+}
+
+private import LocalFlowBigStep
+
+/**
+ * Pruning stage that refines `Stage2` (bound here as `PrevStage`). Access
+ * paths are `AccessPathFront`s, mapped to the previous stage's boolean access
+ * paths by `toBoolNonEmpty()`; local steps are `localFlowBigStep`s; forward
+ * flow is additionally restricted by `filter`.
+ */
+private module Stage3 {
+  module PrevStage = Stage2;
+
+  class ApApprox = PrevStage::Ap;
+
+  class Ap = AccessPathFront;
+
+  class ApNil = AccessPathFrontNil;
+
+  private ApApprox getApprox(Ap ap) { result = ap.toBoolNonEmpty() }
+
+  private ApNil getApNil(Node node) { result = TFrontNil(getNodeType(node)) }
+
+  // Only the head is tracked; `tail` merely has to exist.
+  bindingset[tc, tail]
+  private Ap apCons(TypedContent tc, Ap tail) { result.getHead() = tc and exists(tail) }
+
+  pragma[noinline]
+  private Content getHeadContent(Ap ap) { result = ap.getHead().getContent() }
+
+  class ApOption = AccessPathFrontOption;
+
+  ApOption apNone() { result = TAccessPathFrontNone() }
+
+  ApOption apSome(Ap ap) { result = TAccessPathFrontSome(ap) }
+
+  class Cc = boolean;
+
+  class CcCall extends Cc {
+    CcCall() { this = true }
+
+    /** Holds if this call context may be `call`. */
+    predicate matchesCall(DataFlowCall call) { any() }
+  }
+
+  class CcNoCall extends Cc {
+    CcNoCall() { this = false }
+  }
+
+  Cc ccNone() { result = false }
+
+  private class LocalCc = Unit;
+
+  bindingset[call, c, outercc]
+  private CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) { any() }
+
+  bindingset[call, c]
+  private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call) { any() }
+
+  bindingset[innercc, inner, call]
+  private predicate checkCallContextReturn(Cc innercc, DataFlowCallable inner, DataFlowCall call) {
+    any()
+  }
+
+  bindingset[node, cc, config]
+  private LocalCc getLocalCc(Node node, Cc cc, Configuration config) { any() }
+
+  /**
+   * Holds if `localFlowBigStep` relates `node1` to `node2` for some local call
+   * context. `lcc` is left unconstrained.
+   */
+  private predicate localStep(
+    Node node1, Node node2, boolean preservesValue, ApNil ap, Configuration config, LocalCc lcc
+  ) {
+    localFlowBigStep(node1, node2, preservesValue, ap, config, _) and exists(lcc)
+  }
+
+  private predicate flowOutOfCall = flowOutOfCallNodeCand2/5;
+
+  private predicate flowIntoCall = flowIntoCallNodeCand2/5;
+
+  /**
+   * Holds if `ap` is not cleared at `node` and, when `node` is a `CastingNode`,
+   * the type of `ap` is compatible with the type of `node`.
+   */
+  bindingset[node, ap]
+  private predicate filter(Node node, Ap ap) {
+    not ap.isClearedAt(node) and
+    if node instanceof CastingNode then compatibleTypes(getNodeType(node), ap.getType()) else any()
+  }
+
+  bindingset[ap, contentType]
+  private predicate typecheckStore(Ap ap, DataFlowType contentType) {
+    // We need to typecheck stores here, since reverse flow through a getter
+    // might have a different type here compared to inside the getter.
+    compatibleTypes(ap.getType(), contentType)
+  }
+
+  /* Begin: Stage 3 logic. */
+  /** Holds if the previous stage's `revFlow` contains `node` with access path `apa`. */
+  private predicate flowCand(Node node, ApApprox apa, Configuration config) {
+    PrevStage::revFlow(node, _, _, apa, config)
+  }
+
+  /**
+   * Holds if `node` is reachable with access path `ap` from a source in the
+   * configuration `config`.
+   *
+   * The call context `cc` records whether the node is reached through an
+   * argument in a call, and if so, `argAp` records the access path of that
+   * argument.
+   */
+  pragma[nomagic]
+  predicate fwdFlow(Node node, Cc cc, ApOption argAp, Ap ap, Configuration config) {
+    fwdFlow0(node, cc, argAp, ap, config) and
+    flowCand(node, unbindBool(getApprox(ap)), config) and
+    filter(node, ap)
+  }
+
+  /**
+   * Holds for the individual forward-flow cases; `fwdFlow` adds the
+   * `flowCand` and `filter` checks.
+   */
+  pragma[nomagic]
+  private predicate fwdFlow0(Node node, Cc cc, ApOption argAp, Ap ap, Configuration config) {
+    // source
+    flowCand(node, _, config) and
+    config.isSource(node) and
+    cc = ccNone() and
+    argAp = apNone() and
+    ap = getApNil(node)
+    or
+    // local step
+    exists(Node mid, Ap ap0, LocalCc localCc |
+      fwdFlow(mid, cc, argAp, ap0, config) and
+      localCc = getLocalCc(mid, cc, config)
+    |
+      localStep(mid, node, true, _, config, localCc) and
+      ap = ap0
+      or
+      localStep(mid, node, false, ap, config, localCc) and
+      ap0 instanceof ApNil
+    )
+    or
+    // jump step
+    exists(Node mid |
+      fwdFlow(mid, _, _, ap, config) and
+      flowCand(node, _, unbind(config)) and
+      jumpStep(mid, node, config) and
+      cc = ccNone() and
+      argAp = apNone()
+    )
+    or
+    // additional jump step
+    exists(Node mid, ApNil nil |
+      fwdFlow(mid, _, _, nil, config) and
+      flowCand(node, _, unbind(config)) and
+      additionalJumpStep(mid, node, config) and
+      cc = ccNone() and
+      argAp = apNone() and
+      ap = getApNil(node)
+    )
+    or
+    // store
+    exists(TypedContent tc, Ap ap0 |
+      fwdFlowStore(_, ap0, tc, node, cc, argAp, config) and
+      ap = apCons(tc, ap0)
+    )
+    or
+    // read
+    exists(Ap ap0, Content c |
+      fwdFlowRead(ap0, c, _, node, cc, argAp, config) and
+      fwdFlowConsCand(ap0, c, ap, config)
+    )
+    or
+    // flow into a callable
+    exists(ApApprox apa |
+      fwdFlowIn(_, node, _, cc, _, ap, config) and
+      apa = getApprox(ap) and
+      if PrevStage::parameterMayFlowThrough(node, _, apa, config)
+      then argAp = apSome(ap)
+      else argAp = apNone()
+    )
+    or
+    // flow out of a callable
+    exists(DataFlowCall call |
+      fwdFlowOut(call, node, any(CcNoCall innercc), cc, argAp, ap, config)
+      or
+      exists(Ap argAp0 |
+        fwdFlowOutFromArg(call, node, argAp0, ap, config) and
+        fwdFlowIsEntered(call, cc, argAp, argAp0, config)
+      )
+    )
+  }
+
+  /**
+   * Holds if forward flow reaches `node1` with access path `ap1` in context
+   * `cc`/`argAp`, and the previous stage has a store step candidate storing
+   * `tc` from `node1` into `node2` that passes `typecheckStore`.
+   */
+  pragma[nomagic]
+  private predicate fwdFlowStore(
+    Node node1, Ap ap1, TypedContent tc, Node node2, Cc cc, ApOption argAp, Configuration config
+  ) {
+    exists(DataFlowType contentType |
+      fwdFlow(node1, cc, argAp, ap1, config) and
+      PrevStage::storeStepCand(node1, unbindBool(getApprox(ap1)), tc, node2, contentType, config) and
+      typecheckStore(ap1, contentType)
+    )
+  }
+
+  /**
+   * Holds if forward flow with access path `tail` reaches a store of `c`
+   * resulting in access path `cons`.
+   */
+  pragma[nomagic]
+  private predicate fwdFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) {
+    exists(TypedContent tc |
+      fwdFlowStore(_, tail, tc, _, _, _, config) and
+      tc.getContent() = c and
+      cons = apCons(tc, tail)
+    )
+  }
+
+  /**
+   * Holds if forward flow reaches `node1` with access path `ap` whose head
+   * content is `c`, and the previous stage has a read step candidate reading
+   * `c` from `node1` into `node2`.
+   */
+  pragma[nomagic]
+  private predicate fwdFlowRead(
+    Ap ap, Content c, Node node1, Node node2, Cc cc, ApOption argAp, Configuration config
+  ) {
+    fwdFlow(node1, cc, argAp, ap, config) and
+    PrevStage::readStepCand(node1, c, node2, config) and
+    getHeadContent(ap) = c
+  }
+
+  /**
+   * Holds if forward flow reaches an argument of `call` with access path `ap`
+   * in call context `outercc` and can step into parameter `p`, entering call
+   * context `innercc`. A non-nil `ap` requires that the call edge allows
+   * field flow.
+   */
+  pragma[nomagic]
+  private predicate fwdFlowIn(
+    DataFlowCall call, ParameterNode p, Cc outercc, Cc innercc, ApOption argAp, Ap ap,
+    Configuration config
+  ) {
+    exists(ArgumentNode arg, boolean allowsFieldFlow |
+      fwdFlow(arg, outercc, argAp, ap, config) and
+      flowIntoCall(call, arg, p, allowsFieldFlow, config) and
+      innercc = getCallContextCall(call, p.getEnclosingCallable(), outercc)
+    |
+      ap instanceof ApNil or allowsFieldFlow = true
+    )
+  }
+
+  /**
+   * Holds if flow may exit from `call` at `out` with access path `ap`. The
+   * inner call context is `innercc`, but `ccOut` is just the call context
+   * based on the return step. In the case of through-flow `ccOut` is discarded
+   * and replaced by the outer call context as tracked by `fwdFlowIsEntered`.
+   */
+  pragma[nomagic]
+  private predicate fwdFlowOut(
+    DataFlowCall call, Node out, Cc innercc, Cc ccOut, ApOption argAp, Ap ap, Configuration config
+  ) {
+    exists(ReturnNodeExt ret, boolean allowsFieldFlow, DataFlowCallable inner |
+      fwdFlow(ret, innercc, argAp, ap, config) and
+      flowOutOfCall(call, ret, out, allowsFieldFlow, config) and
+      inner = ret.getEnclosingCallable() and
+      checkCallContextReturn(innercc, inner, call) and
+      ccOut = getCallContextReturn(inner, call)
+    |
+      ap instanceof ApNil or allowsFieldFlow = true
+    )
+  }
+
+  /**
+   * Holds if flow may exit `call` at `out` with access path `ap`, where the
+   * inner call context is a `CcCall` and `argAp` is the recorded access path
+   * of the argument.
+   */
+  pragma[nomagic]
+  private predicate fwdFlowOutFromArg(
+    DataFlowCall call, Node out, Ap argAp, Ap ap, Configuration config
+  ) {
+    fwdFlowOut(call, out, any(CcCall ccc), _, apSome(argAp), ap, config)
+  }
+
+  /**
+   * Holds if an argument to `call` is reached in the flow covered by `fwdFlow`
+   * and data might flow through the target callable and back out at `call`.
+   */
+  pragma[nomagic]
+  private predicate fwdFlowIsEntered(
+    DataFlowCall call, Cc cc, ApOption argAp, Ap ap, Configuration config
+  ) {
+    exists(ParameterNode p |
+      fwdFlowIn(call, p, cc, _, argAp, ap, config) and
+      PrevStage::parameterMayFlowThrough(p, _, unbindBool(getApprox(ap)), config)
+    )
+  }
+
+  /**
+   * Holds if forward flow takes the store of `tc` from `node1` (access path
+   * `ap1`) to `node2` (access path `ap2`), and forward flow also has a read of
+   * `tc`'s content from access path `ap2`.
+   */
+  pragma[nomagic]
+  private predicate storeStepFwd(
+    Node node1, Ap ap1, TypedContent tc, Node node2, Ap ap2, Configuration config
+  ) {
+    fwdFlowStore(node1, ap1, tc, node2, _, _, config) and
+    ap2 = apCons(tc, ap1) and
+    fwdFlowRead(ap2, tc.getContent(), _, _, _, _, config)
+  }
+
+  /**
+   * Holds if forward flow has a read of `c` from `n1` (access path `ap1`) to
+   * `n2`, where `ap2` is a tail that some forward store of `c` extends to
+   * `ap1`.
+   */
+  private predicate readStepFwd(Node n1, Ap ap1, Content c, Node n2, Ap ap2, Configuration config) {
+    fwdFlowRead(ap1, c, n1, n2, _, _, config) and
+    fwdFlowConsCand(ap1, c, ap2, config)
+  }
+
+  /**
+   * Holds if `node` with access path `ap` is part of a path from a source to a
+   * sink in the configuration `config`.
+   *
+   * The Boolean `toReturn` records whether the node must be returned from the
+   * enclosing callable in order to reach a sink, and if so, `returnAp` records
+   * the access path of the returned value.
+   */
+  pragma[nomagic]
+  predicate revFlow(Node node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config) {
+    revFlow0(node, toReturn, returnAp, ap, config) and
+    fwdFlow(node, _, _, ap, config)
+  }
+
+  /**
+   * Holds for the individual reverse-flow cases; `revFlow` intersects this
+   * with `fwdFlow`.
+   */
+  pragma[nomagic]
+  private predicate revFlow0(
+    Node node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config
+  ) {
+    // sink
+    fwdFlow(node, _, _, ap, config) and
+    config.isSink(node) and
+    toReturn = false and
+    returnAp = apNone() and
+    ap instanceof ApNil
+    or
+    // value-preserving local step
+    exists(Node mid |
+      localStep(node, mid, true, _, config, _) and
+      revFlow(mid, toReturn, returnAp, ap, config)
+    )
+    or
+    // additional local step
+    exists(Node mid, ApNil nil |
+      fwdFlow(node, _, _, ap, config) and
+      localStep(node, mid, false, _, config, _) and
+      revFlow(mid, toReturn, returnAp, nil, config) and
+      ap instanceof ApNil
+    )
+    or
+    // jump step
+    exists(Node mid |
+      jumpStep(node, mid, config) and
+      revFlow(mid, _, _, ap, config) and
+      toReturn = false and
+      returnAp = apNone()
+    )
+    or
+    // additional jump step
+    exists(Node mid, ApNil nil |
+      fwdFlow(node, _, _, ap, config) and
+      additionalJumpStep(node, mid, config) and
+      revFlow(mid, _, _, nil, config) and
+      toReturn = false and
+      returnAp = apNone() and
+      ap instanceof ApNil
+    )
+    or
+    // store
+    exists(Ap ap0, Content c |
+      revFlowStore(ap0, c, ap, node, _, _, toReturn, returnAp, config) and
+      revFlowConsCand(ap0, c, ap, config)
+    )
+    or
+    // read
+    exists(Node mid, Ap ap0 |
+      revFlow(mid, toReturn, returnAp, ap0, config) and
+      readStepFwd(node, ap, _, mid, ap0, config)
+    )
+    or
+    // flow into a callable
+    exists(DataFlowCall call |
+      revFlowIn(call, node, toReturn, returnAp, ap, config) and
+      toReturn = false
+      or
+      exists(Ap returnAp0 |
+        revFlowInToReturn(call, node, returnAp0, ap, config) and
+        revFlowIsReturned(call, toReturn, returnAp, returnAp0, config)
+      )
+    )
+    or
+    // flow out of a callable
+    revFlowOut(_, node, _, _, ap, config) and
+    toReturn = true and
+    if fwdFlow(node, any(CcCall ccc), apSome(_), ap, config)
+    then returnAp = apSome(ap)
+    else returnAp = apNone()
+  }
+
+  /**
+   * Holds if reverse flow reaches `mid` with access path `ap0`, and
+   * `storeStepFwd` relates `node` (access path `ap`) to `mid` via `tc`, whose
+   * content is `c`.
+   */
+  pragma[nomagic]
+  private predicate revFlowStore(
+    Ap ap0, Content c, Ap ap, Node node, TypedContent tc, Node mid, boolean toReturn,
+    ApOption returnAp, Configuration config
+  ) {
+    revFlow(mid, toReturn, returnAp, ap0, config) and
+    storeStepFwd(node, ap, tc, mid, ap0, config) and
+    tc.getContent() = c
+  }
+
+  /**
+   * Holds if reverse flow with access path `tail` reaches a read of `c`
+   * resulting in access path `cons`.
+   */
+  pragma[nomagic]
+  private predicate revFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) {
+    exists(Node mid |
+      revFlow(mid, _, _, tail, config) and
+      readStepFwd(_, cons, c, mid, tail, config)
+    )
+  }
+
+  /**
+   * Holds if reverse flow reaches some node with access path `ap` to which
+   * `ret` can flow out of `call`. A non-nil `ap` requires that the call edge
+   * allows field flow.
+   */
+  pragma[nomagic]
+  private predicate revFlowOut(
+    DataFlowCall call, ReturnNodeExt ret, boolean toReturn, ApOption returnAp, Ap ap,
+    Configuration config
+  ) {
+    exists(Node out, boolean allowsFieldFlow |
+      revFlow(out, toReturn, returnAp, ap, config) and
+      flowOutOfCall(call, ret, out, allowsFieldFlow, config)
+    |
+      ap instanceof ApNil or allowsFieldFlow = true
+    )
+  }
+
+  /**
+   * Holds if reverse flow reaches a parameter with access path `ap` into which
+   * `arg` flows at `call`. A non-nil `ap` requires that the call edge allows
+   * field flow.
+   */
+  pragma[nomagic]
+  private predicate revFlowIn(
+    DataFlowCall call, ArgumentNode arg, boolean toReturn, ApOption returnAp, Ap ap,
+    Configuration config
+  ) {
+    exists(ParameterNode p, boolean allowsFieldFlow |
+      revFlow(p, toReturn, returnAp, ap, config) and
+      flowIntoCall(call, arg, p, allowsFieldFlow, config)
+    |
+      ap instanceof ApNil or allowsFieldFlow = true
+    )
+  }
+
+  /**
+   * Holds if `revFlowIn` holds for `arg` with `toReturn = true` and a recorded
+   * return access path `returnAp`.
+   */
+  pragma[nomagic]
+  private predicate revFlowInToReturn(
+    DataFlowCall call, ArgumentNode arg, Ap returnAp, Ap ap, Configuration config
+  ) {
+    revFlowIn(call, arg, true, apSome(returnAp), ap, config)
+  }
+
+  /**
+   * Holds if an output from `call` is reached in the flow covered by `revFlow`
+   * and data might flow through the target callable resulting in reverse flow
+   * reaching an argument of `call`.
+   */
+  pragma[nomagic]
+  private predicate revFlowIsReturned(
+    DataFlowCall call, boolean toReturn, ApOption returnAp, Ap ap, Configuration config
+  ) {
+    exists(ReturnNodeExt ret, CcCall ccc |
+      revFlowOut(call, ret, toReturn, returnAp, ap, config) and
+      fwdFlow(ret, ccc, apSome(_), ap, config) and
+      ccc.matchesCall(call)
+    )
+  }
+
+  /**
+   * Holds if `store(node1, tc, node2, contentType)` is a store step that is
+   * part of reverse flow, with `ap1` being the access path before the store.
+   */
+  pragma[nomagic]
+  predicate storeStepCand(
+    Node node1, Ap ap1, TypedContent tc, Node node2, DataFlowType contentType, Configuration config
+  ) {
+    exists(Ap ap2, Content c |
+      store(node1, tc, node2, contentType) and
+      revFlowStore(ap2, c, ap1, node1, tc, node2, _, _, config) and
+      revFlowConsCand(ap2, c, ap1, config)
+    )
+  }
+
+  /**
+   * Holds if the read of `c` from `node1` to `node2` is part of reverse flow
+   * and is matched by a store of `c` in reverse flow.
+   */
+  predicate readStepCand(Node node1, Content c, Node node2, Configuration config) {
+    exists(Ap ap1, Ap ap2 |
+      revFlow(node2, _, _, ap2, config) and
+      readStepFwd(node1, ap1, c, node2, ap2, config) and
+      revFlowStore(ap1, c, /*unbind*/ ap2, _, _, _, _, _, unbind(config))
+    )
+  }
+
+  /** Holds if `node` is in `revFlow` for some access path and return state. */
+  predicate revFlow(Node node, Configuration config) { revFlow(node, _, _, _, config) }
+
+  /** Holds if `storeStepFwd` has a store of `tc` onto access path `ap`. */
+  private predicate fwdConsCand(TypedContent tc, Ap ap, Configuration config) {
+    storeStepFwd(_, ap, tc, _, _, config)
+  }
+
+  /** Holds if `storeStepCand` has a store of `tc` onto access path `ap`. */
+  predicate consCand(TypedContent tc, Ap ap, Configuration config) {
+    storeStepCand(_, ap, tc, _, _, config)
+  }
+
+  /**
+   * Holds if reverse flow reaches parameter `p` of callable `c` with access
+   * path `ap`, where the value must be returned with access path `ap0`.
+   */
+  pragma[noinline]
+  private predicate parameterFlow(
+    ParameterNode p, Ap ap, Ap ap0, DataFlowCallable c, Configuration config
+  ) {
+    revFlow(p, true, apSome(ap0), ap, config) and
+    c = p.getEnclosingCallable()
+  }
+
+  /**
+   * Holds if parameter `p` of `c` with access path `ap` may flow to a return
+   * node of `c`, excluding the case where the return kind is an update of the
+   * parameter at `p`'s own position.
+   */
+  predicate parameterMayFlowThrough(ParameterNode p, DataFlowCallable c, Ap ap, Configuration config) {
+    exists(ReturnNodeExt ret, Ap ap0, ReturnKindExt kind, int pos |
+      parameterFlow(p, ap, ap0, c, config) and
+      c = ret.getEnclosingCallable() and
+      revFlow(ret, true, apSome(_), ap0, config) and
+      fwdFlow(ret, any(CcCall ccc), apSome(ap), ap0, config) and
+      kind = ret.getKind() and
+      p.isParameterOf(_, pos) and
+      // we don't expect a parameter to return stored in itself
+      not kind.(ParamUpdateReturnKind).getPosition() = pos
+    )
+  }
+
+  /**
+   * Holds if `nodes`, `fields`, `conscand`, and `tuples` are the numbers of
+   * nodes, stored contents, content/access-path pairs, and full tuples in
+   * forward flow (`fwd = true`) or reverse flow (`fwd = false`).
+   */
+  predicate stats(boolean fwd, int nodes, int fields, int conscand, int tuples, Configuration config) {
+    fwd = true and
+    nodes = count(Node node | fwdFlow(node, _, _, _, config)) and
+    fields = count(TypedContent f0 | fwdConsCand(f0, _, config)) and
+    conscand = count(TypedContent f0, Ap ap | fwdConsCand(f0, ap, config)) and
+    tuples = count(Node n, Cc cc, ApOption argAp, Ap ap | fwdFlow(n, cc, argAp, ap, config))
+    or
+    fwd = false and
+    nodes = count(Node node | revFlow(node, _, _, _, config)) and
+    fields = count(TypedContent f0 | consCand(f0, _, config)) and
+    conscand = count(TypedContent f0, Ap ap | consCand(f0, ap, config)) and
+    tuples = count(Node n, boolean b, ApOption retAp, Ap ap | revFlow(n, b, retAp, ap, config))
+  }
+  /* End: Stage 3 logic. */
+}
+
+/**
+ * Holds if `argApf` is recorded as the summary context for flow reaching `node`
+ * and remains relevant for the following pruning stage.
+ */
+private predicate flowCandSummaryCtx(Node node, AccessPathFront argApf, Configuration config) {
+  exists(AccessPathFront apf |
+    Stage3::revFlow(node, true, _, apf, config) and
+    Stage3::fwdFlow(node, true, TAccessPathFrontSome(argApf), apf, config)
+  )
+}
+
+/**
+ * Holds if a length 2 access path approximation with the head `tc` is expected
+ * to be expensive.
+ */ +private predicate expensiveLen2unfolding(TypedContent tc, Configuration config) { + exists(int tails, int nodes, int apLimit, int tupleLimit | + tails = strictcount(AccessPathFront apf | Stage3::consCand(tc, apf, config)) and + nodes = + strictcount(Node n | + Stage3::revFlow(n, _, _, any(AccessPathFrontHead apf | apf.getHead() = tc), config) + or + flowCandSummaryCtx(n, any(AccessPathFrontHead apf | apf.getHead() = tc), config) + ) and + accessPathApproxCostLimits(apLimit, tupleLimit) and + apLimit < tails and + tupleLimit < (tails - 1) * nodes + ) +} + +private newtype TAccessPathApprox = + TNil(DataFlowType t) or + TConsNil(TypedContent tc, DataFlowType t) { + Stage3::consCand(tc, TFrontNil(t), _) and + not expensiveLen2unfolding(tc, _) + } or + TConsCons(TypedContent tc1, TypedContent tc2, int len) { + Stage3::consCand(tc1, TFrontHead(tc2), _) and + len in [2 .. accessPathLimit()] and + not expensiveLen2unfolding(tc1, _) + } or + TCons1(TypedContent tc, int len) { + len in [1 .. accessPathLimit()] and + expensiveLen2unfolding(tc, _) + } + +/** + * Conceptually a list of `TypedContent`s followed by a `DataFlowType`, but only + * the first two elements of the list and its length are tracked. If data flows + * from a source to a given node with a given `AccessPathApprox`, this indicates + * the sequence of dereference operations needed to get from the value in the node + * to the tracked object. The final type indicates the type of the tracked object. + */ +abstract private class AccessPathApprox extends TAccessPathApprox { + abstract string toString(); + + abstract TypedContent getHead(); + + abstract int len(); + + abstract DataFlowType getType(); + + abstract AccessPathFront getFront(); + + /** Gets the access path obtained by popping `head` from this path, if any. 
*/ + abstract AccessPathApprox pop(TypedContent head); +} + +private class AccessPathApproxNil extends AccessPathApprox, TNil { + private DataFlowType t; + + AccessPathApproxNil() { this = TNil(t) } + + override string toString() { result = concat(": " + ppReprType(t)) } + + override TypedContent getHead() { none() } + + override int len() { result = 0 } + + override DataFlowType getType() { result = t } + + override AccessPathFront getFront() { result = TFrontNil(t) } + + override AccessPathApprox pop(TypedContent head) { none() } +} + +abstract private class AccessPathApproxCons extends AccessPathApprox { } + +private class AccessPathApproxConsNil extends AccessPathApproxCons, TConsNil { + private TypedContent tc; + private DataFlowType t; + + AccessPathApproxConsNil() { this = TConsNil(tc, t) } + + override string toString() { + // The `concat` becomes "" if `ppReprType` has no result. + result = "[" + tc.toString() + "]" + concat(" : " + ppReprType(t)) + } + + override TypedContent getHead() { result = tc } + + override int len() { result = 1 } + + override DataFlowType getType() { result = tc.getContainerType() } + + override AccessPathFront getFront() { result = TFrontHead(tc) } + + override AccessPathApprox pop(TypedContent head) { head = tc and result = TNil(t) } +} + +private class AccessPathApproxConsCons extends AccessPathApproxCons, TConsCons { + private TypedContent tc1; + private TypedContent tc2; + private int len; + + AccessPathApproxConsCons() { this = TConsCons(tc1, tc2, len) } + + override string toString() { + if len = 2 + then result = "[" + tc1.toString() + ", " + tc2.toString() + "]" + else result = "[" + tc1.toString() + ", " + tc2.toString() + ", ... 
(" + len.toString() + ")]" + } + + override TypedContent getHead() { result = tc1 } + + override int len() { result = len } + + override DataFlowType getType() { result = tc1.getContainerType() } + + override AccessPathFront getFront() { result = TFrontHead(tc1) } + + override AccessPathApprox pop(TypedContent head) { + head = tc1 and + ( + result = TConsCons(tc2, _, len - 1) + or + len = 2 and + result = TConsNil(tc2, _) + or + result = TCons1(tc2, len - 1) + ) + } +} + +private class AccessPathApproxCons1 extends AccessPathApproxCons, TCons1 { + private TypedContent tc; + private int len; + + AccessPathApproxCons1() { this = TCons1(tc, len) } + + override string toString() { + if len = 1 + then result = "[" + tc.toString() + "]" + else result = "[" + tc.toString() + ", ... (" + len.toString() + ")]" + } + + override TypedContent getHead() { result = tc } + + override int len() { result = len } + + override DataFlowType getType() { result = tc.getContainerType() } + + override AccessPathFront getFront() { result = TFrontHead(tc) } + + override AccessPathApprox pop(TypedContent head) { + head = tc and + ( + exists(TypedContent tc2 | Stage3::consCand(tc, TFrontHead(tc2), _) | + result = TConsCons(tc2, _, len - 1) + or + len = 2 and + result = TConsNil(tc2, _) + or + result = TCons1(tc2, len - 1) + ) + or + exists(DataFlowType t | + len = 1 and + Stage3::consCand(tc, TFrontNil(t), _) and + result = TNil(t) + ) + ) + } +} + +/** Gets the access path obtained by popping `tc` from `apa`, if any. */ +private AccessPathApprox pop(TypedContent tc, AccessPathApprox apa) { result = apa.pop(tc) } + +/** Gets the access path obtained by pushing `tc` onto `apa`. 
*/ +private AccessPathApprox push(TypedContent tc, AccessPathApprox apa) { apa = pop(tc, result) } + +private newtype TAccessPathApproxOption = + TAccessPathApproxNone() or + TAccessPathApproxSome(AccessPathApprox apa) + +private class AccessPathApproxOption extends TAccessPathApproxOption { + string toString() { + this = TAccessPathApproxNone() and result = "" + or + this = TAccessPathApproxSome(any(AccessPathApprox apa | result = apa.toString())) + } +} + +private module Stage4 { + module PrevStage = Stage3; + + class ApApprox = PrevStage::Ap; + + class Ap = AccessPathApprox; + + class ApNil = AccessPathApproxNil; + + private ApApprox getApprox(Ap ap) { result = ap.getFront() } + + private ApNil getApNil(Node node) { result = TNil(getNodeType(node)) } + + bindingset[tc, tail] + private Ap apCons(TypedContent tc, Ap tail) { result = push(tc, tail) } + + pragma[noinline] + private Content getHeadContent(Ap ap) { result = ap.getHead().getContent() } + + class ApOption = AccessPathApproxOption; + + ApOption apNone() { result = TAccessPathApproxNone() } + + ApOption apSome(Ap ap) { result = TAccessPathApproxSome(ap) } + + class Cc = CallContext; + + class CcCall = CallContextCall; + + class CcNoCall = CallContextNoCall; + + Cc ccNone() { result instanceof CallContextAny } + + private class LocalCc = LocalCallContext; + + bindingset[call, c, outercc] + private CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) { + c = resolveCall(call, outercc) and + if recordDataFlowCallSite(call, c) then result = TSpecificCall(call) else result = TSomeCall() + } + + bindingset[call, c] + private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call) { + if reducedViableImplInReturn(c, call) then result = TReturn(c, call) else result = ccNone() + } + + bindingset[innercc, inner, call] + private predicate checkCallContextReturn(Cc innercc, DataFlowCallable inner, DataFlowCall call) { + resolveReturn(innercc, inner, call) + or + 
innercc.(CallContextCall).matchesCall(call) + } + + bindingset[node, cc, config] + private LocalCc getLocalCc(Node node, Cc cc, Configuration config) { + localFlowEntry(node, config) and + result = getLocalCallContext(cc, node.getEnclosingCallable()) + } + + private predicate localStep( + Node node1, Node node2, boolean preservesValue, ApNil ap, Configuration config, LocalCc lcc + ) { + localFlowBigStep(node1, node2, preservesValue, ap.getFront(), config, lcc) + } + + pragma[nomagic] + private predicate flowOutOfCall( + DataFlowCall call, ReturnNodeExt node1, Node node2, boolean allowsFieldFlow, + Configuration config + ) { + flowOutOfCallNodeCand2(call, node1, node2, allowsFieldFlow, config) and + PrevStage::revFlow(node2, _, _, _, config) and + PrevStage::revFlow(node1, _, _, _, unbind(config)) + } + + pragma[nomagic] + private predicate flowIntoCall( + DataFlowCall call, ArgumentNode node1, ParameterNode node2, boolean allowsFieldFlow, + Configuration config + ) { + flowIntoCallNodeCand2(call, node1, node2, allowsFieldFlow, config) and + PrevStage::revFlow(node2, _, _, _, config) and + PrevStage::revFlow(node1, _, _, _, unbind(config)) + } + + bindingset[node, ap] + private predicate filter(Node node, Ap ap) { any() } + + // Type checking is not necessary here as it has already been done in stage 3. + bindingset[ap, contentType] + private predicate typecheckStore(Ap ap, DataFlowType contentType) { any() } + + /* Begin: Stage 4 logic. */ + private predicate flowCand(Node node, ApApprox apa, Configuration config) { + PrevStage::revFlow(node, _, _, apa, config) + } + + /** + * Holds if `node` is reachable with access path `ap` from a source in the + * configuration `config`. + * + * The call context `cc` records whether the node is reached through an + * argument in a call, and if so, `argAp` records the access path of that + * argument. 
+ */ + pragma[nomagic] + predicate fwdFlow(Node node, Cc cc, ApOption argAp, Ap ap, Configuration config) { + fwdFlow0(node, cc, argAp, ap, config) and + flowCand(node, getApprox(ap), config) and + filter(node, ap) + } + + pragma[nomagic] + private predicate fwdFlow0(Node node, Cc cc, ApOption argAp, Ap ap, Configuration config) { + flowCand(node, _, config) and + config.isSource(node) and + cc = ccNone() and + argAp = apNone() and + ap = getApNil(node) + or + exists(Node mid, Ap ap0, LocalCc localCc | + fwdFlow(mid, cc, argAp, ap0, config) and + localCc = getLocalCc(mid, cc, config) + | + localStep(mid, node, true, _, config, localCc) and + ap = ap0 + or + localStep(mid, node, false, ap, config, localCc) and + ap0 instanceof ApNil + ) + or + exists(Node mid | + fwdFlow(mid, _, _, ap, config) and + flowCand(node, _, unbind(config)) and + jumpStep(mid, node, config) and + cc = ccNone() and + argAp = apNone() + ) + or + exists(Node mid, ApNil nil | + fwdFlow(mid, _, _, nil, config) and + flowCand(node, _, unbind(config)) and + additionalJumpStep(mid, node, config) and + cc = ccNone() and + argAp = apNone() and + ap = getApNil(node) + ) + or + // store + exists(TypedContent tc, Ap ap0 | + fwdFlowStore(_, ap0, tc, node, cc, argAp, config) and + ap = apCons(tc, ap0) + ) + or + // read + exists(Ap ap0, Content c | + fwdFlowRead(ap0, c, _, node, cc, argAp, config) and + fwdFlowConsCand(ap0, c, ap, config) + ) + or + // flow into a callable + exists(ApApprox apa | + fwdFlowIn(_, node, _, cc, _, ap, config) and + apa = getApprox(ap) and + if PrevStage::parameterMayFlowThrough(node, _, apa, config) + then argAp = apSome(ap) + else argAp = apNone() + ) + or + // flow out of a callable + exists(DataFlowCall call | + fwdFlowOut(call, node, any(CcNoCall innercc), cc, argAp, ap, config) + or + exists(Ap argAp0 | + fwdFlowOutFromArg(call, node, argAp0, ap, config) and + fwdFlowIsEntered(call, cc, argAp, argAp0, config) + ) + ) + } + + pragma[nomagic] + private predicate 
fwdFlowStore( + Node node1, Ap ap1, TypedContent tc, Node node2, Cc cc, ApOption argAp, Configuration config + ) { + exists(DataFlowType contentType | + fwdFlow(node1, cc, argAp, ap1, config) and + PrevStage::storeStepCand(node1, getApprox(ap1), tc, node2, contentType, config) and + typecheckStore(ap1, contentType) + ) + } + + /** + * Holds if forward flow with access path `tail` reaches a store of `c` + * resulting in access path `cons`. + */ + pragma[nomagic] + private predicate fwdFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) { + exists(TypedContent tc | + fwdFlowStore(_, tail, tc, _, _, _, config) and + tc.getContent() = c and + cons = apCons(tc, tail) + ) + } + + pragma[nomagic] + private predicate fwdFlowRead( + Ap ap, Content c, Node node1, Node node2, Cc cc, ApOption argAp, Configuration config + ) { + fwdFlow(node1, cc, argAp, ap, config) and + PrevStage::readStepCand(node1, c, node2, config) and + getHeadContent(ap) = c + } + + pragma[nomagic] + private predicate fwdFlowIn( + DataFlowCall call, ParameterNode p, Cc outercc, Cc innercc, ApOption argAp, Ap ap, + Configuration config + ) { + exists(ArgumentNode arg, boolean allowsFieldFlow | + fwdFlow(arg, outercc, argAp, ap, config) and + flowIntoCall(call, arg, p, allowsFieldFlow, config) and + innercc = getCallContextCall(call, p.getEnclosingCallable(), outercc) + | + ap instanceof ApNil or allowsFieldFlow = true + ) + } + + /** + * Holds if flow may exit from `call` at `out` with access path `ap`. The + * inner call context is `innercc`, but `ccOut` is just the call context + * based on the return step. In the case of through-flow `ccOut` is discarded + * and replaced by the outer call context as tracked by `fwdFlowIsEntered`. 
+ */ + pragma[nomagic] + private predicate fwdFlowOut( + DataFlowCall call, Node out, Cc innercc, Cc ccOut, ApOption argAp, Ap ap, Configuration config + ) { + exists(ReturnNodeExt ret, boolean allowsFieldFlow, DataFlowCallable inner | + fwdFlow(ret, innercc, argAp, ap, config) and + flowOutOfCall(call, ret, out, allowsFieldFlow, config) and + inner = ret.getEnclosingCallable() and + checkCallContextReturn(innercc, inner, call) and + ccOut = getCallContextReturn(inner, call) + | + ap instanceof ApNil or allowsFieldFlow = true + ) + } + + pragma[nomagic] + private predicate fwdFlowOutFromArg( + DataFlowCall call, Node out, Ap argAp, Ap ap, Configuration config + ) { + fwdFlowOut(call, out, any(CcCall ccc), _, apSome(argAp), ap, config) + } + + /** + * Holds if an argument to `call` is reached in the flow covered by `fwdFlow` + * and data might flow through the target callable and back out at `call`. + */ + pragma[nomagic] + private predicate fwdFlowIsEntered( + DataFlowCall call, Cc cc, ApOption argAp, Ap ap, Configuration config + ) { + exists(ParameterNode p | + fwdFlowIn(call, p, cc, _, argAp, ap, config) and + PrevStage::parameterMayFlowThrough(p, _, getApprox(ap), config) + ) + } + + pragma[nomagic] + private predicate storeStepFwd( + Node node1, Ap ap1, TypedContent tc, Node node2, Ap ap2, Configuration config + ) { + fwdFlowStore(node1, ap1, tc, node2, _, _, config) and + ap2 = apCons(tc, ap1) and + fwdFlowRead(ap2, tc.getContent(), _, _, _, _, config) + } + + private predicate readStepFwd(Node n1, Ap ap1, Content c, Node n2, Ap ap2, Configuration config) { + fwdFlowRead(ap1, c, n1, n2, _, _, config) and + fwdFlowConsCand(ap1, c, ap2, config) + } + + /** + * Holds if `node` with access path `ap` is part of a path from a source to a + * sink in the configuration `config`. 
+ * + * The Boolean `toReturn` records whether the node must be returned from the + * enclosing callable in order to reach a sink, and if so, `returnAp` records + * the access path of the returned value. + */ + pragma[nomagic] + predicate revFlow(Node node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config) { + revFlow0(node, toReturn, returnAp, ap, config) and + fwdFlow(node, _, _, ap, config) + } + + pragma[nomagic] + private predicate revFlow0( + Node node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config + ) { + fwdFlow(node, _, _, ap, config) and + config.isSink(node) and + toReturn = false and + returnAp = apNone() and + ap instanceof ApNil + or + exists(Node mid | + localStep(node, mid, true, _, config, _) and + revFlow(mid, toReturn, returnAp, ap, config) + ) + or + exists(Node mid, ApNil nil | + fwdFlow(node, _, _, ap, config) and + localStep(node, mid, false, _, config, _) and + revFlow(mid, toReturn, returnAp, nil, config) and + ap instanceof ApNil + ) + or + exists(Node mid | + jumpStep(node, mid, config) and + revFlow(mid, _, _, ap, config) and + toReturn = false and + returnAp = apNone() + ) + or + exists(Node mid, ApNil nil | + fwdFlow(node, _, _, ap, config) and + additionalJumpStep(node, mid, config) and + revFlow(mid, _, _, nil, config) and + toReturn = false and + returnAp = apNone() and + ap instanceof ApNil + ) + or + // store + exists(Ap ap0, Content c | + revFlowStore(ap0, c, ap, node, _, _, toReturn, returnAp, config) and + revFlowConsCand(ap0, c, ap, config) + ) + or + // read + exists(Node mid, Ap ap0 | + revFlow(mid, toReturn, returnAp, ap0, config) and + readStepFwd(node, ap, _, mid, ap0, config) + ) + or + // flow into a callable + exists(DataFlowCall call | + revFlowIn(call, node, toReturn, returnAp, ap, config) and + toReturn = false + or + exists(Ap returnAp0 | + revFlowInToReturn(call, node, returnAp0, ap, config) and + revFlowIsReturned(call, toReturn, returnAp, returnAp0, config) + ) + ) + or + // 
flow out of a callable + revFlowOut(_, node, _, _, ap, config) and + toReturn = true and + if fwdFlow(node, any(CcCall ccc), apSome(_), ap, config) + then returnAp = apSome(ap) + else returnAp = apNone() + } + + pragma[nomagic] + private predicate revFlowStore( + Ap ap0, Content c, Ap ap, Node node, TypedContent tc, Node mid, boolean toReturn, + ApOption returnAp, Configuration config + ) { + revFlow(mid, toReturn, returnAp, ap0, config) and + storeStepFwd(node, ap, tc, mid, ap0, config) and + tc.getContent() = c + } + + /** + * Holds if reverse flow with access path `tail` reaches a read of `c` + * resulting in access path `cons`. + */ + pragma[nomagic] + private predicate revFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) { + exists(Node mid | + revFlow(mid, _, _, tail, config) and + readStepFwd(_, cons, c, mid, tail, config) + ) + } + + pragma[nomagic] + private predicate revFlowOut( + DataFlowCall call, ReturnNodeExt ret, boolean toReturn, ApOption returnAp, Ap ap, + Configuration config + ) { + exists(Node out, boolean allowsFieldFlow | + revFlow(out, toReturn, returnAp, ap, config) and + flowOutOfCall(call, ret, out, allowsFieldFlow, config) + | + ap instanceof ApNil or allowsFieldFlow = true + ) + } + + pragma[nomagic] + private predicate revFlowIn( + DataFlowCall call, ArgumentNode arg, boolean toReturn, ApOption returnAp, Ap ap, + Configuration config + ) { + exists(ParameterNode p, boolean allowsFieldFlow | + revFlow(p, toReturn, returnAp, ap, config) and + flowIntoCall(call, arg, p, allowsFieldFlow, config) + | + ap instanceof ApNil or allowsFieldFlow = true + ) + } + + pragma[nomagic] + private predicate revFlowInToReturn( + DataFlowCall call, ArgumentNode arg, Ap returnAp, Ap ap, Configuration config + ) { + revFlowIn(call, arg, true, apSome(returnAp), ap, config) + } + + /** + * Holds if an output from `call` is reached in the flow covered by `revFlow` + * and data might flow through the target callable resulting in reverse flow + * 
reaching an argument of `call`. + */ + pragma[nomagic] + private predicate revFlowIsReturned( + DataFlowCall call, boolean toReturn, ApOption returnAp, Ap ap, Configuration config + ) { + exists(ReturnNodeExt ret, CcCall ccc | + revFlowOut(call, ret, toReturn, returnAp, ap, config) and + fwdFlow(ret, ccc, apSome(_), ap, config) and + ccc.matchesCall(call) + ) + } + + pragma[nomagic] + predicate storeStepCand( + Node node1, Ap ap1, TypedContent tc, Node node2, DataFlowType contentType, Configuration config + ) { + exists(Ap ap2, Content c | + store(node1, tc, node2, contentType) and + revFlowStore(ap2, c, ap1, node1, tc, node2, _, _, config) and + revFlowConsCand(ap2, c, ap1, config) + ) + } + + predicate readStepCand(Node node1, Content c, Node node2, Configuration config) { + exists(Ap ap1, Ap ap2 | + revFlow(node2, _, _, ap2, config) and + readStepFwd(node1, ap1, c, node2, ap2, config) and + revFlowStore(ap1, c, /*unbind*/ ap2, _, _, _, _, _, unbind(config)) + ) + } + + predicate revFlow(Node node, Configuration config) { revFlow(node, _, _, _, config) } + + private predicate fwdConsCand(TypedContent tc, Ap ap, Configuration config) { + storeStepFwd(_, ap, tc, _, _, config) + } + + predicate consCand(TypedContent tc, Ap ap, Configuration config) { + storeStepCand(_, ap, tc, _, _, config) + } + + pragma[noinline] + private predicate parameterFlow( + ParameterNode p, Ap ap, Ap ap0, DataFlowCallable c, Configuration config + ) { + revFlow(p, true, apSome(ap0), ap, config) and + c = p.getEnclosingCallable() + } + + predicate parameterMayFlowThrough(ParameterNode p, DataFlowCallable c, Ap ap, Configuration config) { + exists(ReturnNodeExt ret, Ap ap0, ReturnKindExt kind, int pos | + parameterFlow(p, ap, ap0, c, config) and + c = ret.getEnclosingCallable() and + revFlow(ret, true, apSome(_), ap0, config) and + fwdFlow(ret, any(CcCall ccc), apSome(ap), ap0, config) and + kind = ret.getKind() and + p.isParameterOf(_, pos) and + // we don't expect a parameter to return 
stored in itself + not kind.(ParamUpdateReturnKind).getPosition() = pos + ) + } + + predicate stats(boolean fwd, int nodes, int fields, int conscand, int tuples, Configuration config) { + fwd = true and + nodes = count(Node node | fwdFlow(node, _, _, _, config)) and + fields = count(TypedContent f0 | fwdConsCand(f0, _, config)) and + conscand = count(TypedContent f0, Ap ap | fwdConsCand(f0, ap, config)) and + tuples = count(Node n, Cc cc, ApOption argAp, Ap ap | fwdFlow(n, cc, argAp, ap, config)) + or + fwd = false and + nodes = count(Node node | revFlow(node, _, _, _, config)) and + fields = count(TypedContent f0 | consCand(f0, _, config)) and + conscand = count(TypedContent f0, Ap ap | consCand(f0, ap, config)) and + tuples = count(Node n, boolean b, ApOption retAp, Ap ap | revFlow(n, b, retAp, ap, config)) + } + /* End: Stage 4 logic. */ +} + +bindingset[conf, result] +private Configuration unbind(Configuration conf) { result >= conf and result <= conf } + +private predicate nodeMayUseSummary(Node n, AccessPathApprox apa, Configuration config) { + exists(DataFlowCallable c, AccessPathApprox apa0 | + Stage4::parameterMayFlowThrough(_, c, apa, _) and + Stage4::revFlow(n, true, _, apa0, config) and + Stage4::fwdFlow(n, any(CallContextCall ccc), TAccessPathApproxSome(apa), apa0, config) and + n.getEnclosingCallable() = c + ) +} + +private newtype TSummaryCtx = + TSummaryCtxNone() or + TSummaryCtxSome(ParameterNode p, AccessPath ap) { + Stage4::parameterMayFlowThrough(p, _, ap.getApprox(), _) + } + +/** + * A context for generating flow summaries. This represents flow entry through + * a specific parameter with an access path of a specific shape. + * + * Summaries are only created for parameters that may flow through. + */ +abstract private class SummaryCtx extends TSummaryCtx { + abstract string toString(); +} + +/** A summary context from which no flow summary can be generated. 
*/ +private class SummaryCtxNone extends SummaryCtx, TSummaryCtxNone { + override string toString() { result = "" } +} + +/** A summary context from which a flow summary can be generated. */ +private class SummaryCtxSome extends SummaryCtx, TSummaryCtxSome { + private ParameterNode p; + private AccessPath ap; + + SummaryCtxSome() { this = TSummaryCtxSome(p, ap) } + + int getParameterPos() { p.isParameterOf(_, result) } + + override string toString() { result = p + ": " + ap } + + predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + p.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } +} + +/** + * Gets the number of length 2 access path approximations that correspond to `apa`. + */ +private int count1to2unfold(AccessPathApproxCons1 apa, Configuration config) { + exists(TypedContent tc, int len | + tc = apa.getHead() and + len = apa.len() and + result = + strictcount(AccessPathFront apf | + Stage4::consCand(tc, any(AccessPathApprox ap | ap.getFront() = apf and ap.len() = len - 1), + config) + ) + ) +} + +private int countNodesUsingAccessPath(AccessPathApprox apa, Configuration config) { + result = + strictcount(Node n | Stage4::revFlow(n, _, _, apa, config) or nodeMayUseSummary(n, apa, config)) +} + +/** + * Holds if a length 2 access path approximation matching `apa` is expected + * to be expensive. 
+ */ +private predicate expensiveLen1to2unfolding(AccessPathApproxCons1 apa, Configuration config) { + exists(int aps, int nodes, int apLimit, int tupleLimit | + aps = count1to2unfold(apa, config) and + nodes = countNodesUsingAccessPath(apa, config) and + accessPathCostLimits(apLimit, tupleLimit) and + apLimit < aps and + tupleLimit < (aps - 1) * nodes + ) +} + +private AccessPathApprox getATail(AccessPathApprox apa, Configuration config) { + exists(TypedContent head | + apa.pop(head) = result and + Stage4::consCand(head, result, config) + ) +} + +/** + * Holds with `unfold = false` if a precise head-tail representation of `apa` is + * expected to be expensive. Holds with `unfold = true` otherwise. + */ +private predicate evalUnfold(AccessPathApprox apa, boolean unfold, Configuration config) { + exists(int aps, int nodes, int apLimit, int tupleLimit | + aps = countPotentialAps(apa, config) and + nodes = countNodesUsingAccessPath(apa, config) and + accessPathCostLimits(apLimit, tupleLimit) and + if apLimit < aps and tupleLimit < (aps - 1) * nodes then unfold = false else unfold = true + ) +} + +/** + * Gets the number of `AccessPath`s that correspond to `apa`. + */ +private int countAps(AccessPathApprox apa, Configuration config) { + evalUnfold(apa, false, config) and + result = 1 and + (not apa instanceof AccessPathApproxCons1 or expensiveLen1to2unfolding(apa, config)) + or + evalUnfold(apa, false, config) and + result = count1to2unfold(apa, config) and + not expensiveLen1to2unfolding(apa, config) + or + evalUnfold(apa, true, config) and + result = countPotentialAps(apa, config) +} + +/** + * Gets the number of `AccessPath`s that would correspond to `apa` assuming + * that it is expanded to a precise head-tail representation. 
+ */ +language[monotonicAggregates] +private int countPotentialAps(AccessPathApprox apa, Configuration config) { + apa instanceof AccessPathApproxNil and result = 1 + or + result = strictsum(AccessPathApprox tail | tail = getATail(apa, config) | countAps(tail, config)) +} + +private newtype TAccessPath = + TAccessPathNil(DataFlowType t) or + TAccessPathCons(TypedContent head, AccessPath tail) { + exists(AccessPathApproxCons apa | + not evalUnfold(apa, false, _) and + head = apa.getHead() and + tail.getApprox() = getATail(apa, _) + ) + } or + TAccessPathCons2(TypedContent head1, TypedContent head2, int len) { + exists(AccessPathApproxCons apa | + evalUnfold(apa, false, _) and + not expensiveLen1to2unfolding(apa, _) and + apa.len() = len and + head1 = apa.getHead() and + head2 = getATail(apa, _).getHead() + ) + } or + TAccessPathCons1(TypedContent head, int len) { + exists(AccessPathApproxCons apa | + evalUnfold(apa, false, _) and + expensiveLen1to2unfolding(apa, _) and + apa.len() = len and + head = apa.getHead() + ) + } + +private newtype TPathNode = + TPathNodeMid(Node node, CallContext cc, SummaryCtx sc, AccessPath ap, Configuration config) { + // A PathNode is introduced by a source ... + Stage4::revFlow(node, config) and + config.isSource(node) and + cc instanceof CallContextAny and + sc instanceof SummaryCtxNone and + ap = TAccessPathNil(getNodeType(node)) + or + // ... or a step from an existing PathNode to another node. + exists(PathNodeMid mid | + pathStep(mid, node, cc, sc, ap) and + config = mid.getConfiguration() and + Stage4::revFlow(node, _, _, ap.getApprox(), unbind(config)) + ) + } or + TPathNodeSink(Node node, Configuration config) { + config.isSink(node) and + Stage4::revFlow(node, unbind(config)) and + ( + // A sink that is also a source ... + config.isSource(node) + or + // ... 
or a sink that can be reached from a source + exists(PathNodeMid mid | + pathStep(mid, node, _, _, TAccessPathNil(_)) and + config = unbind(mid.getConfiguration()) + ) + ) + } + +/** + * A list of `TypedContent`s followed by a `DataFlowType`. If data flows from a + * source to a given node with a given `AccessPath`, this indicates the sequence + * of dereference operations needed to get from the value in the node to the + * tracked object. The final type indicates the type of the tracked object. + */ +abstract private class AccessPath extends TAccessPath { + /** Gets the head of this access path, if any. */ + abstract TypedContent getHead(); + + /** Gets the tail of this access path, if any. */ + abstract AccessPath getTail(); + + /** Gets the front of this access path. */ + abstract AccessPathFront getFront(); + + /** Gets the approximation of this access path. */ + abstract AccessPathApprox getApprox(); + + /** Gets the length of this access path. */ + abstract int length(); + + /** Gets a textual representation of this access path. */ + abstract string toString(); + + /** Gets the access path obtained by popping `tc` from this access path, if any. */ + final AccessPath pop(TypedContent tc) { + result = this.getTail() and + tc = this.getHead() + } + + /** Gets the access path obtained by pushing `tc` onto this access path. 
*/ + final AccessPath push(TypedContent tc) { this = result.pop(tc) } +} + +private class AccessPathNil extends AccessPath, TAccessPathNil { + private DataFlowType t; + + AccessPathNil() { this = TAccessPathNil(t) } + + DataFlowType getType() { result = t } + + override TypedContent getHead() { none() } + + override AccessPath getTail() { none() } + + override AccessPathFrontNil getFront() { result = TFrontNil(t) } + + override AccessPathApproxNil getApprox() { result = TNil(t) } + + override int length() { result = 0 } + + override string toString() { result = concat(": " + ppReprType(t)) } +} + +private class AccessPathCons extends AccessPath, TAccessPathCons { + private TypedContent head; + private AccessPath tail; + + AccessPathCons() { this = TAccessPathCons(head, tail) } + + override TypedContent getHead() { result = head } + + override AccessPath getTail() { result = tail } + + override AccessPathFrontHead getFront() { result = TFrontHead(head) } + + override AccessPathApproxCons getApprox() { + result = TConsNil(head, tail.(AccessPathNil).getType()) + or + result = TConsCons(head, tail.getHead(), this.length()) + or + result = TCons1(head, this.length()) + } + + override int length() { result = 1 + tail.length() } + + private string toStringImpl(boolean needsSuffix) { + exists(DataFlowType t | + tail = TAccessPathNil(t) and + needsSuffix = false and + result = head.toString() + "]" + concat(" : " + ppReprType(t)) + ) + or + result = head + ", " + tail.(AccessPathCons).toStringImpl(needsSuffix) + or + exists(TypedContent tc2, TypedContent tc3, int len | tail = TAccessPathCons2(tc2, tc3, len) | + result = head + ", " + tc2 + ", " + tc3 + ", ... (" and len > 2 and needsSuffix = true + or + result = head + ", " + tc2 + ", " + tc3 + "]" and len = 2 and needsSuffix = false + ) + or + exists(TypedContent tc2, int len | tail = TAccessPathCons1(tc2, len) | + result = head + ", " + tc2 + ", ... 
(" and len > 1 and needsSuffix = true + or + result = head + ", " + tc2 + "]" and len = 1 and needsSuffix = false + ) + } + + /** Gets `[` followed by `toStringImpl`, completed with the length and `)]` when a suffix is needed. */ + override string toString() { + result = "[" + this.toStringImpl(true) + length().toString() + ")]" + or + result = "[" + this.toStringImpl(false) + } +} + +/** + * An access path of length `len` for which only the first two elements, + * `head1` and `head2`, are represented. + */ +private class AccessPathCons2 extends AccessPath, TAccessPathCons2 { + private TypedContent head1; + private TypedContent head2; + private int len; + + AccessPathCons2() { this = TAccessPathCons2(head1, head2, len) } + + override TypedContent getHead() { result = head1 } + + /** + * Gets a tail of this access path. The tail is not stored: it is any access + * path of length `len - 1` with head `head2` whose approximation satisfies + * `Stage4::consCand` together with `head1`. + */ + override AccessPath getTail() { + Stage4::consCand(head1, result.getApprox(), _) and + result.getHead() = head2 and + result.length() = len - 1 + } + + override AccessPathFrontHead getFront() { result = TFrontHead(head1) } + + override AccessPathApproxCons getApprox() { + result = TConsCons(head1, head2, len) or + result = TCons1(head1, len) + } + + override int length() { result = len } + + override string toString() { + if len = 2 + then result = "[" + head1.toString() + ", " + head2.toString() + "]" + else + result = "[" + head1.toString() + ", " + head2.toString() + ", ... (" + len.toString() + ")]" + } +} + +/** + * An access path of length `len` for which only the first element, `head`, + * is represented. + */ +private class AccessPathCons1 extends AccessPath, TAccessPathCons1 { + private TypedContent head; + private int len; + + AccessPathCons1() { this = TAccessPathCons1(head, len) } + + override TypedContent getHead() { result = head } + + /** + * Gets a tail of this access path. The tail is not stored: it is any access + * path of length `len - 1` whose approximation satisfies `Stage4::consCand` + * together with `head`. + */ + override AccessPath getTail() { + Stage4::consCand(head, result.getApprox(), _) and result.length() = len - 1 + } + + override AccessPathFrontHead getFront() { result = TFrontHead(head) } + + override AccessPathApproxCons getApprox() { result = TCons1(head, len) } + + override int length() { result = len } + + override string toString() { + if len = 1 + then result = "[" + head.toString() + "]" + else result = "[" + head.toString() + ", ...
(" + len.toString() + ")]" + } +} + +/** + * A `Node` augmented with a call context (except for sinks), an access path, and a configuration. + * Only those `PathNode`s that are reachable from a source are generated. + */ +class PathNode extends TPathNode { + /** Gets a textual representation of this element. */ + string toString() { none() } + + /** + * Gets a textual representation of this element, including a textual + * representation of the call context. + */ + string toStringWithContext() { none() } + + /** + * Holds if this element is at the specified location. + * The location spans column `startcolumn` of line `startline` to + * column `endcolumn` of line `endline` in file `filepath`. + * For more information, see + * [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html). + */ + predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + none() + } + + /** Gets the underlying `Node`. */ + Node getNode() { none() } + + /** Gets the associated configuration. */ + Configuration getConfiguration() { none() } + + /** + * Holds if `nodeIsHidden` holds for the underlying node and this path node + * is neither a source nor a `PathNodeSink`. + */ + private predicate isHidden() { + nodeIsHidden(this.getNode()) and + not this.isSource() and + not this instanceof PathNodeSink + } + + /** Gets a direct successor of this node, provided that this node is hidden. */ + private PathNode getASuccessorIfHidden() { + this.isHidden() and + result = this.(PathNodeImpl).getASuccessorImpl() + } + + /** + * Gets a successor of this node, if any. Chains of hidden nodes are skipped: + * neither this node nor the result is hidden, and any nodes stepped over in + * between are hidden. + */ + final PathNode getASuccessor() { + result = this.(PathNodeImpl).getASuccessorImpl().getASuccessorIfHidden*() and + not this.isHidden() and + not result.isHidden() + } + + /** Holds if this node is a source.
*/ + predicate isSource() { none() } +} + +/** + * The internal base class of path nodes. It declares `getASuccessorImpl` and + * provides the shared `toString`, `toStringWithContext`, and `hasLocationInfo` + * implementations. + */ +abstract private class PathNodeImpl extends PathNode { + /** Gets a direct successor of this node, without skipping hidden nodes. */ + abstract PathNode getASuccessorImpl(); + + /** + * Gets the access path of this node formatted for printing: empty for sinks + * and for empty textual representations, otherwise prefixed with a space. + */ + private string ppAp() { + this instanceof PathNodeSink and result = "" + or + exists(string s | s = this.(PathNodeMid).getAp().toString() | + if s = "" then result = "" else result = " " + s + ) + } + + /** + * Gets the call context of this node formatted for printing as ` <...>`; + * empty for sinks. + */ + private string ppCtx() { + this instanceof PathNodeSink and result = "" + or + result = " <" + this.(PathNodeMid).getCallContext().toString() + ">" + } + + override string toString() { result = this.getNode().toString() + ppAp() } + + override string toStringWithContext() { result = this.getNode().toString() + ppAp() + ppCtx() } + + override predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + this.getNode().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } +} + +/** Holds if `n` can reach a sink. */ +private predicate reach(PathNode n) { n instanceof PathNodeSink or reach(n.getASuccessor()) } + +/** Holds if `n1.getASuccessor() = n2` and `n2` can reach a sink. */ +private predicate pathSucc(PathNode n1, PathNode n2) { n1.getASuccessor() = n2 and reach(n2) } + +/** Holds if `n2` can be reached from `n1` by one or more `pathSucc` steps. */ +private predicate pathSuccPlus(PathNode n1, PathNode n2) = fastTC(pathSucc/2)(n1, n2) + +/** + * Provides the query predicates needed to include a graph in a path-problem query. + */ +module PathGraph { + /** Holds if `(a,b)` is an edge in the graph of data flow path explanations. */ + query predicate edges(PathNode a, PathNode b) { pathSucc(a, b) } + + /** Holds if `n` is a node in the graph of data flow path explanations. */ + query predicate nodes(PathNode n, string key, string val) { + reach(n) and key = "semmle.label" and val = n.toString() + } +} + +/** + * An intermediate flow graph node. This is a tuple consisting of a `Node`, + * a `CallContext`, a `SummaryCtx`, an `AccessPath`, and a `Configuration`.
+ */ +private class PathNodeMid extends PathNodeImpl, TPathNodeMid { + Node node; + CallContext cc; + SummaryCtx sc; + AccessPath ap; + Configuration config; + + PathNodeMid() { this = TPathNodeMid(node, cc, sc, ap, config) } + + override Node getNode() { result = node } + + /** Gets the call context of this node. */ + CallContext getCallContext() { result = cc } + + /** Gets the summary context of this node. */ + SummaryCtx getSummaryCtx() { result = sc } + + /** Gets the access path of this node. */ + AccessPath getAp() { result = ap } + + override Configuration getConfiguration() { result = config } + + /** + * Gets an intermediate node reached from this node by a single `pathStep` + * under the same configuration. + */ + private PathNodeMid getSuccMid() { + pathStep(this, result.getNode(), result.getCallContext(), result.getSummaryCtx(), result.getAp()) and + result.getConfiguration() = unbind(this.getConfiguration()) + } + + override PathNodeImpl getASuccessorImpl() { + // an intermediate step to another intermediate node + result = getSuccMid() + or + // a final step to a sink via zero steps means we merge the last two steps to prevent trivial-looking edges + exists(PathNodeMid mid, PathNodeSink sink | + mid = getSuccMid() and + mid.getNode() = sink.getNode() and + mid.getAp() instanceof AccessPathNil and + sink.getConfiguration() = unbind(mid.getConfiguration()) and + result = sink + ) + } + + /** + * Holds if this node is a source: `node` is a source of `config`, the call + * context is `CallContextAny`, there is no summary context, and the access + * path is empty. + */ + override predicate isSource() { + config.isSource(node) and + cc instanceof CallContextAny and + sc instanceof SummaryCtxNone and + ap instanceof AccessPathNil + } +} + +/** + * A flow graph node corresponding to a sink. This is disjoint from the + * intermediate nodes in order to uniquely correspond to a given sink by + * excluding the `CallContext`. + */ +private class PathNodeSink extends PathNodeImpl, TPathNodeSink { + Node node; + Configuration config; + + PathNodeSink() { this = TPathNodeSink(node, config) } + + override Node getNode() { result = node } + + override Configuration getConfiguration() { result = config } + + /** A sink node has no successors. */ + override PathNode getASuccessorImpl() { none() } + + override predicate isSource() { config.isSource(node) } +} + +/** + * Holds if data may flow from `mid` to `node`.
The last step in or out of + * a callable is recorded by `cc`. + */ +private predicate pathStep(PathNodeMid mid, Node node, CallContext cc, SummaryCtx sc, AccessPath ap) { + /* local flow: call context and summary context are unchanged */ + exists(AccessPath ap0, Node midnode, Configuration conf, LocalCallContext localCC | + midnode = mid.getNode() and + conf = mid.getConfiguration() and + cc = mid.getCallContext() and + sc = mid.getSummaryCtx() and + localCC = getLocalCallContext(cc, midnode.getEnclosingCallable()) and + ap0 = mid.getAp() + | + localFlowBigStep(midnode, node, true, _, conf, localCC) and + ap = ap0 + or + localFlowBigStep(midnode, node, false, ap.getFront(), conf, localCC) and + ap0 instanceof AccessPathNil + ) + or + /* jump step: call context and summary context are reset */ + jumpStep(mid.getNode(), node, mid.getConfiguration()) and + cc instanceof CallContextAny and + sc instanceof SummaryCtxNone and + ap = mid.getAp() + or + /* additional jump step: only for an empty access path, which is re-typed at `node` */ + additionalJumpStep(mid.getNode(), node, mid.getConfiguration()) and + cc instanceof CallContextAny and + sc instanceof SummaryCtxNone and + mid.getAp() instanceof AccessPathNil and + ap = TAccessPathNil(getNodeType(node)) + or + /* store: the access path of `mid` is `ap` with its head `tc` popped */ + exists(TypedContent tc | pathStoreStep(mid, node, ap.pop(tc), tc, cc)) and + sc = mid.getSummaryCtx() + or + /* read: the access path of `mid` is `ap` with `tc` pushed */ + exists(TypedContent tc | pathReadStep(mid, node, ap.push(tc), tc, cc)) and + sc = mid.getSummaryCtx() + or + pathIntoCallable(mid, node, _, cc, sc, _) and ap = mid.getAp() + or + pathOutOfCallable(mid, node, cc) and ap = mid.getAp() and sc instanceof SummaryCtxNone + or + pathThroughCallable(mid, node, cc, ap) and sc = mid.getSummaryCtx() +} + +/** + * Holds if `mid` has access path `ap0` with head `tc` and call context `cc`, and + * `Stage4::readStepCand` holds for a read of the content of `tc` from the node + * of `mid` to `node` under the configuration of `mid`. + */ +pragma[nomagic] +private predicate pathReadStep( + PathNodeMid mid, Node node, AccessPath ap0, TypedContent tc, CallContext cc +) { + ap0 = mid.getAp() and + tc = ap0.getHead() and + Stage4::readStepCand(mid.getNode(), tc.getContent(), node, mid.getConfiguration()) and + cc = mid.getCallContext() +} + +/** + * Holds if `mid` has access path `ap0` and call context `cc`, and + * `Stage4::storeStepCand` holds for a store of `tc` from the node of `mid` + * into `node` under the configuration of `mid`. + */ +pragma[nomagic] +private predicate pathStoreStep( + PathNodeMid mid, Node node, AccessPath ap0, TypedContent tc, CallContext cc +) { + ap0 = mid.getAp() and +
Stage4::storeStepCand(mid.getNode(), _, tc, node, _, mid.getConfiguration()) and + cc = mid.getCallContext() +} + +/** + * Holds if the node of `mid` is at return position `pos`, and `mid` has call + * context `innercc` (which must be a `CallContextNoCall`), an access path + * approximated by `apa`, and configuration `config`. + */ +private predicate pathOutOfCallable0( + PathNodeMid mid, ReturnPosition pos, CallContext innercc, AccessPathApprox apa, + Configuration config +) { + pos = getReturnPosition(mid.getNode()) and + innercc = mid.getCallContext() and + innercc instanceof CallContextNoCall and + apa = mid.getAp().getApprox() and + config = mid.getConfiguration() +} + +/** + * Holds if `mid` is at a return position of kind `kind` and `resolveReturn` + * relates its call context and enclosing callable to `call`. The outer context + * `cc` is a `TReturn` context when `reducedViableImplInReturn` holds for the + * callable and `call`, and `TAnyCallContext()` otherwise. + */ +pragma[nomagic] +private predicate pathOutOfCallable1( + PathNodeMid mid, DataFlowCall call, ReturnKindExt kind, CallContext cc, AccessPathApprox apa, + Configuration config +) { + exists(ReturnPosition pos, DataFlowCallable c, CallContext innercc | + pathOutOfCallable0(mid, pos, innercc, apa, config) and + c = pos.getCallable() and + kind = pos.getKind() and + resolveReturn(innercc, c, call) + | + if reducedViableImplInReturn(c, call) then cc = TReturn(c, call) else cc = TAnyCallContext() + ) +} + +/** + * Gets an out node of `call` for return kind `kind` for which + * `Stage4::revFlow` holds with `apa` and `config`. + */ +pragma[noinline] +private Node getAnOutNodeFlow( + ReturnKindExt kind, DataFlowCall call, AccessPathApprox apa, Configuration config +) { + result = kind.getAnOutNode(call) and + Stage4::revFlow(result, _, _, apa, config) +} + +/** + * Holds if data may flow from `mid` to `out`. The last step of this path + * is a return from a callable and is recorded by `cc`, if needed. + */ +pragma[noinline] +private predicate pathOutOfCallable(PathNodeMid mid, Node out, CallContext cc) { + exists(ReturnKindExt kind, DataFlowCall call, AccessPathApprox apa, Configuration config | + pathOutOfCallable1(mid, call, kind, cc, apa, config) and + out = getAnOutNodeFlow(kind, call, apa, config) + ) +} + +/** + * Holds if data may flow from `mid` to the `i`th argument of `call` in `cc`.
+ */ +pragma[noinline] +private predicate pathIntoArg( + PathNodeMid mid, int i, CallContext cc, DataFlowCall call, AccessPath ap, AccessPathApprox apa +) { + exists(ArgumentNode arg | + arg = mid.getNode() and + cc = mid.getCallContext() and + arg.argumentOf(call, i) and + ap = mid.getAp() and + apa = ap.getApprox() + ) +} + +/** + * Holds if `Stage4::revFlow` holds with `apa` and `config` for the `i`th + * parameter of `callable`. + */ +pragma[noinline] +private predicate parameterCand( + DataFlowCallable callable, int i, AccessPathApprox apa, Configuration config +) { + exists(ParameterNode p | + Stage4::revFlow(p, _, _, apa, config) and + p.isParameterOf(callable, i) + ) +} + +/** + * Holds if `mid` is the `i`th argument of `call` in context `outercc` with + * access path `ap`, `callable` is `resolveCall(call, outercc)`, and + * `parameterCand` holds for the `i`th parameter of `callable`. + */ +pragma[nomagic] +private predicate pathIntoCallable0( + PathNodeMid mid, DataFlowCallable callable, int i, CallContext outercc, DataFlowCall call, + AccessPath ap +) { + exists(AccessPathApprox apa | + pathIntoArg(mid, i, outercc, call, ap, apa) and + callable = resolveCall(call, outercc) and + /* NOTE(review): `any(int j | j <= i and j >= i)` is equal to `i`; presumably written this way to influence join ordering - confirm. */ + parameterCand(callable, any(int j | j <= i and j >= i), apa, mid.getConfiguration()) + ) +} + +/** + * Holds if data may flow from `mid` to `p` through `call`. The contexts + * before and after entering the callable are `outercc` and `innercc`, + * respectively. + * + * The summary context `sc` is `TSummaryCtxSome(p, ap)` if that value exists + * for the access path `ap` of `mid`, and `TSummaryCtxNone()` otherwise. + */ +private predicate pathIntoCallable( + PathNodeMid mid, ParameterNode p, CallContext outercc, CallContextCall innercc, SummaryCtx sc, + DataFlowCall call +) { + exists(int i, DataFlowCallable callable, AccessPath ap | + pathIntoCallable0(mid, callable, i, outercc, call, ap) and + p.isParameterOf(callable, i) and + ( + sc = TSummaryCtxSome(p, ap) + or + not exists(TSummaryCtxSome(p, ap)) and + sc = TSummaryCtxNone() + ) + | + if recordDataFlowCallSite(call, callable) + then innercc = TSpecificCall(call) + else innercc = TSomeCall() + ) +} + +/** Holds if data may flow from a parameter given by `sc` to a return of kind `kind`.
*/ +pragma[nomagic] +private predicate paramFlowsThrough( + ReturnKindExt kind, CallContextCall cc, SummaryCtxSome sc, AccessPath ap, AccessPathApprox apa, + Configuration config +) { + exists(PathNodeMid mid, ReturnNodeExt ret, int pos | + mid.getNode() = ret and + kind = ret.getKind() and + cc = mid.getCallContext() and + sc = mid.getSummaryCtx() and + config = mid.getConfiguration() and + ap = mid.getAp() and + apa = ap.getApprox() and + pos = sc.getParameterPos() and + /* exclude returns that are an update of the very parameter the flow entered through */ + not kind.(ParamUpdateReturnKind).getPosition() = pos + ) +} + +/** + * Holds if `mid` flows into a callable through `call` in context `cc`, and + * `paramFlowsThrough` holds for the resulting inner call context and summary + * context with return kind `kind` and access path `ap` (approximated by `apa`). + */ +pragma[nomagic] +private predicate pathThroughCallable0( + DataFlowCall call, PathNodeMid mid, ReturnKindExt kind, CallContext cc, AccessPath ap, + AccessPathApprox apa +) { + exists(CallContext innercc, SummaryCtx sc | + pathIntoCallable(mid, _, cc, innercc, sc, call) and + paramFlowsThrough(kind, innercc, sc, ap, apa, unbind(mid.getConfiguration())) + ) +} + +/** + * Holds if data may flow from `mid` through a callable to the node `out`. + * The context `cc` is restored to its value prior to entering the callable. + */ +pragma[noinline] +private predicate pathThroughCallable(PathNodeMid mid, Node out, CallContext cc, AccessPath ap) { + exists(DataFlowCall call, ReturnKindExt kind, AccessPathApprox apa | + pathThroughCallable0(call, mid, kind, cc, ap, apa) and + out = getAnOutNodeFlow(kind, call, apa, unbind(mid.getConfiguration())) + ) +} + +/** + * Holds if data can flow (inter-procedurally) from `source` to `sink`. + * `flowsource` and `flowsink` are the path nodes whose underlying nodes are + * `source` and `sink`, respectively. + * + * Will only have results if `configuration` has non-empty sources and + * sinks. + */ +private predicate flowsTo( + PathNode flowsource, PathNodeSink flowsink, Node source, Node sink, Configuration configuration +) { + flowsource.isSource() and + flowsource.getConfiguration() = configuration and + flowsource.getNode() = source and + (flowsource = flowsink or pathSuccPlus(flowsource, flowsink)) and + flowsink.getNode() = sink +} + +/** + * Holds if data can flow (inter-procedurally) from `source` to `sink`.
+ * + * Will only have results if `configuration` has non-empty sources and + * sinks. + */ +predicate flowsTo(Node source, Node sink, Configuration configuration) { + flowsTo(_, _, source, sink, configuration) +} + +/** + * Computes metrics over the generated `PathNode`s. With `fwd = true` all path + * nodes are counted; with `fwd = false` only those for which `reach` holds. + * + * `nodes` is the number of distinct underlying nodes, `fields` the number of + * distinct access path heads, `conscand` the number of distinct access paths, + * and `tuples` the number of path nodes. + */ +private predicate finalStats(boolean fwd, int nodes, int fields, int conscand, int tuples) { + fwd = true and + nodes = count(Node n0 | exists(PathNode pn | pn.getNode() = n0)) and + fields = count(TypedContent f0 | exists(PathNodeMid pn | pn.getAp().getHead() = f0)) and + conscand = count(AccessPath ap | exists(PathNodeMid pn | pn.getAp() = ap)) and + tuples = count(PathNode pn) + or + fwd = false and + nodes = count(Node n0 | exists(PathNode pn | pn.getNode() = n0 and reach(pn))) and + fields = count(TypedContent f0 | exists(PathNodeMid pn | pn.getAp().getHead() = f0 and reach(pn))) and + conscand = count(AccessPath ap | exists(PathNodeMid pn | pn.getAp() = ap and reach(pn))) and + tuples = count(PathNode pn | reach(pn)) +} + +/** + * INTERNAL: Only for debugging. + * + * Calculates per-stage metrics for data flow.
+ */ +predicate stageStats( + int n, string stage, int nodes, int fields, int conscand, int tuples, Configuration config +) { + stage = "1 Fwd" and n = 10 and Stage1::stats(true, nodes, fields, conscand, tuples, config) + or + stage = "1 Rev" and n = 15 and Stage1::stats(false, nodes, fields, conscand, tuples, config) + or + stage = "2 Fwd" and n = 20 and Stage2::stats(true, nodes, fields, conscand, tuples, config) + or + stage = "2 Rev" and n = 25 and Stage2::stats(false, nodes, fields, conscand, tuples, config) + or + stage = "3 Fwd" and n = 30 and Stage3::stats(true, nodes, fields, conscand, tuples, config) + or + stage = "3 Rev" and n = 35 and Stage3::stats(false, nodes, fields, conscand, tuples, config) + or + stage = "4 Fwd" and n = 40 and Stage4::stats(true, nodes, fields, conscand, tuples, config) + or + stage = "4 Rev" and n = 45 and Stage4::stats(false, nodes, fields, conscand, tuples, config) + or + /* NOTE: unlike the `StageN::stats` predicates, `finalStats` takes no `config` argument. */ + stage = "5 Fwd" and n = 50 and finalStats(true, nodes, fields, conscand, tuples) + or + stage = "5 Rev" and n = 55 and finalStats(false, nodes, fields, conscand, tuples) +} + +private module FlowExploration { + /** + * Holds if there is a jump step, an additional jump step, an + * argument-to-parameter step, or a return step from a node in `c1` to a node + * in a different callable `c2`. + */ + private predicate callableStep(DataFlowCallable c1, DataFlowCallable c2, Configuration config) { + exists(Node node1, Node node2 | + jumpStep(node1, node2, config) + or + additionalJumpStep(node1, node2, config) + or + // flow into callable + viableParamArg(_, node2, node1) + or + // flow out of a callable + viableReturnPosOut(_, getReturnPosition(node1), node2) + | + c1 = node1.getEnclosingCallable() and + c2 = node2.getEnclosingCallable() and + c1 != c2 + ) + } + + /** + * Holds if `c` contains a source of `config`, or can be reached from such a + * callable by `callableStep`s. + */ + private predicate interestingCallableSrc(DataFlowCallable c, Configuration config) { + exists(Node n | config.isSource(n) and c = n.getEnclosingCallable()) + or + exists(DataFlowCallable mid | + interestingCallableSrc(mid, config) and callableStep(mid, c, config) + ) + } + + /** + * Holds if `c` contains a sink of `config`, or can reach such a callable by + * `callableStep`s. + */ + private predicate interestingCallableSink(DataFlowCallable c, Configuration config) { + exists(Node n |
config.isSink(n) and c = n.getEnclosingCallable()) + or + exists(DataFlowCallable mid | + interestingCallableSink(mid, config) and callableStep(c, mid, config) + ) + } + + /** + * The interesting callables, each paired with a configuration, together with + * two extra elements: `TCallableSrc`, which steps to every callable containing + * a source, and `TCallableSink`, which is stepped to from every callable + * containing a sink (see `callableExtStepFwd`). + */ + private newtype TCallableExt = + TCallable(DataFlowCallable c, Configuration config) { + interestingCallableSrc(c, config) or + interestingCallableSink(c, config) + } or + TCallableSrc() or + TCallableSink() + + /** Holds for the single `TCallableSrc` element. */ + private predicate callableExtSrc(TCallableSrc src) { any() } + + /** Holds for the single `TCallableSink` element. */ + private predicate callableExtSink(TCallableSink sink) { any() } + + /** + * Holds if there is a forward step from `ce1` to `ce2`: a `callableStep` + * between two callables under the same configuration, a step from + * `TCallableSrc` to a callable containing a source, or a step from a callable + * containing a sink to `TCallableSink`. + */ + private predicate callableExtStepFwd(TCallableExt ce1, TCallableExt ce2) { + exists(DataFlowCallable c1, DataFlowCallable c2, Configuration config | + callableStep(c1, c2, config) and + ce1 = TCallable(c1, config) and + ce2 = TCallable(c2, unbind(config)) + ) + or + exists(Node n, Configuration config | + ce1 = TCallableSrc() and + config.isSource(n) and + ce2 = TCallable(n.getEnclosingCallable(), config) + ) + or + exists(Node n, Configuration config | + ce2 = TCallableSink() and + config.isSink(n) and + ce1 = TCallable(n.getEnclosingCallable(), config) + ) + } + + /** Holds if there is a forward step from `ce2` to `ce1`. */ + private predicate callableExtStepRev(TCallableExt ce1, TCallableExt ce2) { + callableExtStepFwd(ce2, ce1) + } + + /** Gets the `shortestDistances` value for `c`, starting from `TCallableSrc` and following forward steps. */ + private int distSrcExt(TCallableExt c) = + shortestDistances(callableExtSrc/1, callableExtStepFwd/2)(_, c, result) + + /** Gets the `shortestDistances` value for `c`, starting from `TCallableSink` and following reverse steps. */ + private int distSinkExt(TCallableExt c) = + shortestDistances(callableExtSink/1, callableExtStepRev/2)(_, c, result) + + /** + * Gets the distance from a source to `c` under `config`: `distSrcExt` minus + * one, which discounts the step out of `TCallableSrc`. + */ + private int distSrc(DataFlowCallable c, Configuration config) { + result = distSrcExt(TCallable(c, config)) - 1 + } + + /** + * Gets the distance from `c` to a sink under `config`: `distSinkExt` minus + * one, which discounts the step into `TCallableSink`. + */ + private int distSink(DataFlowCallable c, Configuration config) { + result = distSinkExt(TCallable(c, config)) - 1 + } + + private newtype TPartialAccessPath = + TPartialNil(DataFlowType t) or + TPartialCons(TypedContent tc, int len) { len in [1 .. accessPathLimit()] } + + /** + * Conceptually a list of `TypedContent`s followed by a `Type`, but only the first + * element of the list and its length are tracked.
If data flows from a source to + * a given node with a given `AccessPath`, this indicates the sequence of + * dereference operations needed to get from the value in the node to the + * tracked object. The final type indicates the type of the tracked object. + */ + private class PartialAccessPath extends TPartialAccessPath { + /** Gets a textual representation of this access path. */ + abstract string toString(); + + /** Gets the first element of this access path, if any. */ + TypedContent getHead() { this = TPartialCons(result, _) } + + /** Gets the length of this access path; 0 for the empty access path. */ + int len() { + this = TPartialNil(_) and result = 0 + or + this = TPartialCons(_, result) + } + + /** + * Gets the type of this access path: the type carried by an empty access + * path, and otherwise the container type of the head. + */ + DataFlowType getType() { + this = TPartialNil(result) + or + exists(TypedContent head | this = TPartialCons(head, _) | result = head.getContainerType()) + } + } + + /** An empty partial access path, printed as its type. */ + private class PartialAccessPathNil extends PartialAccessPath, TPartialNil { + override string toString() { + exists(DataFlowType t | this = TPartialNil(t) | result = concat(": " + ppReprType(t))) + } + } + + /** A non-empty partial access path, printed as its head followed by its length when longer than 1. */ + private class PartialAccessPathCons extends PartialAccessPath, TPartialCons { + override string toString() { + exists(TypedContent tc, int len | this = TPartialCons(tc, len) | + if len = 1 + then result = "[" + tc.toString() + "]" + else result = "[" + tc.toString() + ", ... (" + len.toString() + ")]" + ) + } + } + + private newtype TRevPartialAccessPath = + TRevPartialNil() or + TRevPartialCons(Content c, int len) { len in [1 .. accessPathLimit()] } + + /** + * Conceptually a list of `Content`s, but only the first + * element of the list and its length are tracked.
+ */ + private class RevPartialAccessPath extends TRevPartialAccessPath { + /** Gets a textual representation of this access path. */ + abstract string toString(); + + /** Gets the first element of this access path, if any. */ + Content getHead() { this = TRevPartialCons(result, _) } + + /** Gets the length of this access path; 0 for the empty access path. */ + int len() { + this = TRevPartialNil() and result = 0 + or + this = TRevPartialCons(_, result) + } + } + + /** An empty reverse partial access path, printed as the empty string. */ + private class RevPartialAccessPathNil extends RevPartialAccessPath, TRevPartialNil { + override string toString() { result = "" } + } + + /** A non-empty reverse partial access path, printed as its head followed by its length when longer than 1. */ + private class RevPartialAccessPathCons extends RevPartialAccessPath, TRevPartialCons { + override string toString() { + exists(Content c, int len | this = TRevPartialCons(c, len) | + if len = 1 + then result = "[" + c.toString() + "]" + else result = "[" + c.toString() + ", ... (" + len.toString() + ")]" + ) + } + } + + /** The first part of a forward summary context: the parameter through which a callable was entered, if any. */ + private newtype TSummaryCtx1 = + TSummaryCtx1None() or + TSummaryCtx1Param(ParameterNode p) + + /** The second part of a forward summary context: the access path with which a callable was entered, if any. */ + private newtype TSummaryCtx2 = + TSummaryCtx2None() or + TSummaryCtx2Some(PartialAccessPath ap) + + /** The first part of a reverse summary context: a return position, if any. */ + private newtype TRevSummaryCtx1 = + TRevSummaryCtx1None() or + TRevSummaryCtx1Some(ReturnPosition pos) + + /** The second part of a reverse summary context: a reverse partial access path, if any. */ + private newtype TRevSummaryCtx2 = + TRevSummaryCtx2None() or + TRevSummaryCtx2Some(RevPartialAccessPath ap) + + /** + * The nodes of partial flow exploration. Forward nodes start at the sources + * of a configuration and reverse nodes start at its sinks; both kinds require + * `config.explorationLimit()` to exist, and nodes reached by a step are only + * generated while `distSrc` (forward) or `distSink` (reverse) of the enclosing + * callable is within that limit. + */ + private newtype TPartialPathNode = + TPartialPathNodeFwd( + Node node, CallContext cc, TSummaryCtx1 sc1, TSummaryCtx2 sc2, PartialAccessPath ap, + Configuration config + ) { + config.isSource(node) and + cc instanceof CallContextAny and + sc1 = TSummaryCtx1None() and + sc2 = TSummaryCtx2None() and + ap = TPartialNil(getNodeType(node)) and + not fullBarrier(node, config) and + exists(config.explorationLimit()) + or + partialPathNodeMk0(node, cc, sc1, sc2, ap, config) and + distSrc(node.getEnclosingCallable(), config) <= config.explorationLimit() + } or + TPartialPathNodeRev( + Node node, TRevSummaryCtx1 sc1, TRevSummaryCtx2 sc2, RevPartialAccessPath ap, + Configuration config + ) { + config.isSink(node) and + sc1 = TRevSummaryCtx1None() and + sc2 = TRevSummaryCtx2None() and + ap = TRevPartialNil() and + not fullBarrier(node, config) and +
exists(config.explorationLimit()) + or + exists(PartialPathNodeRev mid | + revPartialPathStep(mid, node, sc1, sc2, ap, config) and + not clearsContent(node, ap.getHead()) and + not fullBarrier(node, config) and + distSink(node.getEnclosingCallable(), config) <= config.explorationLimit() + ) + } + + /** + * Holds if a forward node with the given components is reached by a + * `partialPathStep` from an existing forward node, where `node` is not a full + * barrier, does not clear the head content of `ap`, and, if it is a + * `CastingNode`, has a type compatible with the type of `ap`. + */ + pragma[nomagic] + private predicate partialPathNodeMk0( + Node node, CallContext cc, TSummaryCtx1 sc1, TSummaryCtx2 sc2, PartialAccessPath ap, + Configuration config + ) { + exists(PartialPathNodeFwd mid | + partialPathStep(mid, node, cc, sc1, sc2, ap, config) and + not fullBarrier(node, config) and + not clearsContent(node, ap.getHead().getContent()) and + if node instanceof CastingNode + then compatibleTypes(getNodeType(node), ap.getType()) + else any() + ) + } + + /** + * A `Node` augmented with a call context, an access path, and a configuration. + */ + class PartialPathNode extends TPartialPathNode { + /** Gets a textual representation of this element. */ + string toString() { result = this.getNode().toString() + this.ppAp() } + + /** + * Gets a textual representation of this element, including a textual + * representation of the call context. + */ + string toStringWithContext() { result = this.getNode().toString() + this.ppAp() + this.ppCtx() } + + /** + * Holds if this element is at the specified location. + * The location spans column `startcolumn` of line `startline` to + * column `endcolumn` of line `endline` in file `filepath`. + * For more information, see + * [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html). + */ + predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + this.getNode().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } + + /** Gets the underlying `Node`. */ + Node getNode() { none() } + + /** Gets the associated configuration. */ + Configuration getConfiguration() { none() } + + /** Gets a successor of this node, if any.
*/ + PartialPathNode getASuccessor() { none() } + + /** + * Gets the approximate distance to the nearest source measured in number + * of interprocedural steps. + */ + int getSourceDistance() { + result = distSrc(this.getNode().getEnclosingCallable(), this.getConfiguration()) + } + + /** + * Gets the approximate distance to the nearest sink measured in number + * of interprocedural steps. + */ + int getSinkDistance() { + result = distSink(this.getNode().getEnclosingCallable(), this.getConfiguration()) + } + + /** + * Gets the access path of this node formatted for printing: empty when its + * textual representation is empty, otherwise prefixed with a space. + */ + private string ppAp() { + exists(string s | + s = this.(PartialPathNodeFwd).getAp().toString() or + s = this.(PartialPathNodeRev).getAp().toString() + | + if s = "" then result = "" else result = " " + s + ) + } + + /** + * Gets the call context of this node formatted for printing as ` <...>`. + * This only has a result for `PartialPathNodeFwd` nodes, so + * `toStringWithContext` has no result for other nodes. + */ + private string ppCtx() { + result = " <" + this.(PartialPathNodeFwd).getCallContext().toString() + ">" + } + + /** Holds if this is a source in a forward-flow path. */ + predicate isFwdSource() { this.(PartialPathNodeFwd).isSource() } + + /** Holds if this is a sink in a reverse-flow path. */ + predicate isRevSink() { this.(PartialPathNodeRev).isSink() } + } + + /** + * Provides the query predicates needed to include a graph in a path-problem query. + */ + module PartialPathGraph { + /** Holds if `(a,b)` is an edge in the graph of data flow path explanations.
*/ + query predicate edges(PartialPathNode a, PartialPathNode b) { a.getASuccessor() = b } + } + + /** + * A partial path node of forward flow, consisting of a node, a call context, + * a two-part summary context, a partial access path, and a configuration. + */ + private class PartialPathNodeFwd extends PartialPathNode, TPartialPathNodeFwd { + Node node; + CallContext cc; + TSummaryCtx1 sc1; + TSummaryCtx2 sc2; + PartialAccessPath ap; + Configuration config; + + PartialPathNodeFwd() { this = TPartialPathNodeFwd(node, cc, sc1, sc2, ap, config) } + + override Node getNode() { result = node } + + /** Gets the call context of this node. */ + CallContext getCallContext() { result = cc } + + /** Gets the first part of the summary context of this node. */ + TSummaryCtx1 getSummaryCtx1() { result = sc1 } + + /** Gets the second part of the summary context of this node. */ + TSummaryCtx2 getSummaryCtx2() { result = sc2 } + + /** Gets the partial access path of this node. */ + PartialAccessPath getAp() { result = ap } + + override Configuration getConfiguration() { result = config } + + override PartialPathNodeFwd getASuccessor() { + partialPathStep(this, result.getNode(), result.getCallContext(), result.getSummaryCtx1(), + result.getSummaryCtx2(), result.getAp(), result.getConfiguration()) + } + + /** + * Holds if this node is a source: `node` is a source of `config`, the call + * context is `CallContextAny`, there is no summary context, and the access + * path is empty. + */ + predicate isSource() { + config.isSource(node) and + cc instanceof CallContextAny and + sc1 = TSummaryCtx1None() and + sc2 = TSummaryCtx2None() and + ap instanceof TPartialNil + } + } + + /** + * A partial path node of reverse flow, consisting of a node, a two-part + * reverse summary context, a reverse partial access path, and a configuration. + */ + private class PartialPathNodeRev extends PartialPathNode, TPartialPathNodeRev { + Node node; + TRevSummaryCtx1 sc1; + TRevSummaryCtx2 sc2; + RevPartialAccessPath ap; + Configuration config; + + PartialPathNodeRev() { this = TPartialPathNodeRev(node, sc1, sc2, ap, config) } + + override Node getNode() { result = node } + + /** Gets the first part of the reverse summary context of this node. */ + TRevSummaryCtx1 getSummaryCtx1() { result = sc1 } + + /** Gets the second part of the reverse summary context of this node. */ + TRevSummaryCtx2 getSummaryCtx2() { result = sc2 } + + /** Gets the reverse partial access path of this node. */ + RevPartialAccessPath getAp() { result = ap } + + override Configuration getConfiguration() { result = config } + + /** + * Gets a successor of this node. Reverse steps are computed from a node to + * its predecessor in the flow, so the successor is the node from which + * `revPartialPathStep` reaches this node. + */ + override PartialPathNodeRev getASuccessor() { + revPartialPathStep(result, this.getNode(), this.getSummaryCtx1(), this.getSummaryCtx2(), + this.getAp(), this.getConfiguration()) + } + + /** + * Holds if this node is a sink: `node` is a sink of `config`, there is no + * summary context, and the access path is empty. + */ + predicate isSink() { + config.isSink(node) and + sc1 = TRevSummaryCtx1None() and + sc2 = TRevSummaryCtx2None() and + ap = TRevPartialNil() + } + } + + /** + * Holds if there is a single forward exploration step from `mid` to `node`, + * where `cc`, `sc1`, `sc2`, and `ap` are the call context, summary context, + * and partial access path after the step. + */ + private
predicate partialPathStep( + PartialPathNodeFwd mid, Node node, CallContext cc, TSummaryCtx1 sc1, TSummaryCtx2 sc2, + PartialAccessPath ap, Configuration config + ) { + /* local steps: contexts are unchanged; excluded when `node` is unreachable in the specific call of `cc` */ + not isUnreachableInCall(node, cc.(CallContextSpecificCall).getCall()) and + ( + localFlowStep(mid.getNode(), node, config) and + cc = mid.getCallContext() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + ap = mid.getAp() and + config = mid.getConfiguration() + or + additionalLocalFlowStep(mid.getNode(), node, config) and + cc = mid.getCallContext() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + mid.getAp() instanceof PartialAccessPathNil and + ap = TPartialNil(getNodeType(node)) and + config = mid.getConfiguration() + ) + or + /* jump steps: the call context and the summary context are reset */ + jumpStep(mid.getNode(), node, config) and + cc instanceof CallContextAny and + sc1 = TSummaryCtx1None() and + sc2 = TSummaryCtx2None() and + ap = mid.getAp() and + config = mid.getConfiguration() + or + additionalJumpStep(mid.getNode(), node, config) and + cc instanceof CallContextAny and + sc1 = TSummaryCtx1None() and + sc2 = TSummaryCtx2None() and + mid.getAp() instanceof PartialAccessPathNil and + ap = TPartialNil(getNodeType(node)) and + config = mid.getConfiguration() + or + /* store step: contexts are unchanged */ + partialPathStoreStep(mid, _, _, node, ap) and + cc = mid.getCallContext() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + config = mid.getConfiguration() + or + /* read step: `ap` is an access path that some store step extended with `tc` to give `ap0` (see `apConsFwd`) */ + exists(PartialAccessPath ap0, TypedContent tc | + partialPathReadStep(mid, ap0, tc, node, cc, config) and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + apConsFwd(ap, tc, ap0, config) and + compatibleTypes(ap.getType(), getNodeType(node)) + ) + or + partialPathIntoCallable(mid, node, _, cc, sc1, sc2, _, ap, config) + or + /* flow out of a callable: the summary context is reset */ + partialPathOutOfCallable(mid, node, cc, ap, config) and + sc1 = TSummaryCtx1None() and + sc2 = TSummaryCtx2None() + or + /* flow through a callable: the summary context of `mid` is kept */ + partialPathThroughCallable(mid, node, cc, ap, config) and + sc1 = mid.getSummaryCtx1() and + sc2 =
mid.getSummaryCtx2() + } + + /** + * Gets `i`, expressed through two inequalities instead of an equality. + * NOTE(review): presumably this keeps the optimizer from binding one value + * through the other - confirm. + */ + bindingset[result, i] + private int unbindInt(int i) { i <= result and i >= result } + + /** + * Holds if there is a store of `tc` from the node of `mid`, which has access + * path `ap1`, into `node`, where the type of `ap1` is compatible with the + * stored content type. The resulting access path `ap2` has head `tc` and is + * one element longer than `ap1`. + */ + pragma[inline] + private predicate partialPathStoreStep( + PartialPathNodeFwd mid, PartialAccessPath ap1, TypedContent tc, Node node, PartialAccessPath ap2 + ) { + exists(Node midNode, DataFlowType contentType | + midNode = mid.getNode() and + ap1 = mid.getAp() and + store(midNode, tc, node, contentType) and + ap2.getHead() = tc and + ap2.len() = unbindInt(ap1.len() + 1) and + compatibleTypes(ap1.getType(), contentType) + ) + } + + /** + * Holds if some forward node of `config` has a store step that pushes `tc` + * onto `ap1` to give `ap2`. + */ + pragma[nomagic] + private predicate apConsFwd( + PartialAccessPath ap1, TypedContent tc, PartialAccessPath ap2, Configuration config + ) { + exists(PartialPathNodeFwd mid | + partialPathStoreStep(mid, ap1, tc, _, ap2) and + config = mid.getConfiguration() + ) + } + + /** + * Holds if `mid` has access path `ap` with head `tc`, call context `cc`, and + * configuration `config`, and there is a read of the content of `tc` from the + * node of `mid` to `node`. + */ + pragma[nomagic] + private predicate partialPathReadStep( + PartialPathNodeFwd mid, PartialAccessPath ap, TypedContent tc, Node node, CallContext cc, + Configuration config + ) { + exists(Node midNode | + midNode = mid.getNode() and + ap = mid.getAp() and + read(midNode, tc.getContent(), node) and + ap.getHead() = tc and + config = mid.getConfiguration() and + cc = mid.getCallContext() + ) + } + + /** + * Holds if the node of `mid` is at return position `pos`, and `mid` has call + * context `innercc` (which must be a `CallContextNoCall`), access path `ap`, + * and configuration `config`. + */ + private predicate partialPathOutOfCallable0( + PartialPathNodeFwd mid, ReturnPosition pos, CallContext innercc, PartialAccessPath ap, + Configuration config + ) { + pos = getReturnPosition(mid.getNode()) and + innercc = mid.getCallContext() and + innercc instanceof CallContextNoCall and + ap = mid.getAp() and + config = mid.getConfiguration() + } + + /** + * Holds if `mid` is at a return position of kind `kind` and `resolveReturn` + * relates its call context and enclosing callable to `call`. The outer + * context `cc` is a `TReturn` context when `reducedViableImplInReturn` holds + * for the callable and `call`, and `TAnyCallContext()` otherwise. + */ + pragma[nomagic] + private predicate partialPathOutOfCallable1( + PartialPathNodeFwd mid, DataFlowCall call, ReturnKindExt kind, CallContext cc, + PartialAccessPath ap, Configuration config + ) { + exists(ReturnPosition pos, DataFlowCallable c, CallContext innercc | + partialPathOutOfCallable0(mid, pos, innercc, ap, config) and + c = pos.getCallable() and + kind = pos.getKind() and + resolveReturn(innercc, c, call) + | +
if reducedViableImplInReturn(c, call) then cc = TReturn(c, call) else cc = TAnyCallContext() + ) + } + + private predicate partialPathOutOfCallable( + PartialPathNodeFwd mid, Node out, CallContext cc, PartialAccessPath ap, Configuration config + ) { + exists(ReturnKindExt kind, DataFlowCall call | + partialPathOutOfCallable1(mid, call, kind, cc, ap, config) + | + out = kind.getAnOutNode(call) + ) + } + + pragma[noinline] + private predicate partialPathIntoArg( + PartialPathNodeFwd mid, int i, CallContext cc, DataFlowCall call, PartialAccessPath ap, + Configuration config + ) { + exists(ArgumentNode arg | + arg = mid.getNode() and + cc = mid.getCallContext() and + arg.argumentOf(call, i) and + ap = mid.getAp() and + config = mid.getConfiguration() + ) + } + + pragma[nomagic] + private predicate partialPathIntoCallable0( + PartialPathNodeFwd mid, DataFlowCallable callable, int i, CallContext outercc, + DataFlowCall call, PartialAccessPath ap, Configuration config + ) { + partialPathIntoArg(mid, i, outercc, call, ap, config) and + callable = resolveCall(call, outercc) + } + + private predicate partialPathIntoCallable( + PartialPathNodeFwd mid, ParameterNode p, CallContext outercc, CallContextCall innercc, + TSummaryCtx1 sc1, TSummaryCtx2 sc2, DataFlowCall call, PartialAccessPath ap, + Configuration config + ) { + exists(int i, DataFlowCallable callable | + partialPathIntoCallable0(mid, callable, i, outercc, call, ap, config) and + p.isParameterOf(callable, i) and + sc1 = TSummaryCtx1Param(p) and + sc2 = TSummaryCtx2Some(ap) + | + if recordDataFlowCallSite(call, callable) + then innercc = TSpecificCall(call) + else innercc = TSomeCall() + ) + } + + pragma[nomagic] + private predicate paramFlowsThroughInPartialPath( + ReturnKindExt kind, CallContextCall cc, TSummaryCtx1 sc1, TSummaryCtx2 sc2, + PartialAccessPath ap, Configuration config + ) { + exists(PartialPathNodeFwd mid, ReturnNodeExt ret | + mid.getNode() = ret and + kind = ret.getKind() and + cc = 
mid.getCallContext() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + config = mid.getConfiguration() and + ap = mid.getAp() + ) + } + + pragma[noinline] + private predicate partialPathThroughCallable0( + DataFlowCall call, PartialPathNodeFwd mid, ReturnKindExt kind, CallContext cc, + PartialAccessPath ap, Configuration config + ) { + exists(ParameterNode p, CallContext innercc, TSummaryCtx1 sc1, TSummaryCtx2 sc2 | + partialPathIntoCallable(mid, p, cc, innercc, sc1, sc2, call, _, config) and + paramFlowsThroughInPartialPath(kind, innercc, sc1, sc2, ap, config) + ) + } + + private predicate partialPathThroughCallable( + PartialPathNodeFwd mid, Node out, CallContext cc, PartialAccessPath ap, Configuration config + ) { + exists(DataFlowCall call, ReturnKindExt kind | + partialPathThroughCallable0(call, mid, kind, cc, ap, config) and + out = kind.getAnOutNode(call) + ) + } + + private predicate revPartialPathStep( + PartialPathNodeRev mid, Node node, TRevSummaryCtx1 sc1, TRevSummaryCtx2 sc2, + RevPartialAccessPath ap, Configuration config + ) { + localFlowStep(node, mid.getNode(), config) and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + ap = mid.getAp() and + config = mid.getConfiguration() + or + additionalLocalFlowStep(node, mid.getNode(), config) and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + mid.getAp() instanceof RevPartialAccessPathNil and + ap = TRevPartialNil() and + config = mid.getConfiguration() + or + jumpStep(node, mid.getNode(), config) and + sc1 = TRevSummaryCtx1None() and + sc2 = TRevSummaryCtx2None() and + ap = mid.getAp() and + config = mid.getConfiguration() + or + additionalJumpStep(node, mid.getNode(), config) and + sc1 = TRevSummaryCtx1None() and + sc2 = TRevSummaryCtx2None() and + mid.getAp() instanceof RevPartialAccessPathNil and + ap = TRevPartialNil() and + config = mid.getConfiguration() + or + revPartialPathReadStep(mid, _, _, node, ap) and + sc1 = mid.getSummaryCtx1() 
and + sc2 = mid.getSummaryCtx2() and + config = mid.getConfiguration() + or + exists(RevPartialAccessPath ap0, Content c | + revPartialPathStoreStep(mid, ap0, c, node, config) and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + apConsRev(ap, c, ap0, config) + ) + or + exists(ParameterNode p | + mid.getNode() = p and + viableParamArg(_, p, node) and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc1 = TRevSummaryCtx1None() and + sc2 = TRevSummaryCtx2None() and + ap = mid.getAp() and + config = mid.getConfiguration() + ) + or + exists(ReturnPosition pos | + revPartialPathIntoReturn(mid, pos, sc1, sc2, _, ap, config) and + pos = getReturnPosition(node) + ) + or + revPartialPathThroughCallable(mid, node, ap, config) and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() + } + + pragma[inline] + private predicate revPartialPathReadStep( + PartialPathNodeRev mid, RevPartialAccessPath ap1, Content c, Node node, RevPartialAccessPath ap2 + ) { + exists(Node midNode | + midNode = mid.getNode() and + ap1 = mid.getAp() and + read(node, c, midNode) and + ap2.getHead() = c and + ap2.len() = unbindInt(ap1.len() + 1) + ) + } + + pragma[nomagic] + private predicate apConsRev( + RevPartialAccessPath ap1, Content c, RevPartialAccessPath ap2, Configuration config + ) { + exists(PartialPathNodeRev mid | + revPartialPathReadStep(mid, ap1, c, _, ap2) and + config = mid.getConfiguration() + ) + } + + pragma[nomagic] + private predicate revPartialPathStoreStep( + PartialPathNodeRev mid, RevPartialAccessPath ap, Content c, Node node, Configuration config + ) { + exists(Node midNode, TypedContent tc | + midNode = mid.getNode() and + ap = mid.getAp() and + store(node, tc, midNode, _) and + ap.getHead() = c and + config = mid.getConfiguration() and + tc.getContent() = c + ) + } + + pragma[nomagic] + private predicate revPartialPathIntoReturn( + PartialPathNodeRev mid, ReturnPosition pos, TRevSummaryCtx1Some sc1, TRevSummaryCtx2Some sc2, + 
DataFlowCall call, RevPartialAccessPath ap, Configuration config + ) { + exists(Node out | + mid.getNode() = out and + viableReturnPosOut(call, pos, out) and + sc1 = TRevSummaryCtx1Some(pos) and + sc2 = TRevSummaryCtx2Some(ap) and + ap = mid.getAp() and + config = mid.getConfiguration() + ) + } + + pragma[nomagic] + private predicate revPartialPathFlowsThrough( + int pos, TRevSummaryCtx1Some sc1, TRevSummaryCtx2Some sc2, RevPartialAccessPath ap, + Configuration config + ) { + exists(PartialPathNodeRev mid, ParameterNode p | + mid.getNode() = p and + p.isParameterOf(_, pos) and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + ap = mid.getAp() and + config = mid.getConfiguration() + ) + } + + pragma[nomagic] + private predicate revPartialPathThroughCallable0( + DataFlowCall call, PartialPathNodeRev mid, int pos, RevPartialAccessPath ap, + Configuration config + ) { + exists(TRevSummaryCtx1Some sc1, TRevSummaryCtx2Some sc2 | + revPartialPathIntoReturn(mid, _, sc1, sc2, call, _, config) and + revPartialPathFlowsThrough(pos, sc1, sc2, ap, config) + ) + } + + pragma[nomagic] + private predicate revPartialPathThroughCallable( + PartialPathNodeRev mid, ArgumentNode node, RevPartialAccessPath ap, Configuration config + ) { + exists(DataFlowCall call, int pos | + revPartialPathThroughCallable0(call, mid, pos, ap, config) and + node.argumentOf(call, pos) + ) + } +} + +import FlowExploration + +private predicate partialFlow( + PartialPathNode source, PartialPathNode node, Configuration configuration +) { + source.getConfiguration() = configuration and + source.isFwdSource() and + node = source.getASuccessor+() +} + +private predicate revPartialFlow( + PartialPathNode node, PartialPathNode sink, Configuration configuration +) { + sink.getConfiguration() = configuration and + sink.isRevSink() and + node.getASuccessor+() = sink +} From fd18fd8403cfeef6fb46567eab2f2d295ce47e38 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Tue, 23 Feb 2021 15:24:52 
+0100 Subject: [PATCH 21/25] Python: Apply suggestions from code review Co-authored-by: Felicity Chapman --- python/change-notes/2021-02-02-port-weak-crypto-key-query.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/change-notes/2021-02-02-port-weak-crypto-key-query.md b/python/change-notes/2021-02-02-port-weak-crypto-key-query.md index 93897c586e8..138c864265f 100644 --- a/python/change-notes/2021-02-02-port-weak-crypto-key-query.md +++ b/python/change-notes/2021-02-02-port-weak-crypto-key-query.md @@ -1,3 +1,3 @@ lgtm,codescanning -* Ported _Use of weak cryptographic key_ (`py/weak-crypto-key`) query to use new type-tracking approach instead of points-to. This might result in some difference in results being found, but overall this should result in a more robust and accurate analysis. -* Renamed the query file for _Use of weak cryptographic key_ (`py/weak-crypto-key`) from `WeakCrypto.ql` to `WeakCryptoKey.ql` (in the `python/ql/src/Security/CWE-326/` folder), which could impact custom query suites that include/exclude this query by using it's path. +* Updated _Use of weak cryptographic key_ (`py/weak-crypto-key`) query to use the new type-tracking approach instead of points-to analysis. You may see differences in the results found by the query, but overall this change should result in a more robust and accurate analysis. +* Renamed the query file for _Use of weak cryptographic key_ (`py/weak-crypto-key`) from `WeakCrypto.ql` to `WeakCryptoKey.ql` (in the `python/ql/src/Security/CWE-326/` folder). This will affect any custom query suites that include or exclude this query using its path. From c195c64982b8b11fd021c349873ab692673889ed Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Tue, 23 Feb 2021 17:16:49 +0100 Subject: [PATCH 22/25] Python: Use type-tracking for integer literal tracking Like we've done for pretty much everything else. An experiment to see what this means for query performance. 
--- config/identical-files.json | 3 +- python/ql/src/semmle/python/Concepts.qll | 44 +- .../dataflow/new/DataFlowOnlyInternalUse.qll | 40 - .../internal/DataFlowImplOnlyInternalUse.qll | 4153 ----------------- 4 files changed, 28 insertions(+), 4212 deletions(-) delete mode 100644 python/ql/src/semmle/python/dataflow/new/DataFlowOnlyInternalUse.qll delete mode 100644 python/ql/src/semmle/python/dataflow/new/internal/DataFlowImplOnlyInternalUse.qll diff --git a/config/identical-files.json b/config/identical-files.json index 5b84fab45cc..d68dabba861 100644 --- a/config/identical-files.json +++ b/config/identical-files.json @@ -22,8 +22,7 @@ "python/ql/src/semmle/python/dataflow/new/internal/DataFlowImpl.qll", "python/ql/src/semmle/python/dataflow/new/internal/DataFlowImpl2.qll", "python/ql/src/semmle/python/dataflow/new/internal/DataFlowImpl3.qll", - "python/ql/src/semmle/python/dataflow/new/internal/DataFlowImpl4.qll", - "python/ql/src/semmle/python/dataflow/new/internal/DataFlowImplOnlyInternalUse.qll" + "python/ql/src/semmle/python/dataflow/new/internal/DataFlowImpl4.qll" ], "DataFlow Java/C++/C#/Python Common": [ "java/ql/src/semmle/code/java/dataflow/internal/DataFlowImplCommon.qll", diff --git a/python/ql/src/semmle/python/Concepts.qll b/python/ql/src/semmle/python/Concepts.qll index ed0a19d197a..ffc3c0382e5 100644 --- a/python/ql/src/semmle/python/Concepts.qll +++ b/python/ql/src/semmle/python/Concepts.qll @@ -6,7 +6,6 @@ import python private import semmle.python.dataflow.new.DataFlow -private import semmle.python.dataflow.new.DataFlowOnlyInternalUse private import semmle.python.dataflow.new.RemoteFlowSources private import semmle.python.dataflow.new.TaintTracking private import semmle.python.Frameworks @@ -563,19 +562,34 @@ module Cryptography { /** Provides classes for modeling new key-pair generation APIs. */ module KeyGeneration { - /** - * A data-flow configuration for tracking integer literals. 
- */ - private class IntegerLiteralTrackerConfiguration extends DataFlowOnlyInternalUse::Configuration { - IntegerLiteralTrackerConfiguration() { this = "IntegerLiteralTrackerConfiguration" } + /** Gets a reference to an integer literal, as well as the origin of the integer literal. */ + private DataFlow::Node keysizeTracker( + DataFlow::TypeTracker t, int keySize, DataFlow::Node origin + ) { + t.start() and + result.asExpr().(IntegerLiteral).getValue() = keySize and + origin = result + or + // Due to bad performance when using normal setup with we have inlined that code and forced a join + exists(DataFlow::TypeTracker t2 | + exists(DataFlow::StepSummary summary | + keysizeTracker_first_join(t2, keySize, origin, result, summary) and + t = t2.append(summary) + ) + ) + } - override predicate isSource(DataFlow::Node source) { - source = DataFlow::exprNode(any(IntegerLiteral size)) - } + pragma[nomagic] + private predicate keysizeTracker_first_join( + DataFlow::TypeTracker t2, int keySize, DataFlow::Node origin, DataFlow::Node res, + DataFlow::StepSummary summary + ) { + DataFlow::StepSummary::step(keysizeTracker(t2, keySize, origin), res, summary) + } - override predicate isSink(DataFlow::Node sink) { - sink = any(KeyGeneration::Range kg).getKeySizeArg() - } + /** Gets a reference to an integer literal, as well as the origin of the integer literal. */ + private DataFlow::Node keysizeTracker(int keySize, DataFlow::Node origin) { + result = keysizeTracker(DataFlow::TypeTracker::end(), keySize, origin) } /** @@ -596,11 +610,7 @@ module Cryptography { * explains how we obtained this specific key size. 
*/ int getKeySizeWithOrigin(DataFlow::Node origin) { - exists(IntegerLiteral size, IntegerLiteralTrackerConfiguration config | - origin.asExpr() = size and - config.hasFlow(origin, this.getKeySizeArg()) and - result = size.getValue() - ) + this.getKeySizeArg() = keysizeTracker(result, origin) } /** Gets the minimum key size (in bits) for this algorithm to be considered secure. */ diff --git a/python/ql/src/semmle/python/dataflow/new/DataFlowOnlyInternalUse.qll b/python/ql/src/semmle/python/dataflow/new/DataFlowOnlyInternalUse.qll deleted file mode 100644 index 8768f25a534..00000000000 --- a/python/ql/src/semmle/python/dataflow/new/DataFlowOnlyInternalUse.qll +++ /dev/null @@ -1,40 +0,0 @@ -/** - * INTERNAL: Do not use. - * - * This copy exists to allow internal non-query usage of global data-flow analyses. If - * we used the same copy as was used in multiple queries (A, B, C), then all internal - * non-query configurations would have to be re-evaluated for _each_ query, which is - * expensive. By having a separate copy, we avoid this re-evaluation. - * - * Provides a library for local (intra-procedural) and global (inter-procedural) - * data flow analysis: deciding whether data can flow from a _source_ to a - * _sink_. - * - * Unless configured otherwise, _flow_ means that the exact value of - * the source may reach the sink. We do not track flow across pointer - * dereferences or array indexing. To track these types of flow, where the - * exact value may not be preserved, import - * `semmle.python.dataflow.new.TaintTracking`. - * - * To use global (interprocedural) data flow, extend the class - * `DataFlow::Configuration` as documented on that class. To use local - * (intraprocedural) data flow, call `DataFlow::localFlow` or - * `DataFlow::localFlowStep` with arguments of type `DataFlow::Node`. - */ - -private import python - -/** - * INTERNAL: Do not use. - * - * This copy exists to allow internal non-query usage of global data-flow analyses. 
If - * we used the same copy as was used in multiple queries (A, B, C), then all internal - * non-query configurations would have to be re-evaluated for _each_ query, which is - * expensive. By having a separate copy, we avoid this re-evaluation. - * - * Provides classes for performing local (intra-procedural) and - * global (inter-procedural) data flow analyses. - */ -module DataFlowOnlyInternalUse { - import semmle.python.dataflow.new.internal.DataFlowImplOnlyInternalUse -} diff --git a/python/ql/src/semmle/python/dataflow/new/internal/DataFlowImplOnlyInternalUse.qll b/python/ql/src/semmle/python/dataflow/new/internal/DataFlowImplOnlyInternalUse.qll deleted file mode 100644 index 59cc8d529a7..00000000000 --- a/python/ql/src/semmle/python/dataflow/new/internal/DataFlowImplOnlyInternalUse.qll +++ /dev/null @@ -1,4153 +0,0 @@ -/** - * Provides an implementation of global (interprocedural) data flow. This file - * re-exports the local (intraprocedural) data flow analysis from - * `DataFlowImplSpecific::Public` and adds a global analysis, mainly exposed - * through the `Configuration` class. This file exists in several identical - * copies, allowing queries to use multiple `Configuration` classes that depend - * on each other without introducing mutual recursion among those configurations. - */ - -private import DataFlowImplCommon -private import DataFlowImplSpecific::Private -import DataFlowImplSpecific::Public - -/** - * A configuration of interprocedural data flow analysis. This defines - * sources, sinks, and any other configurable aspect of the analysis. Each - * use of the global data flow library must define its own unique extension - * of this abstract class. To create a configuration, extend this class with - * a subclass whose characteristic predicate is a unique singleton string. 
- * For example, write - * - * ```ql - * class MyAnalysisConfiguration extends DataFlow::Configuration { - * MyAnalysisConfiguration() { this = "MyAnalysisConfiguration" } - * // Override `isSource` and `isSink`. - * // Optionally override `isBarrier`. - * // Optionally override `isAdditionalFlowStep`. - * } - * ``` - * Conceptually, this defines a graph where the nodes are `DataFlow::Node`s and - * the edges are those data-flow steps that preserve the value of the node - * along with any additional edges defined by `isAdditionalFlowStep`. - * Specifying nodes in `isBarrier` will remove those nodes from the graph, and - * specifying nodes in `isBarrierIn` and/or `isBarrierOut` will remove in-going - * and/or out-going edges from those nodes, respectively. - * - * Then, to query whether there is flow between some `source` and `sink`, - * write - * - * ```ql - * exists(MyAnalysisConfiguration cfg | cfg.hasFlow(source, sink)) - * ``` - * - * Multiple configurations can coexist, but two classes extending - * `DataFlow::Configuration` should never depend on each other. One of them - * should instead depend on a `DataFlow2::Configuration`, a - * `DataFlow3::Configuration`, or a `DataFlow4::Configuration`. - */ -abstract class Configuration extends string { - bindingset[this] - Configuration() { any() } - - /** - * Holds if `source` is a relevant data flow source. - */ - abstract predicate isSource(Node source); - - /** - * Holds if `sink` is a relevant data flow sink. - */ - abstract predicate isSink(Node sink); - - /** - * Holds if data flow through `node` is prohibited. This completely removes - * `node` from the data flow graph. - */ - predicate isBarrier(Node node) { none() } - - /** Holds if data flow into `node` is prohibited. */ - predicate isBarrierIn(Node node) { none() } - - /** Holds if data flow out of `node` is prohibited. */ - predicate isBarrierOut(Node node) { none() } - - /** Holds if data flow through nodes guarded by `guard` is prohibited. 
*/ - predicate isBarrierGuard(BarrierGuard guard) { none() } - - /** - * Holds if the additional flow step from `node1` to `node2` must be taken - * into account in the analysis. - */ - predicate isAdditionalFlowStep(Node node1, Node node2) { none() } - - /** - * Gets the virtual dispatch branching limit when calculating field flow. - * This can be overridden to a smaller value to improve performance (a - * value of 0 disables field flow), or a larger value to get more results. - */ - int fieldFlowBranchLimit() { result = 2 } - - /** - * Holds if data may flow from `source` to `sink` for this configuration. - */ - predicate hasFlow(Node source, Node sink) { flowsTo(source, sink, this) } - - /** - * Holds if data may flow from `source` to `sink` for this configuration. - * - * The corresponding paths are generated from the end-points and the graph - * included in the module `PathGraph`. - */ - predicate hasFlowPath(PathNode source, PathNode sink) { flowsTo(source, sink, _, _, this) } - - /** - * Holds if data may flow from some source to `sink` for this configuration. - */ - predicate hasFlowTo(Node sink) { hasFlow(_, sink) } - - /** - * Holds if data may flow from some source to `sink` for this configuration. - */ - predicate hasFlowToExpr(DataFlowExpr sink) { hasFlowTo(exprNode(sink)) } - - /** - * Gets the exploration limit for `hasPartialFlow` and `hasPartialFlowRev` - * measured in approximate number of interprocedural steps. - */ - int explorationLimit() { none() } - - /** - * Holds if there is a partial data flow path from `source` to `node`. The - * approximate distance between `node` and the closest source is `dist` and - * is restricted to be less than or equal to `explorationLimit()`. This - * predicate completely disregards sink definitions. - * - * This predicate is intended for data-flow exploration and debugging and may - * perform poorly if the number of sources is too big and/or the exploration - * limit is set too high without using barriers. 
- * - * This predicate is disabled (has no results) by default. Override - * `explorationLimit()` with a suitable number to enable this predicate. - * - * To use this in a `path-problem` query, import the module `PartialPathGraph`. - */ - final predicate hasPartialFlow(PartialPathNode source, PartialPathNode node, int dist) { - partialFlow(source, node, this) and - dist = node.getSourceDistance() - } - - /** - * Holds if there is a partial data flow path from `node` to `sink`. The - * approximate distance between `node` and the closest sink is `dist` and - * is restricted to be less than or equal to `explorationLimit()`. This - * predicate completely disregards source definitions. - * - * This predicate is intended for data-flow exploration and debugging and may - * perform poorly if the number of sinks is too big and/or the exploration - * limit is set too high without using barriers. - * - * This predicate is disabled (has no results) by default. Override - * `explorationLimit()` with a suitable number to enable this predicate. - * - * To use this in a `path-problem` query, import the module `PartialPathGraph`. - * - * Note that reverse flow has slightly lower precision than the corresponding - * forward flow, as reverse flow disregards type pruning among other features. - */ - final predicate hasPartialFlowRev(PartialPathNode node, PartialPathNode sink, int dist) { - revPartialFlow(node, sink, this) and - dist = node.getSinkDistance() - } -} - -/** - * This class exists to prevent mutual recursion between the user-overridden - * member predicates of `Configuration` and the rest of the data-flow library. - * Good performance cannot be guaranteed in the presence of such recursion, so - * it should be replaced by using more than one copy of the data flow library. 
- */ -abstract private class ConfigurationRecursionPrevention extends Configuration { - bindingset[this] - ConfigurationRecursionPrevention() { any() } - - override predicate hasFlow(Node source, Node sink) { - strictcount(Node n | this.isSource(n)) < 0 - or - strictcount(Node n | this.isSink(n)) < 0 - or - strictcount(Node n1, Node n2 | this.isAdditionalFlowStep(n1, n2)) < 0 - or - super.hasFlow(source, sink) - } -} - -private predicate inBarrier(Node node, Configuration config) { - config.isBarrierIn(node) and - config.isSource(node) -} - -private predicate outBarrier(Node node, Configuration config) { - config.isBarrierOut(node) and - config.isSink(node) -} - -private predicate fullBarrier(Node node, Configuration config) { - config.isBarrier(node) - or - config.isBarrierIn(node) and - not config.isSource(node) - or - config.isBarrierOut(node) and - not config.isSink(node) - or - exists(BarrierGuard g | - config.isBarrierGuard(g) and - node = g.getAGuardedNode() - ) -} - -private class AdditionalFlowStepSource extends Node { - AdditionalFlowStepSource() { any(Configuration c).isAdditionalFlowStep(this, _) } -} - -pragma[noinline] -private predicate isAdditionalFlowStep( - AdditionalFlowStepSource node1, Node node2, DataFlowCallable callable1, Configuration config -) { - config.isAdditionalFlowStep(node1, node2) and - callable1 = node1.getEnclosingCallable() -} - -/** - * Holds if data can flow in one local step from `node1` to `node2`. - */ -private predicate localFlowStep(Node node1, Node node2, Configuration config) { - simpleLocalFlowStep(node1, node2) and - not outBarrier(node1, config) and - not inBarrier(node2, config) and - not fullBarrier(node1, config) and - not fullBarrier(node2, config) -} - -/** - * Holds if the additional step from `node1` to `node2` does not jump between callables. 
- */ -private predicate additionalLocalFlowStep(Node node1, Node node2, Configuration config) { - isAdditionalFlowStep(node1, node2, node2.getEnclosingCallable(), config) and - not outBarrier(node1, config) and - not inBarrier(node2, config) and - not fullBarrier(node1, config) and - not fullBarrier(node2, config) -} - -/** - * Holds if data can flow from `node1` to `node2` in a way that discards call contexts. - */ -private predicate jumpStep(Node node1, Node node2, Configuration config) { - jumpStep(node1, node2) and - not outBarrier(node1, config) and - not inBarrier(node2, config) and - not fullBarrier(node1, config) and - not fullBarrier(node2, config) -} - -/** - * Holds if the additional step from `node1` to `node2` jumps between callables. - */ -private predicate additionalJumpStep(Node node1, Node node2, Configuration config) { - exists(DataFlowCallable callable1 | - isAdditionalFlowStep(node1, node2, callable1, config) and - node2.getEnclosingCallable() != callable1 and - not outBarrier(node1, config) and - not inBarrier(node2, config) and - not fullBarrier(node1, config) and - not fullBarrier(node2, config) - ) -} - -/** - * Holds if field flow should be used for the given configuration. - */ -private predicate useFieldFlow(Configuration config) { config.fieldFlowBranchLimit() >= 1 } - -private module Stage1 { - class ApApprox = Unit; - - class Ap = Unit; - - class ApOption = Unit; - - class Cc = boolean; - - /* Begin: Stage 1 logic. */ - /** - * Holds if `node` is reachable from a source in the configuration `config`. - * - * The Boolean `cc` records whether the node is reached through an - * argument in a call. 
- */ - predicate fwdFlow(Node node, Cc cc, Configuration config) { - not fullBarrier(node, config) and - ( - config.isSource(node) and - cc = false - or - exists(Node mid | - fwdFlow(mid, cc, config) and - localFlowStep(mid, node, config) - ) - or - exists(Node mid | - fwdFlow(mid, cc, config) and - additionalLocalFlowStep(mid, node, config) - ) - or - exists(Node mid | - fwdFlow(mid, _, config) and - jumpStep(mid, node, config) and - cc = false - ) - or - exists(Node mid | - fwdFlow(mid, _, config) and - additionalJumpStep(mid, node, config) and - cc = false - ) - or - // store - exists(Node mid | - useFieldFlow(config) and - fwdFlow(mid, cc, config) and - store(mid, _, node, _) and - not outBarrier(mid, config) - ) - or - // read - exists(Content c | - fwdFlowRead(c, node, cc, config) and - fwdFlowConsCand(c, config) and - not inBarrier(node, config) - ) - or - // flow into a callable - exists(Node arg | - fwdFlow(arg, _, config) and - viableParamArg(_, node, arg) and - cc = true - ) - or - // flow out of a callable - exists(DataFlowCall call | - fwdFlowOut(call, node, false, config) and - cc = false - or - fwdFlowOutFromArg(call, node, config) and - fwdFlowIsEntered(call, cc, config) - ) - ) - } - - private predicate fwdFlow(Node node, Configuration config) { fwdFlow(node, _, config) } - - pragma[nomagic] - private predicate fwdFlowRead(Content c, Node node, Cc cc, Configuration config) { - exists(Node mid | - fwdFlow(mid, cc, config) and - read(mid, c, node) - ) - } - - /** - * Holds if `c` is the target of a store in the flow covered by `fwdFlow`. 
- */ - pragma[nomagic] - private predicate fwdFlowConsCand(Content c, Configuration config) { - exists(Node mid, Node node, TypedContent tc | - not fullBarrier(node, config) and - useFieldFlow(config) and - fwdFlow(mid, _, config) and - store(mid, tc, node, _) and - c = tc.getContent() - ) - } - - pragma[nomagic] - private predicate fwdFlowReturnPosition(ReturnPosition pos, Cc cc, Configuration config) { - exists(ReturnNodeExt ret | - fwdFlow(ret, cc, config) and - getReturnPosition(ret) = pos - ) - } - - pragma[nomagic] - private predicate fwdFlowOut(DataFlowCall call, Node out, Cc cc, Configuration config) { - exists(ReturnPosition pos | - fwdFlowReturnPosition(pos, cc, config) and - viableReturnPosOut(call, pos, out) - ) - } - - pragma[nomagic] - private predicate fwdFlowOutFromArg(DataFlowCall call, Node out, Configuration config) { - fwdFlowOut(call, out, true, config) - } - - /** - * Holds if an argument to `call` is reached in the flow covered by `fwdFlow`. - */ - pragma[nomagic] - private predicate fwdFlowIsEntered(DataFlowCall call, Cc cc, Configuration config) { - exists(ArgumentNode arg | - fwdFlow(arg, cc, config) and - viableParamArg(call, _, arg) - ) - } - - /** - * Holds if `node` is part of a path from a source to a sink in the - * configuration `config`. - * - * The Boolean `toReturn` records whether the node must be returned from - * the enclosing callable in order to reach a sink. 
- */ - pragma[nomagic] - predicate revFlow(Node node, boolean toReturn, Configuration config) { - revFlow0(node, toReturn, config) and - fwdFlow(node, config) - } - - pragma[nomagic] - private predicate revFlow0(Node node, boolean toReturn, Configuration config) { - fwdFlow(node, config) and - config.isSink(node) and - toReturn = false - or - exists(Node mid | - localFlowStep(node, mid, config) and - revFlow(mid, toReturn, config) - ) - or - exists(Node mid | - additionalLocalFlowStep(node, mid, config) and - revFlow(mid, toReturn, config) - ) - or - exists(Node mid | - jumpStep(node, mid, config) and - revFlow(mid, _, config) and - toReturn = false - ) - or - exists(Node mid | - additionalJumpStep(node, mid, config) and - revFlow(mid, _, config) and - toReturn = false - ) - or - // store - exists(Content c | - revFlowStore(c, node, toReturn, config) and - revFlowConsCand(c, config) - ) - or - // read - exists(Node mid, Content c | - read(node, c, mid) and - fwdFlowConsCand(c, unbind(config)) and - revFlow(mid, toReturn, config) - ) - or - // flow into a callable - exists(DataFlowCall call | - revFlowIn(call, node, false, config) and - toReturn = false - or - revFlowInToReturn(call, node, config) and - revFlowIsReturned(call, toReturn, config) - ) - or - // flow out of a callable - exists(ReturnPosition pos | - revFlowOut(pos, config) and - getReturnPosition(node) = pos and - toReturn = true - ) - } - - /** - * Holds if `c` is the target of a read in the flow covered by `revFlow`. 
- */ - pragma[nomagic] - private predicate revFlowConsCand(Content c, Configuration config) { - exists(Node mid, Node node | - fwdFlow(node, unbind(config)) and - read(node, c, mid) and - fwdFlowConsCand(c, unbind(config)) and - revFlow(mid, _, config) - ) - } - - pragma[nomagic] - private predicate revFlowStore(Content c, Node node, boolean toReturn, Configuration config) { - exists(Node mid, TypedContent tc | - revFlow(mid, toReturn, config) and - fwdFlowConsCand(c, unbind(config)) and - store(node, tc, mid, _) and - c = tc.getContent() - ) - } - - /** - * Holds if `c` is the target of both a read and a store in the flow covered - * by `revFlow`. - */ - private predicate revFlowIsReadAndStored(Content c, Configuration conf) { - revFlowConsCand(c, conf) and - revFlowStore(c, _, _, conf) - } - - pragma[nomagic] - predicate viableReturnPosOutNodeCandFwd1( - DataFlowCall call, ReturnPosition pos, Node out, Configuration config - ) { - fwdFlowReturnPosition(pos, _, config) and - viableReturnPosOut(call, pos, out) - } - - pragma[nomagic] - private predicate revFlowOut(ReturnPosition pos, Configuration config) { - exists(DataFlowCall call, Node out | - revFlow(out, _, config) and - viableReturnPosOutNodeCandFwd1(call, pos, out, config) - ) - } - - pragma[nomagic] - predicate viableParamArgNodeCandFwd1( - DataFlowCall call, ParameterNode p, ArgumentNode arg, Configuration config - ) { - viableParamArg(call, p, arg) and - fwdFlow(arg, config) - } - - pragma[nomagic] - private predicate revFlowIn( - DataFlowCall call, ArgumentNode arg, boolean toReturn, Configuration config - ) { - exists(ParameterNode p | - revFlow(p, toReturn, config) and - viableParamArgNodeCandFwd1(call, p, arg, config) - ) - } - - pragma[nomagic] - private predicate revFlowInToReturn(DataFlowCall call, ArgumentNode arg, Configuration config) { - revFlowIn(call, arg, true, config) - } - - /** - * Holds if an output from `call` is reached in the flow covered by `revFlow`. 
- */ - pragma[nomagic] - private predicate revFlowIsReturned(DataFlowCall call, boolean toReturn, Configuration config) { - exists(Node out | - revFlow(out, toReturn, config) and - fwdFlowOutFromArg(call, out, config) - ) - } - - pragma[nomagic] - predicate storeStepCand( - Node node1, Ap ap1, TypedContent tc, Node node2, DataFlowType contentType, Configuration config - ) { - exists(Content c | - revFlowIsReadAndStored(c, config) and - revFlow(node2, unbind(config)) and - store(node1, tc, node2, contentType) and - c = tc.getContent() and - exists(ap1) - ) - } - - pragma[nomagic] - predicate readStepCand(Node n1, Content c, Node n2, Configuration config) { - revFlowIsReadAndStored(c, config) and - revFlow(n2, unbind(config)) and - read(n1, c, n2) - } - - pragma[nomagic] - predicate revFlow(Node node, Configuration config) { revFlow(node, _, config) } - - predicate revFlow(Node node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config) { - revFlow(node, toReturn, config) and exists(returnAp) and exists(ap) - } - - private predicate throughFlowNodeCand(Node node, Configuration config) { - revFlow(node, true, config) and - fwdFlow(node, true, config) and - not inBarrier(node, config) and - not outBarrier(node, config) - } - - /** Holds if flow may return from `callable`. */ - pragma[nomagic] - private predicate returnFlowCallableNodeCand( - DataFlowCallable callable, ReturnKindExt kind, Configuration config - ) { - exists(ReturnNodeExt ret | - throughFlowNodeCand(ret, config) and - callable = ret.getEnclosingCallable() and - kind = ret.getKind() - ) - } - - /** - * Holds if flow may enter through `p` and reach a return node making `p` a - * candidate for the origin of a summary. 
- */ - predicate parameterMayFlowThrough(ParameterNode p, DataFlowCallable c, Ap ap, Configuration config) { - exists(ReturnKindExt kind | - throughFlowNodeCand(p, config) and - returnFlowCallableNodeCand(c, kind, config) and - p.getEnclosingCallable() = c and - exists(ap) and - // we don't expect a parameter to return stored in itself - not exists(int pos | - kind.(ParamUpdateReturnKind).getPosition() = pos and p.isParameterOf(_, pos) - ) - ) - } - - predicate stats(boolean fwd, int nodes, int fields, int conscand, int tuples, Configuration config) { - fwd = true and - nodes = count(Node node | fwdFlow(node, config)) and - fields = count(Content f0 | fwdFlowConsCand(f0, config)) and - conscand = -1 and - tuples = count(Node n, boolean b | fwdFlow(n, b, config)) - or - fwd = false and - nodes = count(Node node | revFlow(node, _, config)) and - fields = count(Content f0 | revFlowConsCand(f0, config)) and - conscand = -1 and - tuples = count(Node n, boolean b | revFlow(n, b, config)) - } - /* End: Stage 1 logic. */ -} - -bindingset[result, b] -private boolean unbindBool(boolean b) { result != b.booleanNot() } - -pragma[noinline] -private predicate localFlowStepNodeCand1(Node node1, Node node2, Configuration config) { - Stage1::revFlow(node2, config) and - localFlowStep(node1, node2, config) -} - -pragma[noinline] -private predicate additionalLocalFlowStepNodeCand1(Node node1, Node node2, Configuration config) { - Stage1::revFlow(node2, config) and - additionalLocalFlowStep(node1, node2, config) -} - -pragma[nomagic] -private predicate viableReturnPosOutNodeCand1( - DataFlowCall call, ReturnPosition pos, Node out, Configuration config -) { - Stage1::revFlow(out, config) and - Stage1::viableReturnPosOutNodeCandFwd1(call, pos, out, config) -} - -/** - * Holds if data can flow out of `call` from `ret` to `out`, either - * through a `ReturnNode` or through an argument that has been mutated, and - * that this step is part of a path from a source to a sink. 
- */ -pragma[nomagic] -private predicate flowOutOfCallNodeCand1( - DataFlowCall call, ReturnNodeExt ret, Node out, Configuration config -) { - viableReturnPosOutNodeCand1(call, getReturnPosition(ret), out, config) and - Stage1::revFlow(ret, config) and - not outBarrier(ret, config) and - not inBarrier(out, config) -} - -pragma[nomagic] -private predicate viableParamArgNodeCand1( - DataFlowCall call, ParameterNode p, ArgumentNode arg, Configuration config -) { - Stage1::viableParamArgNodeCandFwd1(call, p, arg, config) and - Stage1::revFlow(arg, config) -} - -/** - * Holds if data can flow into `call` and that this step is part of a - * path from a source to a sink. - */ -pragma[nomagic] -private predicate flowIntoCallNodeCand1( - DataFlowCall call, ArgumentNode arg, ParameterNode p, Configuration config -) { - viableParamArgNodeCand1(call, p, arg, config) and - Stage1::revFlow(p, config) and - not outBarrier(arg, config) and - not inBarrier(p, config) -} - -/** - * Gets the amount of forward branching on the origin of a cross-call path - * edge in the graph of paths between sources and sinks that ignores call - * contexts. - */ -private int branch(Node n1, Configuration conf) { - result = - strictcount(Node n | - flowOutOfCallNodeCand1(_, n1, n, conf) or flowIntoCallNodeCand1(_, n1, n, conf) - ) -} - -/** - * Gets the amount of backward branching on the target of a cross-call path - * edge in the graph of paths between sources and sinks that ignores call - * contexts. - */ -private int join(Node n2, Configuration conf) { - result = - strictcount(Node n | - flowOutOfCallNodeCand1(_, n, n2, conf) or flowIntoCallNodeCand1(_, n, n2, conf) - ) -} - -/** - * Holds if data can flow out of `call` from `ret` to `out`, either - * through a `ReturnNode` or through an argument that has been mutated, and - * that this step is part of a path from a source to a sink. 
The - * `allowsFieldFlow` flag indicates whether the branching is within the limit - * specified by the configuration. - */ -pragma[nomagic] -private predicate flowOutOfCallNodeCand1( - DataFlowCall call, ReturnNodeExt ret, Node out, boolean allowsFieldFlow, Configuration config -) { - flowOutOfCallNodeCand1(call, ret, out, config) and - exists(int b, int j | - b = branch(ret, config) and - j = join(out, config) and - if b.minimum(j) <= config.fieldFlowBranchLimit() - then allowsFieldFlow = true - else allowsFieldFlow = false - ) -} - -/** - * Holds if data can flow into `call` and that this step is part of a - * path from a source to a sink. The `allowsFieldFlow` flag indicates whether - * the branching is within the limit specified by the configuration. - */ -pragma[nomagic] -private predicate flowIntoCallNodeCand1( - DataFlowCall call, ArgumentNode arg, ParameterNode p, boolean allowsFieldFlow, - Configuration config -) { - flowIntoCallNodeCand1(call, arg, p, config) and - exists(int b, int j | - b = branch(arg, config) and - j = join(p, config) and - if b.minimum(j) <= config.fieldFlowBranchLimit() - then allowsFieldFlow = true - else allowsFieldFlow = false - ) -} - -private module Stage2 { - module PrevStage = Stage1; - - class ApApprox = PrevStage::Ap; - - class Ap = boolean; - - class ApNil extends Ap { - ApNil() { this = false } - } - - bindingset[result, ap] - private ApApprox getApprox(Ap ap) { any() } - - private ApNil getApNil(Node node) { any() } - - bindingset[tc, tail] - private Ap apCons(TypedContent tc, Ap tail) { result = true and exists(tc) and exists(tail) } - - pragma[inline] - private Content getHeadContent(Ap ap) { exists(result) and ap = true } - - class ApOption = BooleanOption; - - ApOption apNone() { result = TBooleanNone() } - - ApOption apSome(Ap ap) { result = TBooleanSome(ap) } - - class Cc = boolean; - - class CcCall extends Cc { - CcCall() { this = true } - - /** Holds if this call context may be `call`. 
*/ - predicate matchesCall(DataFlowCall call) { any() } - } - - class CcNoCall extends Cc { - CcNoCall() { this = false } - } - - Cc ccNone() { result = false } - - private class LocalCc = Unit; - - bindingset[call, c, outercc] - private CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) { any() } - - bindingset[call, c] - private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call) { any() } - - bindingset[innercc, inner, call] - private predicate checkCallContextReturn(Cc innercc, DataFlowCallable inner, DataFlowCall call) { - any() - } - - bindingset[node, cc, config] - private LocalCc getLocalCc(Node node, Cc cc, Configuration config) { any() } - - private predicate localStep( - Node node1, Node node2, boolean preservesValue, ApNil ap, Configuration config, LocalCc lcc - ) { - ( - preservesValue = true and - localFlowStepNodeCand1(node1, node2, config) - or - preservesValue = false and - additionalLocalFlowStepNodeCand1(node1, node2, config) - ) and - exists(ap) and - exists(lcc) - } - - private predicate flowOutOfCall = flowOutOfCallNodeCand1/5; - - private predicate flowIntoCall = flowIntoCallNodeCand1/5; - - bindingset[ap, contentType] - private predicate typecheckStore(Ap ap, DataFlowType contentType) { any() } - - /* Begin: Stage 2 logic. */ - private predicate flowCand(Node node, ApApprox apa, Configuration config) { - PrevStage::revFlow(node, _, _, apa, config) - } - - /** - * Holds if `node` is reachable with access path `ap` from a source in the - * configuration `config`. - * - * The call context `cc` records whether the node is reached through an - * argument in a call, and if so, `argAp` records the access path of that - * argument. 
- */ - pragma[nomagic] - predicate fwdFlow(Node node, Cc cc, ApOption argAp, Ap ap, Configuration config) { - flowCand(node, _, config) and - config.isSource(node) and - cc = ccNone() and - argAp = apNone() and - ap = getApNil(node) - or - exists(Node mid, Ap ap0, LocalCc localCc | - fwdFlow(mid, cc, argAp, ap0, config) and - localCc = getLocalCc(mid, cc, config) - | - localStep(mid, node, true, _, config, localCc) and - ap = ap0 - or - localStep(mid, node, false, ap, config, localCc) and - ap0 instanceof ApNil - ) - or - exists(Node mid | - fwdFlow(mid, _, _, ap, config) and - flowCand(node, _, unbind(config)) and - jumpStep(mid, node, config) and - cc = ccNone() and - argAp = apNone() - ) - or - exists(Node mid, ApNil nil | - fwdFlow(mid, _, _, nil, config) and - flowCand(node, _, unbind(config)) and - additionalJumpStep(mid, node, config) and - cc = ccNone() and - argAp = apNone() and - ap = getApNil(node) - ) - or - // store - exists(TypedContent tc, Ap ap0 | - fwdFlowStore(_, ap0, tc, node, cc, argAp, config) and - ap = apCons(tc, ap0) - ) - or - // read - exists(Ap ap0, Content c | - fwdFlowRead(ap0, c, _, node, cc, argAp, config) and - fwdFlowConsCand(ap0, c, ap, config) - ) - or - // flow into a callable - exists(ApApprox apa | - fwdFlowIn(_, node, _, cc, _, ap, config) and - apa = getApprox(ap) and - if PrevStage::parameterMayFlowThrough(node, _, apa, config) - then argAp = apSome(ap) - else argAp = apNone() - ) - or - // flow out of a callable - exists(DataFlowCall call | - fwdFlowOut(call, node, any(CcNoCall innercc), cc, argAp, ap, config) - or - exists(Ap argAp0 | - fwdFlowOutFromArg(call, node, argAp0, ap, config) and - fwdFlowIsEntered(call, cc, argAp, argAp0, config) - ) - ) - } - - pragma[nomagic] - private predicate fwdFlowStore( - Node node1, Ap ap1, TypedContent tc, Node node2, Cc cc, ApOption argAp, Configuration config - ) { - exists(DataFlowType contentType | - fwdFlow(node1, cc, argAp, ap1, config) and - PrevStage::storeStepCand(node1, 
getApprox(ap1), tc, node2, contentType, config) and - typecheckStore(ap1, contentType) - ) - } - - /** - * Holds if forward flow with access path `tail` reaches a store of `c` - * resulting in access path `cons`. - */ - pragma[nomagic] - private predicate fwdFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) { - exists(TypedContent tc | - fwdFlowStore(_, tail, tc, _, _, _, config) and - tc.getContent() = c and - cons = apCons(tc, tail) - ) - } - - pragma[nomagic] - private predicate fwdFlowRead( - Ap ap, Content c, Node node1, Node node2, Cc cc, ApOption argAp, Configuration config - ) { - fwdFlow(node1, cc, argAp, ap, config) and - PrevStage::readStepCand(node1, c, node2, config) and - getHeadContent(ap) = c - } - - pragma[nomagic] - private predicate fwdFlowIn( - DataFlowCall call, ParameterNode p, Cc outercc, Cc innercc, ApOption argAp, Ap ap, - Configuration config - ) { - exists(ArgumentNode arg, boolean allowsFieldFlow | - fwdFlow(arg, outercc, argAp, ap, config) and - flowIntoCall(call, arg, p, allowsFieldFlow, config) and - innercc = getCallContextCall(call, p.getEnclosingCallable(), outercc) - | - ap instanceof ApNil or allowsFieldFlow = true - ) - } - - /** - * Holds if flow may exit from `call` at `out` with access path `ap`. The - * inner call context is `innercc`, but `ccOut` is just the call context - * based on the return step. In the case of through-flow `ccOut` is discarded - * and replaced by the outer call context as tracked by `fwdFlowIsEntered`. 
- */ - pragma[nomagic] - private predicate fwdFlowOut( - DataFlowCall call, Node out, Cc innercc, Cc ccOut, ApOption argAp, Ap ap, Configuration config - ) { - exists(ReturnNodeExt ret, boolean allowsFieldFlow, DataFlowCallable inner | - fwdFlow(ret, innercc, argAp, ap, config) and - flowOutOfCall(call, ret, out, allowsFieldFlow, config) and - inner = ret.getEnclosingCallable() and - checkCallContextReturn(innercc, inner, call) and - ccOut = getCallContextReturn(inner, call) - | - ap instanceof ApNil or allowsFieldFlow = true - ) - } - - pragma[nomagic] - private predicate fwdFlowOutFromArg( - DataFlowCall call, Node out, Ap argAp, Ap ap, Configuration config - ) { - fwdFlowOut(call, out, any(CcCall ccc), _, apSome(argAp), ap, config) - } - - /** - * Holds if an argument to `call` is reached in the flow covered by `fwdFlow` - * and data might flow through the target callable and back out at `call`. - */ - pragma[nomagic] - private predicate fwdFlowIsEntered( - DataFlowCall call, Cc cc, ApOption argAp, Ap ap, Configuration config - ) { - exists(ParameterNode p | - fwdFlowIn(call, p, cc, _, argAp, ap, config) and - PrevStage::parameterMayFlowThrough(p, _, getApprox(ap), config) - ) - } - - pragma[nomagic] - private predicate storeStepFwd( - Node node1, Ap ap1, TypedContent tc, Node node2, Ap ap2, Configuration config - ) { - fwdFlowStore(node1, ap1, tc, node2, _, _, config) and - ap2 = apCons(tc, ap1) and - fwdFlowRead(ap2, tc.getContent(), _, _, _, _, config) - } - - private predicate readStepFwd(Node n1, Ap ap1, Content c, Node n2, Ap ap2, Configuration config) { - fwdFlowRead(ap1, c, n1, n2, _, _, config) and - fwdFlowConsCand(ap1, c, ap2, config) - } - - /** - * Holds if `node` with access path `ap` is part of a path from a source to a - * sink in the configuration `config`. 
- * - * The Boolean `toReturn` records whether the node must be returned from the - * enclosing callable in order to reach a sink, and if so, `returnAp` records - * the access path of the returned value. - */ - pragma[nomagic] - predicate revFlow(Node node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config) { - revFlow0(node, toReturn, returnAp, ap, config) and - fwdFlow(node, _, _, ap, config) - } - - pragma[nomagic] - private predicate revFlow0( - Node node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config - ) { - fwdFlow(node, _, _, ap, config) and - config.isSink(node) and - toReturn = false and - returnAp = apNone() and - ap instanceof ApNil - or - exists(Node mid | - localStep(node, mid, true, _, config, _) and - revFlow(mid, toReturn, returnAp, ap, config) - ) - or - exists(Node mid, ApNil nil | - fwdFlow(node, _, _, ap, config) and - localStep(node, mid, false, _, config, _) and - revFlow(mid, toReturn, returnAp, nil, config) and - ap instanceof ApNil - ) - or - exists(Node mid | - jumpStep(node, mid, config) and - revFlow(mid, _, _, ap, config) and - toReturn = false and - returnAp = apNone() - ) - or - exists(Node mid, ApNil nil | - fwdFlow(node, _, _, ap, config) and - additionalJumpStep(node, mid, config) and - revFlow(mid, _, _, nil, config) and - toReturn = false and - returnAp = apNone() and - ap instanceof ApNil - ) - or - // store - exists(Ap ap0, Content c | - revFlowStore(ap0, c, ap, node, _, _, toReturn, returnAp, config) and - revFlowConsCand(ap0, c, ap, config) - ) - or - // read - exists(Node mid, Ap ap0 | - revFlow(mid, toReturn, returnAp, ap0, config) and - readStepFwd(node, ap, _, mid, ap0, config) - ) - or - // flow into a callable - exists(DataFlowCall call | - revFlowIn(call, node, toReturn, returnAp, ap, config) and - toReturn = false - or - exists(Ap returnAp0 | - revFlowInToReturn(call, node, returnAp0, ap, config) and - revFlowIsReturned(call, toReturn, returnAp, returnAp0, config) - ) - ) - or - // 
flow out of a callable - revFlowOut(_, node, _, _, ap, config) and - toReturn = true and - if fwdFlow(node, any(CcCall ccc), apSome(_), ap, config) - then returnAp = apSome(ap) - else returnAp = apNone() - } - - pragma[nomagic] - private predicate revFlowStore( - Ap ap0, Content c, Ap ap, Node node, TypedContent tc, Node mid, boolean toReturn, - ApOption returnAp, Configuration config - ) { - revFlow(mid, toReturn, returnAp, ap0, config) and - storeStepFwd(node, ap, tc, mid, ap0, config) and - tc.getContent() = c - } - - /** - * Holds if reverse flow with access path `tail` reaches a read of `c` - * resulting in access path `cons`. - */ - pragma[nomagic] - private predicate revFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) { - exists(Node mid | - revFlow(mid, _, _, tail, config) and - readStepFwd(_, cons, c, mid, tail, config) - ) - } - - pragma[nomagic] - private predicate revFlowOut( - DataFlowCall call, ReturnNodeExt ret, boolean toReturn, ApOption returnAp, Ap ap, - Configuration config - ) { - exists(Node out, boolean allowsFieldFlow | - revFlow(out, toReturn, returnAp, ap, config) and - flowOutOfCall(call, ret, out, allowsFieldFlow, config) - | - ap instanceof ApNil or allowsFieldFlow = true - ) - } - - pragma[nomagic] - private predicate revFlowIn( - DataFlowCall call, ArgumentNode arg, boolean toReturn, ApOption returnAp, Ap ap, - Configuration config - ) { - exists(ParameterNode p, boolean allowsFieldFlow | - revFlow(p, toReturn, returnAp, ap, config) and - flowIntoCall(call, arg, p, allowsFieldFlow, config) - | - ap instanceof ApNil or allowsFieldFlow = true - ) - } - - pragma[nomagic] - private predicate revFlowInToReturn( - DataFlowCall call, ArgumentNode arg, Ap returnAp, Ap ap, Configuration config - ) { - revFlowIn(call, arg, true, apSome(returnAp), ap, config) - } - - /** - * Holds if an output from `call` is reached in the flow covered by `revFlow` - * and data might flow through the target callable resulting in reverse flow - * 
reaching an argument of `call`. - */ - pragma[nomagic] - private predicate revFlowIsReturned( - DataFlowCall call, boolean toReturn, ApOption returnAp, Ap ap, Configuration config - ) { - exists(ReturnNodeExt ret, CcCall ccc | - revFlowOut(call, ret, toReturn, returnAp, ap, config) and - fwdFlow(ret, ccc, apSome(_), ap, config) and - ccc.matchesCall(call) - ) - } - - pragma[nomagic] - predicate storeStepCand( - Node node1, Ap ap1, TypedContent tc, Node node2, DataFlowType contentType, Configuration config - ) { - exists(Ap ap2, Content c | - store(node1, tc, node2, contentType) and - revFlowStore(ap2, c, ap1, node1, tc, node2, _, _, config) and - revFlowConsCand(ap2, c, ap1, config) - ) - } - - predicate readStepCand(Node node1, Content c, Node node2, Configuration config) { - exists(Ap ap1, Ap ap2 | - revFlow(node2, _, _, ap2, config) and - readStepFwd(node1, ap1, c, node2, ap2, config) and - revFlowStore(ap1, c, /*unbind*/ unbindBool(ap2), _, _, _, _, _, unbind(config)) - ) - } - - predicate revFlow(Node node, Configuration config) { revFlow(node, _, _, _, config) } - - private predicate fwdConsCand(TypedContent tc, Ap ap, Configuration config) { - storeStepFwd(_, ap, tc, _, _, config) - } - - predicate consCand(TypedContent tc, Ap ap, Configuration config) { - storeStepCand(_, ap, tc, _, _, config) - } - - pragma[noinline] - private predicate parameterFlow( - ParameterNode p, Ap ap, Ap ap0, DataFlowCallable c, Configuration config - ) { - revFlow(p, true, apSome(ap0), ap, config) and - c = p.getEnclosingCallable() - } - - predicate parameterMayFlowThrough(ParameterNode p, DataFlowCallable c, Ap ap, Configuration config) { - exists(ReturnNodeExt ret, Ap ap0, ReturnKindExt kind, int pos | - parameterFlow(p, ap, ap0, c, config) and - c = ret.getEnclosingCallable() and - revFlow(ret, true, apSome(_), ap0, config) and - fwdFlow(ret, any(CcCall ccc), apSome(ap), ap0, config) and - kind = ret.getKind() and - p.isParameterOf(_, pos) and - // we don't expect a parameter 
to return stored in itself - not kind.(ParamUpdateReturnKind).getPosition() = pos - ) - } - - predicate stats(boolean fwd, int nodes, int fields, int conscand, int tuples, Configuration config) { - fwd = true and - nodes = count(Node node | fwdFlow(node, _, _, _, config)) and - fields = count(TypedContent f0 | fwdConsCand(f0, _, config)) and - conscand = count(TypedContent f0, Ap ap | fwdConsCand(f0, ap, config)) and - tuples = count(Node n, Cc cc, ApOption argAp, Ap ap | fwdFlow(n, cc, argAp, ap, config)) - or - fwd = false and - nodes = count(Node node | revFlow(node, _, _, _, config)) and - fields = count(TypedContent f0 | consCand(f0, _, config)) and - conscand = count(TypedContent f0, Ap ap | consCand(f0, ap, config)) and - tuples = count(Node n, boolean b, ApOption retAp, Ap ap | revFlow(n, b, retAp, ap, config)) - } - /* End: Stage 2 logic. */ -} - -pragma[nomagic] -private predicate flowOutOfCallNodeCand2( - DataFlowCall call, ReturnNodeExt node1, Node node2, boolean allowsFieldFlow, Configuration config -) { - flowOutOfCallNodeCand1(call, node1, node2, allowsFieldFlow, config) and - Stage2::revFlow(node2, config) and - Stage2::revFlow(node1, unbind(config)) -} - -pragma[nomagic] -private predicate flowIntoCallNodeCand2( - DataFlowCall call, ArgumentNode node1, ParameterNode node2, boolean allowsFieldFlow, - Configuration config -) { - flowIntoCallNodeCand1(call, node1, node2, allowsFieldFlow, config) and - Stage2::revFlow(node2, config) and - Stage2::revFlow(node1, unbind(config)) -} - -private module LocalFlowBigStep { - /** - * A node where some checking is required, and hence the big-step relation - * is not allowed to step over. - */ - private class FlowCheckNode extends Node { - FlowCheckNode() { - this instanceof CastNode or - clearsContent(this, _) - } - } - - /** - * Holds if `node` can be the first node in a maximal subsequence of local - * flow steps in a dataflow path. 
- */ - predicate localFlowEntry(Node node, Configuration config) { - Stage2::revFlow(node, config) and - ( - config.isSource(node) or - jumpStep(_, node, config) or - additionalJumpStep(_, node, config) or - node instanceof ParameterNode or - node instanceof OutNodeExt or - store(_, _, node, _) or - read(_, _, node) or - node instanceof FlowCheckNode - ) - } - - /** - * Holds if `node` can be the last node in a maximal subsequence of local - * flow steps in a dataflow path. - */ - private predicate localFlowExit(Node node, Configuration config) { - exists(Node next | Stage2::revFlow(next, config) | - jumpStep(node, next, config) or - additionalJumpStep(node, next, config) or - flowIntoCallNodeCand1(_, node, next, config) or - flowOutOfCallNodeCand1(_, node, next, config) or - store(node, _, next, _) or - read(node, _, next) - ) - or - node instanceof FlowCheckNode - or - config.isSink(node) - } - - pragma[noinline] - private predicate additionalLocalFlowStepNodeCand2(Node node1, Node node2, Configuration config) { - additionalLocalFlowStepNodeCand1(node1, node2, config) and - Stage2::revFlow(node1, _, _, false, config) and - Stage2::revFlow(node2, _, _, false, unbind(config)) - } - - /** - * Holds if the local path from `node1` to `node2` is a prefix of a maximal - * subsequence of local flow steps in a dataflow path. - * - * This is the transitive closure of `[additional]localFlowStep` beginning - * at `localFlowEntry`. 
- */ - pragma[nomagic] - private predicate localFlowStepPlus( - Node node1, Node node2, boolean preservesValue, DataFlowType t, Configuration config, - LocalCallContext cc - ) { - not isUnreachableInCall(node2, cc.(LocalCallContextSpecificCall).getCall()) and - ( - localFlowEntry(node1, config) and - ( - localFlowStepNodeCand1(node1, node2, config) and - preservesValue = true and - t = getNodeType(node1) - or - additionalLocalFlowStepNodeCand2(node1, node2, config) and - preservesValue = false and - t = getNodeType(node2) - ) and - node1 != node2 and - cc.relevantFor(node1.getEnclosingCallable()) and - not isUnreachableInCall(node1, cc.(LocalCallContextSpecificCall).getCall()) and - Stage2::revFlow(node2, unbind(config)) - or - exists(Node mid | - localFlowStepPlus(node1, mid, preservesValue, t, config, cc) and - localFlowStepNodeCand1(mid, node2, config) and - not mid instanceof FlowCheckNode and - Stage2::revFlow(node2, unbind(config)) - ) - or - exists(Node mid | - localFlowStepPlus(node1, mid, _, _, config, cc) and - additionalLocalFlowStepNodeCand2(mid, node2, config) and - not mid instanceof FlowCheckNode and - preservesValue = false and - t = getNodeType(node2) and - Stage2::revFlow(node2, unbind(config)) - ) - ) - } - - /** - * Holds if `node1` can step to `node2` in one or more local steps and this - * path can occur as a maximal subsequence of local steps in a dataflow path. 
- */ - pragma[nomagic] - predicate localFlowBigStep( - Node node1, Node node2, boolean preservesValue, AccessPathFrontNil apf, Configuration config, - LocalCallContext callContext - ) { - localFlowStepPlus(node1, node2, preservesValue, apf.getType(), config, callContext) and - localFlowExit(node2, config) - } -} - -private import LocalFlowBigStep - -private module Stage3 { - module PrevStage = Stage2; - - class ApApprox = PrevStage::Ap; - - class Ap = AccessPathFront; - - class ApNil = AccessPathFrontNil; - - private ApApprox getApprox(Ap ap) { result = ap.toBoolNonEmpty() } - - private ApNil getApNil(Node node) { result = TFrontNil(getNodeType(node)) } - - bindingset[tc, tail] - private Ap apCons(TypedContent tc, Ap tail) { result.getHead() = tc and exists(tail) } - - pragma[noinline] - private Content getHeadContent(Ap ap) { result = ap.getHead().getContent() } - - class ApOption = AccessPathFrontOption; - - ApOption apNone() { result = TAccessPathFrontNone() } - - ApOption apSome(Ap ap) { result = TAccessPathFrontSome(ap) } - - class Cc = boolean; - - class CcCall extends Cc { - CcCall() { this = true } - - /** Holds if this call context may be `call`. 
*/ - predicate matchesCall(DataFlowCall call) { any() } - } - - class CcNoCall extends Cc { - CcNoCall() { this = false } - } - - Cc ccNone() { result = false } - - private class LocalCc = Unit; - - bindingset[call, c, outercc] - private CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) { any() } - - bindingset[call, c] - private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call) { any() } - - bindingset[innercc, inner, call] - private predicate checkCallContextReturn(Cc innercc, DataFlowCallable inner, DataFlowCall call) { - any() - } - - bindingset[node, cc, config] - private LocalCc getLocalCc(Node node, Cc cc, Configuration config) { any() } - - private predicate localStep( - Node node1, Node node2, boolean preservesValue, ApNil ap, Configuration config, LocalCc lcc - ) { - localFlowBigStep(node1, node2, preservesValue, ap, config, _) and exists(lcc) - } - - private predicate flowOutOfCall = flowOutOfCallNodeCand2/5; - - private predicate flowIntoCall = flowIntoCallNodeCand2/5; - - bindingset[node, ap] - private predicate filter(Node node, Ap ap) { - not ap.isClearedAt(node) and - if node instanceof CastingNode then compatibleTypes(getNodeType(node), ap.getType()) else any() - } - - bindingset[ap, contentType] - private predicate typecheckStore(Ap ap, DataFlowType contentType) { - // We need to typecheck stores here, since reverse flow through a getter - // might have a different type here compared to inside the getter. - compatibleTypes(ap.getType(), contentType) - } - - /* Begin: Stage 3 logic. */ - private predicate flowCand(Node node, ApApprox apa, Configuration config) { - PrevStage::revFlow(node, _, _, apa, config) - } - - /** - * Holds if `node` is reachable with access path `ap` from a source in the - * configuration `config`. - * - * The call context `cc` records whether the node is reached through an - * argument in a call, and if so, `argAp` records the access path of that - * argument. 
- */ - pragma[nomagic] - predicate fwdFlow(Node node, Cc cc, ApOption argAp, Ap ap, Configuration config) { - fwdFlow0(node, cc, argAp, ap, config) and - flowCand(node, unbindBool(getApprox(ap)), config) and - filter(node, ap) - } - - pragma[nomagic] - private predicate fwdFlow0(Node node, Cc cc, ApOption argAp, Ap ap, Configuration config) { - flowCand(node, _, config) and - config.isSource(node) and - cc = ccNone() and - argAp = apNone() and - ap = getApNil(node) - or - exists(Node mid, Ap ap0, LocalCc localCc | - fwdFlow(mid, cc, argAp, ap0, config) and - localCc = getLocalCc(mid, cc, config) - | - localStep(mid, node, true, _, config, localCc) and - ap = ap0 - or - localStep(mid, node, false, ap, config, localCc) and - ap0 instanceof ApNil - ) - or - exists(Node mid | - fwdFlow(mid, _, _, ap, config) and - flowCand(node, _, unbind(config)) and - jumpStep(mid, node, config) and - cc = ccNone() and - argAp = apNone() - ) - or - exists(Node mid, ApNil nil | - fwdFlow(mid, _, _, nil, config) and - flowCand(node, _, unbind(config)) and - additionalJumpStep(mid, node, config) and - cc = ccNone() and - argAp = apNone() and - ap = getApNil(node) - ) - or - // store - exists(TypedContent tc, Ap ap0 | - fwdFlowStore(_, ap0, tc, node, cc, argAp, config) and - ap = apCons(tc, ap0) - ) - or - // read - exists(Ap ap0, Content c | - fwdFlowRead(ap0, c, _, node, cc, argAp, config) and - fwdFlowConsCand(ap0, c, ap, config) - ) - or - // flow into a callable - exists(ApApprox apa | - fwdFlowIn(_, node, _, cc, _, ap, config) and - apa = getApprox(ap) and - if PrevStage::parameterMayFlowThrough(node, _, apa, config) - then argAp = apSome(ap) - else argAp = apNone() - ) - or - // flow out of a callable - exists(DataFlowCall call | - fwdFlowOut(call, node, any(CcNoCall innercc), cc, argAp, ap, config) - or - exists(Ap argAp0 | - fwdFlowOutFromArg(call, node, argAp0, ap, config) and - fwdFlowIsEntered(call, cc, argAp, argAp0, config) - ) - ) - } - - pragma[nomagic] - private 
predicate fwdFlowStore( - Node node1, Ap ap1, TypedContent tc, Node node2, Cc cc, ApOption argAp, Configuration config - ) { - exists(DataFlowType contentType | - fwdFlow(node1, cc, argAp, ap1, config) and - PrevStage::storeStepCand(node1, unbindBool(getApprox(ap1)), tc, node2, contentType, config) and - typecheckStore(ap1, contentType) - ) - } - - /** - * Holds if forward flow with access path `tail` reaches a store of `c` - * resulting in access path `cons`. - */ - pragma[nomagic] - private predicate fwdFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) { - exists(TypedContent tc | - fwdFlowStore(_, tail, tc, _, _, _, config) and - tc.getContent() = c and - cons = apCons(tc, tail) - ) - } - - pragma[nomagic] - private predicate fwdFlowRead( - Ap ap, Content c, Node node1, Node node2, Cc cc, ApOption argAp, Configuration config - ) { - fwdFlow(node1, cc, argAp, ap, config) and - PrevStage::readStepCand(node1, c, node2, config) and - getHeadContent(ap) = c - } - - pragma[nomagic] - private predicate fwdFlowIn( - DataFlowCall call, ParameterNode p, Cc outercc, Cc innercc, ApOption argAp, Ap ap, - Configuration config - ) { - exists(ArgumentNode arg, boolean allowsFieldFlow | - fwdFlow(arg, outercc, argAp, ap, config) and - flowIntoCall(call, arg, p, allowsFieldFlow, config) and - innercc = getCallContextCall(call, p.getEnclosingCallable(), outercc) - | - ap instanceof ApNil or allowsFieldFlow = true - ) - } - - /** - * Holds if flow may exit from `call` at `out` with access path `ap`. The - * inner call context is `innercc`, but `ccOut` is just the call context - * based on the return step. In the case of through-flow `ccOut` is discarded - * and replaced by the outer call context as tracked by `fwdFlowIsEntered`. 
- */ - pragma[nomagic] - private predicate fwdFlowOut( - DataFlowCall call, Node out, Cc innercc, Cc ccOut, ApOption argAp, Ap ap, Configuration config - ) { - exists(ReturnNodeExt ret, boolean allowsFieldFlow, DataFlowCallable inner | - fwdFlow(ret, innercc, argAp, ap, config) and - flowOutOfCall(call, ret, out, allowsFieldFlow, config) and - inner = ret.getEnclosingCallable() and - checkCallContextReturn(innercc, inner, call) and - ccOut = getCallContextReturn(inner, call) - | - ap instanceof ApNil or allowsFieldFlow = true - ) - } - - pragma[nomagic] - private predicate fwdFlowOutFromArg( - DataFlowCall call, Node out, Ap argAp, Ap ap, Configuration config - ) { - fwdFlowOut(call, out, any(CcCall ccc), _, apSome(argAp), ap, config) - } - - /** - * Holds if an argument to `call` is reached in the flow covered by `fwdFlow` - * and data might flow through the target callable and back out at `call`. - */ - pragma[nomagic] - private predicate fwdFlowIsEntered( - DataFlowCall call, Cc cc, ApOption argAp, Ap ap, Configuration config - ) { - exists(ParameterNode p | - fwdFlowIn(call, p, cc, _, argAp, ap, config) and - PrevStage::parameterMayFlowThrough(p, _, unbindBool(getApprox(ap)), config) - ) - } - - pragma[nomagic] - private predicate storeStepFwd( - Node node1, Ap ap1, TypedContent tc, Node node2, Ap ap2, Configuration config - ) { - fwdFlowStore(node1, ap1, tc, node2, _, _, config) and - ap2 = apCons(tc, ap1) and - fwdFlowRead(ap2, tc.getContent(), _, _, _, _, config) - } - - private predicate readStepFwd(Node n1, Ap ap1, Content c, Node n2, Ap ap2, Configuration config) { - fwdFlowRead(ap1, c, n1, n2, _, _, config) and - fwdFlowConsCand(ap1, c, ap2, config) - } - - /** - * Holds if `node` with access path `ap` is part of a path from a source to a - * sink in the configuration `config`. 
- * - * The Boolean `toReturn` records whether the node must be returned from the - * enclosing callable in order to reach a sink, and if so, `returnAp` records - * the access path of the returned value. - */ - pragma[nomagic] - predicate revFlow(Node node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config) { - revFlow0(node, toReturn, returnAp, ap, config) and - fwdFlow(node, _, _, ap, config) - } - - pragma[nomagic] - private predicate revFlow0( - Node node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config - ) { - fwdFlow(node, _, _, ap, config) and - config.isSink(node) and - toReturn = false and - returnAp = apNone() and - ap instanceof ApNil - or - exists(Node mid | - localStep(node, mid, true, _, config, _) and - revFlow(mid, toReturn, returnAp, ap, config) - ) - or - exists(Node mid, ApNil nil | - fwdFlow(node, _, _, ap, config) and - localStep(node, mid, false, _, config, _) and - revFlow(mid, toReturn, returnAp, nil, config) and - ap instanceof ApNil - ) - or - exists(Node mid | - jumpStep(node, mid, config) and - revFlow(mid, _, _, ap, config) and - toReturn = false and - returnAp = apNone() - ) - or - exists(Node mid, ApNil nil | - fwdFlow(node, _, _, ap, config) and - additionalJumpStep(node, mid, config) and - revFlow(mid, _, _, nil, config) and - toReturn = false and - returnAp = apNone() and - ap instanceof ApNil - ) - or - // store - exists(Ap ap0, Content c | - revFlowStore(ap0, c, ap, node, _, _, toReturn, returnAp, config) and - revFlowConsCand(ap0, c, ap, config) - ) - or - // read - exists(Node mid, Ap ap0 | - revFlow(mid, toReturn, returnAp, ap0, config) and - readStepFwd(node, ap, _, mid, ap0, config) - ) - or - // flow into a callable - exists(DataFlowCall call | - revFlowIn(call, node, toReturn, returnAp, ap, config) and - toReturn = false - or - exists(Ap returnAp0 | - revFlowInToReturn(call, node, returnAp0, ap, config) and - revFlowIsReturned(call, toReturn, returnAp, returnAp0, config) - ) - ) - or - // 
flow out of a callable - revFlowOut(_, node, _, _, ap, config) and - toReturn = true and - if fwdFlow(node, any(CcCall ccc), apSome(_), ap, config) - then returnAp = apSome(ap) - else returnAp = apNone() - } - - pragma[nomagic] - private predicate revFlowStore( - Ap ap0, Content c, Ap ap, Node node, TypedContent tc, Node mid, boolean toReturn, - ApOption returnAp, Configuration config - ) { - revFlow(mid, toReturn, returnAp, ap0, config) and - storeStepFwd(node, ap, tc, mid, ap0, config) and - tc.getContent() = c - } - - /** - * Holds if reverse flow with access path `tail` reaches a read of `c` - * resulting in access path `cons`. - */ - pragma[nomagic] - private predicate revFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) { - exists(Node mid | - revFlow(mid, _, _, tail, config) and - readStepFwd(_, cons, c, mid, tail, config) - ) - } - - pragma[nomagic] - private predicate revFlowOut( - DataFlowCall call, ReturnNodeExt ret, boolean toReturn, ApOption returnAp, Ap ap, - Configuration config - ) { - exists(Node out, boolean allowsFieldFlow | - revFlow(out, toReturn, returnAp, ap, config) and - flowOutOfCall(call, ret, out, allowsFieldFlow, config) - | - ap instanceof ApNil or allowsFieldFlow = true - ) - } - - pragma[nomagic] - private predicate revFlowIn( - DataFlowCall call, ArgumentNode arg, boolean toReturn, ApOption returnAp, Ap ap, - Configuration config - ) { - exists(ParameterNode p, boolean allowsFieldFlow | - revFlow(p, toReturn, returnAp, ap, config) and - flowIntoCall(call, arg, p, allowsFieldFlow, config) - | - ap instanceof ApNil or allowsFieldFlow = true - ) - } - - pragma[nomagic] - private predicate revFlowInToReturn( - DataFlowCall call, ArgumentNode arg, Ap returnAp, Ap ap, Configuration config - ) { - revFlowIn(call, arg, true, apSome(returnAp), ap, config) - } - - /** - * Holds if an output from `call` is reached in the flow covered by `revFlow` - * and data might flow through the target callable resulting in reverse flow - * 
reaching an argument of `call`. - */ - pragma[nomagic] - private predicate revFlowIsReturned( - DataFlowCall call, boolean toReturn, ApOption returnAp, Ap ap, Configuration config - ) { - exists(ReturnNodeExt ret, CcCall ccc | - revFlowOut(call, ret, toReturn, returnAp, ap, config) and - fwdFlow(ret, ccc, apSome(_), ap, config) and - ccc.matchesCall(call) - ) - } - - pragma[nomagic] - predicate storeStepCand( - Node node1, Ap ap1, TypedContent tc, Node node2, DataFlowType contentType, Configuration config - ) { - exists(Ap ap2, Content c | - store(node1, tc, node2, contentType) and - revFlowStore(ap2, c, ap1, node1, tc, node2, _, _, config) and - revFlowConsCand(ap2, c, ap1, config) - ) - } - - predicate readStepCand(Node node1, Content c, Node node2, Configuration config) { - exists(Ap ap1, Ap ap2 | - revFlow(node2, _, _, ap2, config) and - readStepFwd(node1, ap1, c, node2, ap2, config) and - revFlowStore(ap1, c, /*unbind*/ ap2, _, _, _, _, _, unbind(config)) - ) - } - - predicate revFlow(Node node, Configuration config) { revFlow(node, _, _, _, config) } - - private predicate fwdConsCand(TypedContent tc, Ap ap, Configuration config) { - storeStepFwd(_, ap, tc, _, _, config) - } - - predicate consCand(TypedContent tc, Ap ap, Configuration config) { - storeStepCand(_, ap, tc, _, _, config) - } - - pragma[noinline] - private predicate parameterFlow( - ParameterNode p, Ap ap, Ap ap0, DataFlowCallable c, Configuration config - ) { - revFlow(p, true, apSome(ap0), ap, config) and - c = p.getEnclosingCallable() - } - - predicate parameterMayFlowThrough(ParameterNode p, DataFlowCallable c, Ap ap, Configuration config) { - exists(ReturnNodeExt ret, Ap ap0, ReturnKindExt kind, int pos | - parameterFlow(p, ap, ap0, c, config) and - c = ret.getEnclosingCallable() and - revFlow(ret, true, apSome(_), ap0, config) and - fwdFlow(ret, any(CcCall ccc), apSome(ap), ap0, config) and - kind = ret.getKind() and - p.isParameterOf(_, pos) and - // we don't expect a parameter to return 
stored in itself - not kind.(ParamUpdateReturnKind).getPosition() = pos - ) - } - - predicate stats(boolean fwd, int nodes, int fields, int conscand, int tuples, Configuration config) { - fwd = true and - nodes = count(Node node | fwdFlow(node, _, _, _, config)) and - fields = count(TypedContent f0 | fwdConsCand(f0, _, config)) and - conscand = count(TypedContent f0, Ap ap | fwdConsCand(f0, ap, config)) and - tuples = count(Node n, Cc cc, ApOption argAp, Ap ap | fwdFlow(n, cc, argAp, ap, config)) - or - fwd = false and - nodes = count(Node node | revFlow(node, _, _, _, config)) and - fields = count(TypedContent f0 | consCand(f0, _, config)) and - conscand = count(TypedContent f0, Ap ap | consCand(f0, ap, config)) and - tuples = count(Node n, boolean b, ApOption retAp, Ap ap | revFlow(n, b, retAp, ap, config)) - } - /* End: Stage 3 logic. */ -} - -/** - * Holds if `argApf` is recorded as the summary context for flow reaching `node` - * and remains relevant for the following pruning stage. - */ -private predicate flowCandSummaryCtx(Node node, AccessPathFront argApf, Configuration config) { - exists(AccessPathFront apf | - Stage3::revFlow(node, true, _, apf, config) and - Stage3::fwdFlow(node, true, TAccessPathFrontSome(argApf), apf, config) - ) -} - -/** - * Holds if a length 2 access path approximation with the head `tc` is expected - * to be expensive. 
- */ -private predicate expensiveLen2unfolding(TypedContent tc, Configuration config) { - exists(int tails, int nodes, int apLimit, int tupleLimit | - tails = strictcount(AccessPathFront apf | Stage3::consCand(tc, apf, config)) and - nodes = - strictcount(Node n | - Stage3::revFlow(n, _, _, any(AccessPathFrontHead apf | apf.getHead() = tc), config) - or - flowCandSummaryCtx(n, any(AccessPathFrontHead apf | apf.getHead() = tc), config) - ) and - accessPathApproxCostLimits(apLimit, tupleLimit) and - apLimit < tails and - tupleLimit < (tails - 1) * nodes - ) -} - -private newtype TAccessPathApprox = - TNil(DataFlowType t) or - TConsNil(TypedContent tc, DataFlowType t) { - Stage3::consCand(tc, TFrontNil(t), _) and - not expensiveLen2unfolding(tc, _) - } or - TConsCons(TypedContent tc1, TypedContent tc2, int len) { - Stage3::consCand(tc1, TFrontHead(tc2), _) and - len in [2 .. accessPathLimit()] and - not expensiveLen2unfolding(tc1, _) - } or - TCons1(TypedContent tc, int len) { - len in [1 .. accessPathLimit()] and - expensiveLen2unfolding(tc, _) - } - -/** - * Conceptually a list of `TypedContent`s followed by a `DataFlowType`, but only - * the first two elements of the list and its length are tracked. If data flows - * from a source to a given node with a given `AccessPathApprox`, this indicates - * the sequence of dereference operations needed to get from the value in the node - * to the tracked object. The final type indicates the type of the tracked object. - */ -abstract private class AccessPathApprox extends TAccessPathApprox { - abstract string toString(); - - abstract TypedContent getHead(); - - abstract int len(); - - abstract DataFlowType getType(); - - abstract AccessPathFront getFront(); - - /** Gets the access path obtained by popping `head` from this path, if any. 
*/ - abstract AccessPathApprox pop(TypedContent head); -} - -private class AccessPathApproxNil extends AccessPathApprox, TNil { - private DataFlowType t; - - AccessPathApproxNil() { this = TNil(t) } - - override string toString() { result = concat(": " + ppReprType(t)) } - - override TypedContent getHead() { none() } - - override int len() { result = 0 } - - override DataFlowType getType() { result = t } - - override AccessPathFront getFront() { result = TFrontNil(t) } - - override AccessPathApprox pop(TypedContent head) { none() } -} - -abstract private class AccessPathApproxCons extends AccessPathApprox { } - -private class AccessPathApproxConsNil extends AccessPathApproxCons, TConsNil { - private TypedContent tc; - private DataFlowType t; - - AccessPathApproxConsNil() { this = TConsNil(tc, t) } - - override string toString() { - // The `concat` becomes "" if `ppReprType` has no result. - result = "[" + tc.toString() + "]" + concat(" : " + ppReprType(t)) - } - - override TypedContent getHead() { result = tc } - - override int len() { result = 1 } - - override DataFlowType getType() { result = tc.getContainerType() } - - override AccessPathFront getFront() { result = TFrontHead(tc) } - - override AccessPathApprox pop(TypedContent head) { head = tc and result = TNil(t) } -} - -private class AccessPathApproxConsCons extends AccessPathApproxCons, TConsCons { - private TypedContent tc1; - private TypedContent tc2; - private int len; - - AccessPathApproxConsCons() { this = TConsCons(tc1, tc2, len) } - - override string toString() { - if len = 2 - then result = "[" + tc1.toString() + ", " + tc2.toString() + "]" - else result = "[" + tc1.toString() + ", " + tc2.toString() + ", ... 
(" + len.toString() + ")]" - } - - override TypedContent getHead() { result = tc1 } - - override int len() { result = len } - - override DataFlowType getType() { result = tc1.getContainerType() } - - override AccessPathFront getFront() { result = TFrontHead(tc1) } - - override AccessPathApprox pop(TypedContent head) { - head = tc1 and - ( - result = TConsCons(tc2, _, len - 1) - or - len = 2 and - result = TConsNil(tc2, _) - or - result = TCons1(tc2, len - 1) - ) - } -} - -private class AccessPathApproxCons1 extends AccessPathApproxCons, TCons1 { - private TypedContent tc; - private int len; - - AccessPathApproxCons1() { this = TCons1(tc, len) } - - override string toString() { - if len = 1 - then result = "[" + tc.toString() + "]" - else result = "[" + tc.toString() + ", ... (" + len.toString() + ")]" - } - - override TypedContent getHead() { result = tc } - - override int len() { result = len } - - override DataFlowType getType() { result = tc.getContainerType() } - - override AccessPathFront getFront() { result = TFrontHead(tc) } - - override AccessPathApprox pop(TypedContent head) { - head = tc and - ( - exists(TypedContent tc2 | Stage3::consCand(tc, TFrontHead(tc2), _) | - result = TConsCons(tc2, _, len - 1) - or - len = 2 and - result = TConsNil(tc2, _) - or - result = TCons1(tc2, len - 1) - ) - or - exists(DataFlowType t | - len = 1 and - Stage3::consCand(tc, TFrontNil(t), _) and - result = TNil(t) - ) - ) - } -} - -/** Gets the access path obtained by popping `tc` from `ap`, if any. */ -private AccessPathApprox pop(TypedContent tc, AccessPathApprox apa) { result = apa.pop(tc) } - -/** Gets the access path obtained by pushing `tc` onto `ap`. 
*/ -private AccessPathApprox push(TypedContent tc, AccessPathApprox apa) { apa = pop(tc, result) } - -private newtype TAccessPathApproxOption = - TAccessPathApproxNone() or - TAccessPathApproxSome(AccessPathApprox apa) - -private class AccessPathApproxOption extends TAccessPathApproxOption { - string toString() { - this = TAccessPathApproxNone() and result = "" - or - this = TAccessPathApproxSome(any(AccessPathApprox apa | result = apa.toString())) - } -} - -private module Stage4 { - module PrevStage = Stage3; - - class ApApprox = PrevStage::Ap; - - class Ap = AccessPathApprox; - - class ApNil = AccessPathApproxNil; - - private ApApprox getApprox(Ap ap) { result = ap.getFront() } - - private ApNil getApNil(Node node) { result = TNil(getNodeType(node)) } - - bindingset[tc, tail] - private Ap apCons(TypedContent tc, Ap tail) { result = push(tc, tail) } - - pragma[noinline] - private Content getHeadContent(Ap ap) { result = ap.getHead().getContent() } - - class ApOption = AccessPathApproxOption; - - ApOption apNone() { result = TAccessPathApproxNone() } - - ApOption apSome(Ap ap) { result = TAccessPathApproxSome(ap) } - - class Cc = CallContext; - - class CcCall = CallContextCall; - - class CcNoCall = CallContextNoCall; - - Cc ccNone() { result instanceof CallContextAny } - - private class LocalCc = LocalCallContext; - - bindingset[call, c, outercc] - private CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) { - c = resolveCall(call, outercc) and - if recordDataFlowCallSite(call, c) then result = TSpecificCall(call) else result = TSomeCall() - } - - bindingset[call, c] - private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call) { - if reducedViableImplInReturn(c, call) then result = TReturn(c, call) else result = ccNone() - } - - bindingset[innercc, inner, call] - private predicate checkCallContextReturn(Cc innercc, DataFlowCallable inner, DataFlowCall call) { - resolveReturn(innercc, inner, call) - or - 
innercc.(CallContextCall).matchesCall(call) - } - - bindingset[node, cc, config] - private LocalCc getLocalCc(Node node, Cc cc, Configuration config) { - localFlowEntry(node, config) and - result = getLocalCallContext(cc, node.getEnclosingCallable()) - } - - private predicate localStep( - Node node1, Node node2, boolean preservesValue, ApNil ap, Configuration config, LocalCc lcc - ) { - localFlowBigStep(node1, node2, preservesValue, ap.getFront(), config, lcc) - } - - pragma[nomagic] - private predicate flowOutOfCall( - DataFlowCall call, ReturnNodeExt node1, Node node2, boolean allowsFieldFlow, - Configuration config - ) { - flowOutOfCallNodeCand2(call, node1, node2, allowsFieldFlow, config) and - PrevStage::revFlow(node2, _, _, _, config) and - PrevStage::revFlow(node1, _, _, _, unbind(config)) - } - - pragma[nomagic] - private predicate flowIntoCall( - DataFlowCall call, ArgumentNode node1, ParameterNode node2, boolean allowsFieldFlow, - Configuration config - ) { - flowIntoCallNodeCand2(call, node1, node2, allowsFieldFlow, config) and - PrevStage::revFlow(node2, _, _, _, config) and - PrevStage::revFlow(node1, _, _, _, unbind(config)) - } - - bindingset[node, ap] - private predicate filter(Node node, Ap ap) { any() } - - // Type checking is not necessary here as it has already been done in stage 3. - bindingset[ap, contentType] - private predicate typecheckStore(Ap ap, DataFlowType contentType) { any() } - - /* Begin: Stage 4 logic. */ - private predicate flowCand(Node node, ApApprox apa, Configuration config) { - PrevStage::revFlow(node, _, _, apa, config) - } - - /** - * Holds if `node` is reachable with access path `ap` from a source in the - * configuration `config`. - * - * The call context `cc` records whether the node is reached through an - * argument in a call, and if so, `argAp` records the access path of that - * argument. 
- */ - pragma[nomagic] - predicate fwdFlow(Node node, Cc cc, ApOption argAp, Ap ap, Configuration config) { - fwdFlow0(node, cc, argAp, ap, config) and - flowCand(node, getApprox(ap), config) and - filter(node, ap) - } - - pragma[nomagic] - private predicate fwdFlow0(Node node, Cc cc, ApOption argAp, Ap ap, Configuration config) { - flowCand(node, _, config) and - config.isSource(node) and - cc = ccNone() and - argAp = apNone() and - ap = getApNil(node) - or - exists(Node mid, Ap ap0, LocalCc localCc | - fwdFlow(mid, cc, argAp, ap0, config) and - localCc = getLocalCc(mid, cc, config) - | - localStep(mid, node, true, _, config, localCc) and - ap = ap0 - or - localStep(mid, node, false, ap, config, localCc) and - ap0 instanceof ApNil - ) - or - exists(Node mid | - fwdFlow(mid, _, _, ap, config) and - flowCand(node, _, unbind(config)) and - jumpStep(mid, node, config) and - cc = ccNone() and - argAp = apNone() - ) - or - exists(Node mid, ApNil nil | - fwdFlow(mid, _, _, nil, config) and - flowCand(node, _, unbind(config)) and - additionalJumpStep(mid, node, config) and - cc = ccNone() and - argAp = apNone() and - ap = getApNil(node) - ) - or - // store - exists(TypedContent tc, Ap ap0 | - fwdFlowStore(_, ap0, tc, node, cc, argAp, config) and - ap = apCons(tc, ap0) - ) - or - // read - exists(Ap ap0, Content c | - fwdFlowRead(ap0, c, _, node, cc, argAp, config) and - fwdFlowConsCand(ap0, c, ap, config) - ) - or - // flow into a callable - exists(ApApprox apa | - fwdFlowIn(_, node, _, cc, _, ap, config) and - apa = getApprox(ap) and - if PrevStage::parameterMayFlowThrough(node, _, apa, config) - then argAp = apSome(ap) - else argAp = apNone() - ) - or - // flow out of a callable - exists(DataFlowCall call | - fwdFlowOut(call, node, any(CcNoCall innercc), cc, argAp, ap, config) - or - exists(Ap argAp0 | - fwdFlowOutFromArg(call, node, argAp0, ap, config) and - fwdFlowIsEntered(call, cc, argAp, argAp0, config) - ) - ) - } - - pragma[nomagic] - private predicate 
fwdFlowStore( - Node node1, Ap ap1, TypedContent tc, Node node2, Cc cc, ApOption argAp, Configuration config - ) { - exists(DataFlowType contentType | - fwdFlow(node1, cc, argAp, ap1, config) and - PrevStage::storeStepCand(node1, getApprox(ap1), tc, node2, contentType, config) and - typecheckStore(ap1, contentType) - ) - } - - /** - * Holds if forward flow with access path `tail` reaches a store of `c` - * resulting in access path `cons`. - */ - pragma[nomagic] - private predicate fwdFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) { - exists(TypedContent tc | - fwdFlowStore(_, tail, tc, _, _, _, config) and - tc.getContent() = c and - cons = apCons(tc, tail) - ) - } - - pragma[nomagic] - private predicate fwdFlowRead( - Ap ap, Content c, Node node1, Node node2, Cc cc, ApOption argAp, Configuration config - ) { - fwdFlow(node1, cc, argAp, ap, config) and - PrevStage::readStepCand(node1, c, node2, config) and - getHeadContent(ap) = c - } - - pragma[nomagic] - private predicate fwdFlowIn( - DataFlowCall call, ParameterNode p, Cc outercc, Cc innercc, ApOption argAp, Ap ap, - Configuration config - ) { - exists(ArgumentNode arg, boolean allowsFieldFlow | - fwdFlow(arg, outercc, argAp, ap, config) and - flowIntoCall(call, arg, p, allowsFieldFlow, config) and - innercc = getCallContextCall(call, p.getEnclosingCallable(), outercc) - | - ap instanceof ApNil or allowsFieldFlow = true - ) - } - - /** - * Holds if flow may exit from `call` at `out` with access path `ap`. The - * inner call context is `innercc`, but `ccOut` is just the call context - * based on the return step. In the case of through-flow `ccOut` is discarded - * and replaced by the outer call context as tracked by `fwdFlowIsEntered`. 
- */ - pragma[nomagic] - private predicate fwdFlowOut( - DataFlowCall call, Node out, Cc innercc, Cc ccOut, ApOption argAp, Ap ap, Configuration config - ) { - exists(ReturnNodeExt ret, boolean allowsFieldFlow, DataFlowCallable inner | - fwdFlow(ret, innercc, argAp, ap, config) and - flowOutOfCall(call, ret, out, allowsFieldFlow, config) and - inner = ret.getEnclosingCallable() and - checkCallContextReturn(innercc, inner, call) and - ccOut = getCallContextReturn(inner, call) - | - ap instanceof ApNil or allowsFieldFlow = true - ) - } - - pragma[nomagic] - private predicate fwdFlowOutFromArg( - DataFlowCall call, Node out, Ap argAp, Ap ap, Configuration config - ) { - fwdFlowOut(call, out, any(CcCall ccc), _, apSome(argAp), ap, config) - } - - /** - * Holds if an argument to `call` is reached in the flow covered by `fwdFlow` - * and data might flow through the target callable and back out at `call`. - */ - pragma[nomagic] - private predicate fwdFlowIsEntered( - DataFlowCall call, Cc cc, ApOption argAp, Ap ap, Configuration config - ) { - exists(ParameterNode p | - fwdFlowIn(call, p, cc, _, argAp, ap, config) and - PrevStage::parameterMayFlowThrough(p, _, getApprox(ap), config) - ) - } - - pragma[nomagic] - private predicate storeStepFwd( - Node node1, Ap ap1, TypedContent tc, Node node2, Ap ap2, Configuration config - ) { - fwdFlowStore(node1, ap1, tc, node2, _, _, config) and - ap2 = apCons(tc, ap1) and - fwdFlowRead(ap2, tc.getContent(), _, _, _, _, config) - } - - private predicate readStepFwd(Node n1, Ap ap1, Content c, Node n2, Ap ap2, Configuration config) { - fwdFlowRead(ap1, c, n1, n2, _, _, config) and - fwdFlowConsCand(ap1, c, ap2, config) - } - - /** - * Holds if `node` with access path `ap` is part of a path from a source to a - * sink in the configuration `config`. 
- * - * The Boolean `toReturn` records whether the node must be returned from the - * enclosing callable in order to reach a sink, and if so, `returnAp` records - * the access path of the returned value. - */ - pragma[nomagic] - predicate revFlow(Node node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config) { - revFlow0(node, toReturn, returnAp, ap, config) and - fwdFlow(node, _, _, ap, config) - } - - pragma[nomagic] - private predicate revFlow0( - Node node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config - ) { - fwdFlow(node, _, _, ap, config) and - config.isSink(node) and - toReturn = false and - returnAp = apNone() and - ap instanceof ApNil - or - exists(Node mid | - localStep(node, mid, true, _, config, _) and - revFlow(mid, toReturn, returnAp, ap, config) - ) - or - exists(Node mid, ApNil nil | - fwdFlow(node, _, _, ap, config) and - localStep(node, mid, false, _, config, _) and - revFlow(mid, toReturn, returnAp, nil, config) and - ap instanceof ApNil - ) - or - exists(Node mid | - jumpStep(node, mid, config) and - revFlow(mid, _, _, ap, config) and - toReturn = false and - returnAp = apNone() - ) - or - exists(Node mid, ApNil nil | - fwdFlow(node, _, _, ap, config) and - additionalJumpStep(node, mid, config) and - revFlow(mid, _, _, nil, config) and - toReturn = false and - returnAp = apNone() and - ap instanceof ApNil - ) - or - // store - exists(Ap ap0, Content c | - revFlowStore(ap0, c, ap, node, _, _, toReturn, returnAp, config) and - revFlowConsCand(ap0, c, ap, config) - ) - or - // read - exists(Node mid, Ap ap0 | - revFlow(mid, toReturn, returnAp, ap0, config) and - readStepFwd(node, ap, _, mid, ap0, config) - ) - or - // flow into a callable - exists(DataFlowCall call | - revFlowIn(call, node, toReturn, returnAp, ap, config) and - toReturn = false - or - exists(Ap returnAp0 | - revFlowInToReturn(call, node, returnAp0, ap, config) and - revFlowIsReturned(call, toReturn, returnAp, returnAp0, config) - ) - ) - or - // 
flow out of a callable - revFlowOut(_, node, _, _, ap, config) and - toReturn = true and - if fwdFlow(node, any(CcCall ccc), apSome(_), ap, config) - then returnAp = apSome(ap) - else returnAp = apNone() - } - - pragma[nomagic] - private predicate revFlowStore( - Ap ap0, Content c, Ap ap, Node node, TypedContent tc, Node mid, boolean toReturn, - ApOption returnAp, Configuration config - ) { - revFlow(mid, toReturn, returnAp, ap0, config) and - storeStepFwd(node, ap, tc, mid, ap0, config) and - tc.getContent() = c - } - - /** - * Holds if reverse flow with access path `tail` reaches a read of `c` - * resulting in access path `cons`. - */ - pragma[nomagic] - private predicate revFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) { - exists(Node mid | - revFlow(mid, _, _, tail, config) and - readStepFwd(_, cons, c, mid, tail, config) - ) - } - - pragma[nomagic] - private predicate revFlowOut( - DataFlowCall call, ReturnNodeExt ret, boolean toReturn, ApOption returnAp, Ap ap, - Configuration config - ) { - exists(Node out, boolean allowsFieldFlow | - revFlow(out, toReturn, returnAp, ap, config) and - flowOutOfCall(call, ret, out, allowsFieldFlow, config) - | - ap instanceof ApNil or allowsFieldFlow = true - ) - } - - pragma[nomagic] - private predicate revFlowIn( - DataFlowCall call, ArgumentNode arg, boolean toReturn, ApOption returnAp, Ap ap, - Configuration config - ) { - exists(ParameterNode p, boolean allowsFieldFlow | - revFlow(p, toReturn, returnAp, ap, config) and - flowIntoCall(call, arg, p, allowsFieldFlow, config) - | - ap instanceof ApNil or allowsFieldFlow = true - ) - } - - pragma[nomagic] - private predicate revFlowInToReturn( - DataFlowCall call, ArgumentNode arg, Ap returnAp, Ap ap, Configuration config - ) { - revFlowIn(call, arg, true, apSome(returnAp), ap, config) - } - - /** - * Holds if an output from `call` is reached in the flow covered by `revFlow` - * and data might flow through the target callable resulting in reverse flow - * 
reaching an argument of `call`. - */ - pragma[nomagic] - private predicate revFlowIsReturned( - DataFlowCall call, boolean toReturn, ApOption returnAp, Ap ap, Configuration config - ) { - exists(ReturnNodeExt ret, CcCall ccc | - revFlowOut(call, ret, toReturn, returnAp, ap, config) and - fwdFlow(ret, ccc, apSome(_), ap, config) and - ccc.matchesCall(call) - ) - } - - pragma[nomagic] - predicate storeStepCand( - Node node1, Ap ap1, TypedContent tc, Node node2, DataFlowType contentType, Configuration config - ) { - exists(Ap ap2, Content c | - store(node1, tc, node2, contentType) and - revFlowStore(ap2, c, ap1, node1, tc, node2, _, _, config) and - revFlowConsCand(ap2, c, ap1, config) - ) - } - - predicate readStepCand(Node node1, Content c, Node node2, Configuration config) { - exists(Ap ap1, Ap ap2 | - revFlow(node2, _, _, ap2, config) and - readStepFwd(node1, ap1, c, node2, ap2, config) and - revFlowStore(ap1, c, /*unbind*/ ap2, _, _, _, _, _, unbind(config)) - ) - } - - predicate revFlow(Node node, Configuration config) { revFlow(node, _, _, _, config) } - - private predicate fwdConsCand(TypedContent tc, Ap ap, Configuration config) { - storeStepFwd(_, ap, tc, _, _, config) - } - - predicate consCand(TypedContent tc, Ap ap, Configuration config) { - storeStepCand(_, ap, tc, _, _, config) - } - - pragma[noinline] - private predicate parameterFlow( - ParameterNode p, Ap ap, Ap ap0, DataFlowCallable c, Configuration config - ) { - revFlow(p, true, apSome(ap0), ap, config) and - c = p.getEnclosingCallable() - } - - predicate parameterMayFlowThrough(ParameterNode p, DataFlowCallable c, Ap ap, Configuration config) { - exists(ReturnNodeExt ret, Ap ap0, ReturnKindExt kind, int pos | - parameterFlow(p, ap, ap0, c, config) and - c = ret.getEnclosingCallable() and - revFlow(ret, true, apSome(_), ap0, config) and - fwdFlow(ret, any(CcCall ccc), apSome(ap), ap0, config) and - kind = ret.getKind() and - p.isParameterOf(_, pos) and - // we don't expect a parameter to return 
stored in itself - not kind.(ParamUpdateReturnKind).getPosition() = pos - ) - } - - predicate stats(boolean fwd, int nodes, int fields, int conscand, int tuples, Configuration config) { - fwd = true and - nodes = count(Node node | fwdFlow(node, _, _, _, config)) and - fields = count(TypedContent f0 | fwdConsCand(f0, _, config)) and - conscand = count(TypedContent f0, Ap ap | fwdConsCand(f0, ap, config)) and - tuples = count(Node n, Cc cc, ApOption argAp, Ap ap | fwdFlow(n, cc, argAp, ap, config)) - or - fwd = false and - nodes = count(Node node | revFlow(node, _, _, _, config)) and - fields = count(TypedContent f0 | consCand(f0, _, config)) and - conscand = count(TypedContent f0, Ap ap | consCand(f0, ap, config)) and - tuples = count(Node n, boolean b, ApOption retAp, Ap ap | revFlow(n, b, retAp, ap, config)) - } - /* End: Stage 4 logic. */ -} - -bindingset[conf, result] -private Configuration unbind(Configuration conf) { result >= conf and result <= conf } - -private predicate nodeMayUseSummary(Node n, AccessPathApprox apa, Configuration config) { - exists(DataFlowCallable c, AccessPathApprox apa0 | - Stage4::parameterMayFlowThrough(_, c, apa, _) and - Stage4::revFlow(n, true, _, apa0, config) and - Stage4::fwdFlow(n, any(CallContextCall ccc), TAccessPathApproxSome(apa), apa0, config) and - n.getEnclosingCallable() = c - ) -} - -private newtype TSummaryCtx = - TSummaryCtxNone() or - TSummaryCtxSome(ParameterNode p, AccessPath ap) { - Stage4::parameterMayFlowThrough(p, _, ap.getApprox(), _) - } - -/** - * A context for generating flow summaries. This represents flow entry through - * a specific parameter with an access path of a specific shape. - * - * Summaries are only created for parameters that may flow through. - */ -abstract private class SummaryCtx extends TSummaryCtx { - abstract string toString(); -} - -/** A summary context from which no flow summary can be generated. 
*/ -private class SummaryCtxNone extends SummaryCtx, TSummaryCtxNone { - override string toString() { result = "" } -} - -/** A summary context from which a flow summary can be generated. */ -private class SummaryCtxSome extends SummaryCtx, TSummaryCtxSome { - private ParameterNode p; - private AccessPath ap; - - SummaryCtxSome() { this = TSummaryCtxSome(p, ap) } - - int getParameterPos() { p.isParameterOf(_, result) } - - override string toString() { result = p + ": " + ap } - - predicate hasLocationInfo( - string filepath, int startline, int startcolumn, int endline, int endcolumn - ) { - p.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) - } -} - -/** - * Gets the number of length 2 access path approximations that correspond to `apa`. - */ -private int count1to2unfold(AccessPathApproxCons1 apa, Configuration config) { - exists(TypedContent tc, int len | - tc = apa.getHead() and - len = apa.len() and - result = - strictcount(AccessPathFront apf | - Stage4::consCand(tc, any(AccessPathApprox ap | ap.getFront() = apf and ap.len() = len - 1), - config) - ) - ) -} - -private int countNodesUsingAccessPath(AccessPathApprox apa, Configuration config) { - result = - strictcount(Node n | Stage4::revFlow(n, _, _, apa, config) or nodeMayUseSummary(n, apa, config)) -} - -/** - * Holds if a length 2 access path approximation matching `apa` is expected - * to be expensive. 
- */ -private predicate expensiveLen1to2unfolding(AccessPathApproxCons1 apa, Configuration config) { - exists(int aps, int nodes, int apLimit, int tupleLimit | - aps = count1to2unfold(apa, config) and - nodes = countNodesUsingAccessPath(apa, config) and - accessPathCostLimits(apLimit, tupleLimit) and - apLimit < aps and - tupleLimit < (aps - 1) * nodes - ) -} - -private AccessPathApprox getATail(AccessPathApprox apa, Configuration config) { - exists(TypedContent head | - apa.pop(head) = result and - Stage4::consCand(head, result, config) - ) -} - -/** - * Holds with `unfold = false` if a precise head-tail representation of `apa` is - * expected to be expensive. Holds with `unfold = true` otherwise. - */ -private predicate evalUnfold(AccessPathApprox apa, boolean unfold, Configuration config) { - exists(int aps, int nodes, int apLimit, int tupleLimit | - aps = countPotentialAps(apa, config) and - nodes = countNodesUsingAccessPath(apa, config) and - accessPathCostLimits(apLimit, tupleLimit) and - if apLimit < aps and tupleLimit < (aps - 1) * nodes then unfold = false else unfold = true - ) -} - -/** - * Gets the number of `AccessPath`s that correspond to `apa`. - */ -private int countAps(AccessPathApprox apa, Configuration config) { - evalUnfold(apa, false, config) and - result = 1 and - (not apa instanceof AccessPathApproxCons1 or expensiveLen1to2unfolding(apa, config)) - or - evalUnfold(apa, false, config) and - result = count1to2unfold(apa, config) and - not expensiveLen1to2unfolding(apa, config) - or - evalUnfold(apa, true, config) and - result = countPotentialAps(apa, config) -} - -/** - * Gets the number of `AccessPath`s that would correspond to `apa` assuming - * that it is expanded to a precise head-tail representation. 
- */ -language[monotonicAggregates] -private int countPotentialAps(AccessPathApprox apa, Configuration config) { - apa instanceof AccessPathApproxNil and result = 1 - or - result = strictsum(AccessPathApprox tail | tail = getATail(apa, config) | countAps(tail, config)) -} - -private newtype TAccessPath = - TAccessPathNil(DataFlowType t) or - TAccessPathCons(TypedContent head, AccessPath tail) { - exists(AccessPathApproxCons apa | - not evalUnfold(apa, false, _) and - head = apa.getHead() and - tail.getApprox() = getATail(apa, _) - ) - } or - TAccessPathCons2(TypedContent head1, TypedContent head2, int len) { - exists(AccessPathApproxCons apa | - evalUnfold(apa, false, _) and - not expensiveLen1to2unfolding(apa, _) and - apa.len() = len and - head1 = apa.getHead() and - head2 = getATail(apa, _).getHead() - ) - } or - TAccessPathCons1(TypedContent head, int len) { - exists(AccessPathApproxCons apa | - evalUnfold(apa, false, _) and - expensiveLen1to2unfolding(apa, _) and - apa.len() = len and - head = apa.getHead() - ) - } - -private newtype TPathNode = - TPathNodeMid(Node node, CallContext cc, SummaryCtx sc, AccessPath ap, Configuration config) { - // A PathNode is introduced by a source ... - Stage4::revFlow(node, config) and - config.isSource(node) and - cc instanceof CallContextAny and - sc instanceof SummaryCtxNone and - ap = TAccessPathNil(getNodeType(node)) - or - // ... or a step from an existing PathNode to another node. - exists(PathNodeMid mid | - pathStep(mid, node, cc, sc, ap) and - config = mid.getConfiguration() and - Stage4::revFlow(node, _, _, ap.getApprox(), unbind(config)) - ) - } or - TPathNodeSink(Node node, Configuration config) { - config.isSink(node) and - Stage4::revFlow(node, unbind(config)) and - ( - // A sink that is also a source ... - config.isSource(node) - or - // ... 
or a sink that can be reached from a source - exists(PathNodeMid mid | - pathStep(mid, node, _, _, TAccessPathNil(_)) and - config = unbind(mid.getConfiguration()) - ) - ) - } - -/** - * A list of `TypedContent`s followed by a `DataFlowType`. If data flows from a - * source to a given node with a given `AccessPath`, this indicates the sequence - * of dereference operations needed to get from the value in the node to the - * tracked object. The final type indicates the type of the tracked object. - */ -abstract private class AccessPath extends TAccessPath { - /** Gets the head of this access path, if any. */ - abstract TypedContent getHead(); - - /** Gets the tail of this access path, if any. */ - abstract AccessPath getTail(); - - /** Gets the front of this access path. */ - abstract AccessPathFront getFront(); - - /** Gets the approximation of this access path. */ - abstract AccessPathApprox getApprox(); - - /** Gets the length of this access path. */ - abstract int length(); - - /** Gets a textual representation of this access path. */ - abstract string toString(); - - /** Gets the access path obtained by popping `tc` from this access path, if any. */ - final AccessPath pop(TypedContent tc) { - result = this.getTail() and - tc = this.getHead() - } - - /** Gets the access path obtained by pushing `tc` onto this access path. 
*/ - final AccessPath push(TypedContent tc) { this = result.pop(tc) } -} - -private class AccessPathNil extends AccessPath, TAccessPathNil { - private DataFlowType t; - - AccessPathNil() { this = TAccessPathNil(t) } - - DataFlowType getType() { result = t } - - override TypedContent getHead() { none() } - - override AccessPath getTail() { none() } - - override AccessPathFrontNil getFront() { result = TFrontNil(t) } - - override AccessPathApproxNil getApprox() { result = TNil(t) } - - override int length() { result = 0 } - - override string toString() { result = concat(": " + ppReprType(t)) } -} - -private class AccessPathCons extends AccessPath, TAccessPathCons { - private TypedContent head; - private AccessPath tail; - - AccessPathCons() { this = TAccessPathCons(head, tail) } - - override TypedContent getHead() { result = head } - - override AccessPath getTail() { result = tail } - - override AccessPathFrontHead getFront() { result = TFrontHead(head) } - - override AccessPathApproxCons getApprox() { - result = TConsNil(head, tail.(AccessPathNil).getType()) - or - result = TConsCons(head, tail.getHead(), this.length()) - or - result = TCons1(head, this.length()) - } - - override int length() { result = 1 + tail.length() } - - private string toStringImpl(boolean needsSuffix) { - exists(DataFlowType t | - tail = TAccessPathNil(t) and - needsSuffix = false and - result = head.toString() + "]" + concat(" : " + ppReprType(t)) - ) - or - result = head + ", " + tail.(AccessPathCons).toStringImpl(needsSuffix) - or - exists(TypedContent tc2, TypedContent tc3, int len | tail = TAccessPathCons2(tc2, tc3, len) | - result = head + ", " + tc2 + ", " + tc3 + ", ... (" and len > 2 and needsSuffix = true - or - result = head + ", " + tc2 + ", " + tc3 + "]" and len = 2 and needsSuffix = false - ) - or - exists(TypedContent tc2, int len | tail = TAccessPathCons1(tc2, len) | - result = head + ", " + tc2 + ", ... 
(" and len > 1 and needsSuffix = true - or - result = head + ", " + tc2 + "]" and len = 1 and needsSuffix = false - ) - } - - override string toString() { - result = "[" + this.toStringImpl(true) + length().toString() + ")]" - or - result = "[" + this.toStringImpl(false) - } -} - -private class AccessPathCons2 extends AccessPath, TAccessPathCons2 { - private TypedContent head1; - private TypedContent head2; - private int len; - - AccessPathCons2() { this = TAccessPathCons2(head1, head2, len) } - - override TypedContent getHead() { result = head1 } - - override AccessPath getTail() { - Stage4::consCand(head1, result.getApprox(), _) and - result.getHead() = head2 and - result.length() = len - 1 - } - - override AccessPathFrontHead getFront() { result = TFrontHead(head1) } - - override AccessPathApproxCons getApprox() { - result = TConsCons(head1, head2, len) or - result = TCons1(head1, len) - } - - override int length() { result = len } - - override string toString() { - if len = 2 - then result = "[" + head1.toString() + ", " + head2.toString() + "]" - else - result = "[" + head1.toString() + ", " + head2.toString() + ", ... (" + len.toString() + ")]" - } -} - -private class AccessPathCons1 extends AccessPath, TAccessPathCons1 { - private TypedContent head; - private int len; - - AccessPathCons1() { this = TAccessPathCons1(head, len) } - - override TypedContent getHead() { result = head } - - override AccessPath getTail() { - Stage4::consCand(head, result.getApprox(), _) and result.length() = len - 1 - } - - override AccessPathFrontHead getFront() { result = TFrontHead(head) } - - override AccessPathApproxCons getApprox() { result = TCons1(head, len) } - - override int length() { result = len } - - override string toString() { - if len = 1 - then result = "[" + head.toString() + "]" - else result = "[" + head.toString() + ", ... 
(" + len.toString() + ")]" - } -} - -/** - * A `Node` augmented with a call context (except for sinks), an access path, and a configuration. - * Only those `PathNode`s that are reachable from a source are generated. - */ -class PathNode extends TPathNode { - /** Gets a textual representation of this element. */ - string toString() { none() } - - /** - * Gets a textual representation of this element, including a textual - * representation of the call context. - */ - string toStringWithContext() { none() } - - /** - * Holds if this element is at the specified location. - * The location spans column `startcolumn` of line `startline` to - * column `endcolumn` of line `endline` in file `filepath`. - * For more information, see - * [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html). - */ - predicate hasLocationInfo( - string filepath, int startline, int startcolumn, int endline, int endcolumn - ) { - none() - } - - /** Gets the underlying `Node`. */ - Node getNode() { none() } - - /** Gets the associated configuration. */ - Configuration getConfiguration() { none() } - - private predicate isHidden() { - nodeIsHidden(this.getNode()) and - not this.isSource() and - not this instanceof PathNodeSink - } - - private PathNode getASuccessorIfHidden() { - this.isHidden() and - result = this.(PathNodeImpl).getASuccessorImpl() - } - - /** Gets a successor of this node, if any. */ - final PathNode getASuccessor() { - result = this.(PathNodeImpl).getASuccessorImpl().getASuccessorIfHidden*() and - not this.isHidden() and - not result.isHidden() - } - - /** Holds if this node is a source. 
*/ - predicate isSource() { none() } -} - -abstract private class PathNodeImpl extends PathNode { - abstract PathNode getASuccessorImpl(); - - private string ppAp() { - this instanceof PathNodeSink and result = "" - or - exists(string s | s = this.(PathNodeMid).getAp().toString() | - if s = "" then result = "" else result = " " + s - ) - } - - private string ppCtx() { - this instanceof PathNodeSink and result = "" - or - result = " <" + this.(PathNodeMid).getCallContext().toString() + ">" - } - - override string toString() { result = this.getNode().toString() + ppAp() } - - override string toStringWithContext() { result = this.getNode().toString() + ppAp() + ppCtx() } - - override predicate hasLocationInfo( - string filepath, int startline, int startcolumn, int endline, int endcolumn - ) { - this.getNode().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) - } -} - -/** Holds if `n` can reach a sink. */ -private predicate reach(PathNode n) { n instanceof PathNodeSink or reach(n.getASuccessor()) } - -/** Holds if `n1.getSucc() = n2` and `n2` can reach a sink. */ -private predicate pathSucc(PathNode n1, PathNode n2) { n1.getASuccessor() = n2 and reach(n2) } - -private predicate pathSuccPlus(PathNode n1, PathNode n2) = fastTC(pathSucc/2)(n1, n2) - -/** - * Provides the query predicates needed to include a graph in a path-problem query. - */ -module PathGraph { - /** Holds if `(a,b)` is an edge in the graph of data flow path explanations. */ - query predicate edges(PathNode a, PathNode b) { pathSucc(a, b) } - - /** Holds if `n` is a node in the graph of data flow path explanations. */ - query predicate nodes(PathNode n, string key, string val) { - reach(n) and key = "semmle.label" and val = n.toString() - } -} - -/** - * An intermediate flow graph node. This is a triple consisting of a `Node`, - * a `CallContext`, and a `Configuration`. 
- */ -private class PathNodeMid extends PathNodeImpl, TPathNodeMid { - Node node; - CallContext cc; - SummaryCtx sc; - AccessPath ap; - Configuration config; - - PathNodeMid() { this = TPathNodeMid(node, cc, sc, ap, config) } - - override Node getNode() { result = node } - - CallContext getCallContext() { result = cc } - - SummaryCtx getSummaryCtx() { result = sc } - - AccessPath getAp() { result = ap } - - override Configuration getConfiguration() { result = config } - - private PathNodeMid getSuccMid() { - pathStep(this, result.getNode(), result.getCallContext(), result.getSummaryCtx(), result.getAp()) and - result.getConfiguration() = unbind(this.getConfiguration()) - } - - override PathNodeImpl getASuccessorImpl() { - // an intermediate step to another intermediate node - result = getSuccMid() - or - // a final step to a sink via zero steps means we merge the last two steps to prevent trivial-looking edges - exists(PathNodeMid mid, PathNodeSink sink | - mid = getSuccMid() and - mid.getNode() = sink.getNode() and - mid.getAp() instanceof AccessPathNil and - sink.getConfiguration() = unbind(mid.getConfiguration()) and - result = sink - ) - } - - override predicate isSource() { - config.isSource(node) and - cc instanceof CallContextAny and - sc instanceof SummaryCtxNone and - ap instanceof AccessPathNil - } -} - -/** - * A flow graph node corresponding to a sink. This is disjoint from the - * intermediate nodes in order to uniquely correspond to a given sink by - * excluding the `CallContext`. - */ -private class PathNodeSink extends PathNodeImpl, TPathNodeSink { - Node node; - Configuration config; - - PathNodeSink() { this = TPathNodeSink(node, config) } - - override Node getNode() { result = node } - - override Configuration getConfiguration() { result = config } - - override PathNode getASuccessorImpl() { none() } - - override predicate isSource() { config.isSource(node) } -} - -/** - * Holds if data may flow from `mid` to `node`. 
The last step in or out of - * a callable is recorded by `cc`. - */ -private predicate pathStep(PathNodeMid mid, Node node, CallContext cc, SummaryCtx sc, AccessPath ap) { - exists(AccessPath ap0, Node midnode, Configuration conf, LocalCallContext localCC | - midnode = mid.getNode() and - conf = mid.getConfiguration() and - cc = mid.getCallContext() and - sc = mid.getSummaryCtx() and - localCC = getLocalCallContext(cc, midnode.getEnclosingCallable()) and - ap0 = mid.getAp() - | - localFlowBigStep(midnode, node, true, _, conf, localCC) and - ap = ap0 - or - localFlowBigStep(midnode, node, false, ap.getFront(), conf, localCC) and - ap0 instanceof AccessPathNil - ) - or - jumpStep(mid.getNode(), node, mid.getConfiguration()) and - cc instanceof CallContextAny and - sc instanceof SummaryCtxNone and - ap = mid.getAp() - or - additionalJumpStep(mid.getNode(), node, mid.getConfiguration()) and - cc instanceof CallContextAny and - sc instanceof SummaryCtxNone and - mid.getAp() instanceof AccessPathNil and - ap = TAccessPathNil(getNodeType(node)) - or - exists(TypedContent tc | pathStoreStep(mid, node, ap.pop(tc), tc, cc)) and - sc = mid.getSummaryCtx() - or - exists(TypedContent tc | pathReadStep(mid, node, ap.push(tc), tc, cc)) and - sc = mid.getSummaryCtx() - or - pathIntoCallable(mid, node, _, cc, sc, _) and ap = mid.getAp() - or - pathOutOfCallable(mid, node, cc) and ap = mid.getAp() and sc instanceof SummaryCtxNone - or - pathThroughCallable(mid, node, cc, ap) and sc = mid.getSummaryCtx() -} - -pragma[nomagic] -private predicate pathReadStep( - PathNodeMid mid, Node node, AccessPath ap0, TypedContent tc, CallContext cc -) { - ap0 = mid.getAp() and - tc = ap0.getHead() and - Stage4::readStepCand(mid.getNode(), tc.getContent(), node, mid.getConfiguration()) and - cc = mid.getCallContext() -} - -pragma[nomagic] -private predicate pathStoreStep( - PathNodeMid mid, Node node, AccessPath ap0, TypedContent tc, CallContext cc -) { - ap0 = mid.getAp() and - 
Stage4::storeStepCand(mid.getNode(), _, tc, node, _, mid.getConfiguration()) and - cc = mid.getCallContext() -} - -private predicate pathOutOfCallable0( - PathNodeMid mid, ReturnPosition pos, CallContext innercc, AccessPathApprox apa, - Configuration config -) { - pos = getReturnPosition(mid.getNode()) and - innercc = mid.getCallContext() and - innercc instanceof CallContextNoCall and - apa = mid.getAp().getApprox() and - config = mid.getConfiguration() -} - -pragma[nomagic] -private predicate pathOutOfCallable1( - PathNodeMid mid, DataFlowCall call, ReturnKindExt kind, CallContext cc, AccessPathApprox apa, - Configuration config -) { - exists(ReturnPosition pos, DataFlowCallable c, CallContext innercc | - pathOutOfCallable0(mid, pos, innercc, apa, config) and - c = pos.getCallable() and - kind = pos.getKind() and - resolveReturn(innercc, c, call) - | - if reducedViableImplInReturn(c, call) then cc = TReturn(c, call) else cc = TAnyCallContext() - ) -} - -pragma[noinline] -private Node getAnOutNodeFlow( - ReturnKindExt kind, DataFlowCall call, AccessPathApprox apa, Configuration config -) { - result = kind.getAnOutNode(call) and - Stage4::revFlow(result, _, _, apa, config) -} - -/** - * Holds if data may flow from `mid` to `out`. The last step of this path - * is a return from a callable and is recorded by `cc`, if needed. - */ -pragma[noinline] -private predicate pathOutOfCallable(PathNodeMid mid, Node out, CallContext cc) { - exists(ReturnKindExt kind, DataFlowCall call, AccessPathApprox apa, Configuration config | - pathOutOfCallable1(mid, call, kind, cc, apa, config) and - out = getAnOutNodeFlow(kind, call, apa, config) - ) -} - -/** - * Holds if data may flow from `mid` to the `i`th argument of `call` in `cc`. 
- */ -pragma[noinline] -private predicate pathIntoArg( - PathNodeMid mid, int i, CallContext cc, DataFlowCall call, AccessPath ap, AccessPathApprox apa -) { - exists(ArgumentNode arg | - arg = mid.getNode() and - cc = mid.getCallContext() and - arg.argumentOf(call, i) and - ap = mid.getAp() and - apa = ap.getApprox() - ) -} - -pragma[noinline] -private predicate parameterCand( - DataFlowCallable callable, int i, AccessPathApprox apa, Configuration config -) { - exists(ParameterNode p | - Stage4::revFlow(p, _, _, apa, config) and - p.isParameterOf(callable, i) - ) -} - -pragma[nomagic] -private predicate pathIntoCallable0( - PathNodeMid mid, DataFlowCallable callable, int i, CallContext outercc, DataFlowCall call, - AccessPath ap -) { - exists(AccessPathApprox apa | - pathIntoArg(mid, i, outercc, call, ap, apa) and - callable = resolveCall(call, outercc) and - parameterCand(callable, any(int j | j <= i and j >= i), apa, mid.getConfiguration()) - ) -} - -/** - * Holds if data may flow from `mid` to `p` through `call`. The contexts - * before and after entering the callable are `outercc` and `innercc`, - * respectively. - */ -private predicate pathIntoCallable( - PathNodeMid mid, ParameterNode p, CallContext outercc, CallContextCall innercc, SummaryCtx sc, - DataFlowCall call -) { - exists(int i, DataFlowCallable callable, AccessPath ap | - pathIntoCallable0(mid, callable, i, outercc, call, ap) and - p.isParameterOf(callable, i) and - ( - sc = TSummaryCtxSome(p, ap) - or - not exists(TSummaryCtxSome(p, ap)) and - sc = TSummaryCtxNone() - ) - | - if recordDataFlowCallSite(call, callable) - then innercc = TSpecificCall(call) - else innercc = TSomeCall() - ) -} - -/** Holds if data may flow from a parameter given by `sc` to a return of kind `kind`. 
*/ -pragma[nomagic] -private predicate paramFlowsThrough( - ReturnKindExt kind, CallContextCall cc, SummaryCtxSome sc, AccessPath ap, AccessPathApprox apa, - Configuration config -) { - exists(PathNodeMid mid, ReturnNodeExt ret, int pos | - mid.getNode() = ret and - kind = ret.getKind() and - cc = mid.getCallContext() and - sc = mid.getSummaryCtx() and - config = mid.getConfiguration() and - ap = mid.getAp() and - apa = ap.getApprox() and - pos = sc.getParameterPos() and - not kind.(ParamUpdateReturnKind).getPosition() = pos - ) -} - -pragma[nomagic] -private predicate pathThroughCallable0( - DataFlowCall call, PathNodeMid mid, ReturnKindExt kind, CallContext cc, AccessPath ap, - AccessPathApprox apa -) { - exists(CallContext innercc, SummaryCtx sc | - pathIntoCallable(mid, _, cc, innercc, sc, call) and - paramFlowsThrough(kind, innercc, sc, ap, apa, unbind(mid.getConfiguration())) - ) -} - -/** - * Holds if data may flow from `mid` through a callable to the node `out`. - * The context `cc` is restored to its value prior to entering the callable. - */ -pragma[noinline] -private predicate pathThroughCallable(PathNodeMid mid, Node out, CallContext cc, AccessPath ap) { - exists(DataFlowCall call, ReturnKindExt kind, AccessPathApprox apa | - pathThroughCallable0(call, mid, kind, cc, ap, apa) and - out = getAnOutNodeFlow(kind, call, apa, unbind(mid.getConfiguration())) - ) -} - -/** - * Holds if data can flow (inter-procedurally) from `source` to `sink`. - * - * Will only have results if `configuration` has non-empty sources and - * sinks. - */ -private predicate flowsTo( - PathNode flowsource, PathNodeSink flowsink, Node source, Node sink, Configuration configuration -) { - flowsource.isSource() and - flowsource.getConfiguration() = configuration and - flowsource.getNode() = source and - (flowsource = flowsink or pathSuccPlus(flowsource, flowsink)) and - flowsink.getNode() = sink -} - -/** - * Holds if data can flow (inter-procedurally) from `source` to `sink`. 
- * - * Will only have results if `configuration` has non-empty sources and - * sinks. - */ -predicate flowsTo(Node source, Node sink, Configuration configuration) { - flowsTo(_, _, source, sink, configuration) -} - -private predicate finalStats(boolean fwd, int nodes, int fields, int conscand, int tuples) { - fwd = true and - nodes = count(Node n0 | exists(PathNode pn | pn.getNode() = n0)) and - fields = count(TypedContent f0 | exists(PathNodeMid pn | pn.getAp().getHead() = f0)) and - conscand = count(AccessPath ap | exists(PathNodeMid pn | pn.getAp() = ap)) and - tuples = count(PathNode pn) - or - fwd = false and - nodes = count(Node n0 | exists(PathNode pn | pn.getNode() = n0 and reach(pn))) and - fields = count(TypedContent f0 | exists(PathNodeMid pn | pn.getAp().getHead() = f0 and reach(pn))) and - conscand = count(AccessPath ap | exists(PathNodeMid pn | pn.getAp() = ap and reach(pn))) and - tuples = count(PathNode pn | reach(pn)) -} - -/** - * INTERNAL: Only for debugging. - * - * Calculates per-stage metrics for data flow. 
- */ -predicate stageStats( - int n, string stage, int nodes, int fields, int conscand, int tuples, Configuration config -) { - stage = "1 Fwd" and n = 10 and Stage1::stats(true, nodes, fields, conscand, tuples, config) - or - stage = "1 Rev" and n = 15 and Stage1::stats(false, nodes, fields, conscand, tuples, config) - or - stage = "2 Fwd" and n = 20 and Stage2::stats(true, nodes, fields, conscand, tuples, config) - or - stage = "2 Rev" and n = 25 and Stage2::stats(false, nodes, fields, conscand, tuples, config) - or - stage = "3 Fwd" and n = 30 and Stage3::stats(true, nodes, fields, conscand, tuples, config) - or - stage = "3 Rev" and n = 35 and Stage3::stats(false, nodes, fields, conscand, tuples, config) - or - stage = "4 Fwd" and n = 40 and Stage4::stats(true, nodes, fields, conscand, tuples, config) - or - stage = "4 Rev" and n = 45 and Stage4::stats(false, nodes, fields, conscand, tuples, config) - or - stage = "5 Fwd" and n = 50 and finalStats(true, nodes, fields, conscand, tuples) - or - stage = "5 Rev" and n = 55 and finalStats(false, nodes, fields, conscand, tuples) -} - -private module FlowExploration { - private predicate callableStep(DataFlowCallable c1, DataFlowCallable c2, Configuration config) { - exists(Node node1, Node node2 | - jumpStep(node1, node2, config) - or - additionalJumpStep(node1, node2, config) - or - // flow into callable - viableParamArg(_, node2, node1) - or - // flow out of a callable - viableReturnPosOut(_, getReturnPosition(node1), node2) - | - c1 = node1.getEnclosingCallable() and - c2 = node2.getEnclosingCallable() and - c1 != c2 - ) - } - - private predicate interestingCallableSrc(DataFlowCallable c, Configuration config) { - exists(Node n | config.isSource(n) and c = n.getEnclosingCallable()) - or - exists(DataFlowCallable mid | - interestingCallableSrc(mid, config) and callableStep(mid, c, config) - ) - } - - private predicate interestingCallableSink(DataFlowCallable c, Configuration config) { - exists(Node n | 
config.isSink(n) and c = n.getEnclosingCallable()) - or - exists(DataFlowCallable mid | - interestingCallableSink(mid, config) and callableStep(c, mid, config) - ) - } - - private newtype TCallableExt = - TCallable(DataFlowCallable c, Configuration config) { - interestingCallableSrc(c, config) or - interestingCallableSink(c, config) - } or - TCallableSrc() or - TCallableSink() - - private predicate callableExtSrc(TCallableSrc src) { any() } - - private predicate callableExtSink(TCallableSink sink) { any() } - - private predicate callableExtStepFwd(TCallableExt ce1, TCallableExt ce2) { - exists(DataFlowCallable c1, DataFlowCallable c2, Configuration config | - callableStep(c1, c2, config) and - ce1 = TCallable(c1, config) and - ce2 = TCallable(c2, unbind(config)) - ) - or - exists(Node n, Configuration config | - ce1 = TCallableSrc() and - config.isSource(n) and - ce2 = TCallable(n.getEnclosingCallable(), config) - ) - or - exists(Node n, Configuration config | - ce2 = TCallableSink() and - config.isSink(n) and - ce1 = TCallable(n.getEnclosingCallable(), config) - ) - } - - private predicate callableExtStepRev(TCallableExt ce1, TCallableExt ce2) { - callableExtStepFwd(ce2, ce1) - } - - private int distSrcExt(TCallableExt c) = - shortestDistances(callableExtSrc/1, callableExtStepFwd/2)(_, c, result) - - private int distSinkExt(TCallableExt c) = - shortestDistances(callableExtSink/1, callableExtStepRev/2)(_, c, result) - - private int distSrc(DataFlowCallable c, Configuration config) { - result = distSrcExt(TCallable(c, config)) - 1 - } - - private int distSink(DataFlowCallable c, Configuration config) { - result = distSinkExt(TCallable(c, config)) - 1 - } - - private newtype TPartialAccessPath = - TPartialNil(DataFlowType t) or - TPartialCons(TypedContent tc, int len) { len in [1 .. accessPathLimit()] } - - /** - * Conceptually a list of `TypedContent`s followed by a `Type`, but only the first - * element of the list and its length are tracked. 
If data flows from a source to - * a given node with a given `AccessPath`, this indicates the sequence of - * dereference operations needed to get from the value in the node to the - * tracked object. The final type indicates the type of the tracked object. - */ - private class PartialAccessPath extends TPartialAccessPath { - abstract string toString(); - - TypedContent getHead() { this = TPartialCons(result, _) } - - int len() { - this = TPartialNil(_) and result = 0 - or - this = TPartialCons(_, result) - } - - DataFlowType getType() { - this = TPartialNil(result) - or - exists(TypedContent head | this = TPartialCons(head, _) | result = head.getContainerType()) - } - } - - private class PartialAccessPathNil extends PartialAccessPath, TPartialNil { - override string toString() { - exists(DataFlowType t | this = TPartialNil(t) | result = concat(": " + ppReprType(t))) - } - } - - private class PartialAccessPathCons extends PartialAccessPath, TPartialCons { - override string toString() { - exists(TypedContent tc, int len | this = TPartialCons(tc, len) | - if len = 1 - then result = "[" + tc.toString() + "]" - else result = "[" + tc.toString() + ", ... (" + len.toString() + ")]" - ) - } - } - - private newtype TRevPartialAccessPath = - TRevPartialNil() or - TRevPartialCons(Content c, int len) { len in [1 .. accessPathLimit()] } - - /** - * Conceptually a list of `Content`s, but only the first - * element of the list and its length are tracked. 
- */ - private class RevPartialAccessPath extends TRevPartialAccessPath { - abstract string toString(); - - Content getHead() { this = TRevPartialCons(result, _) } - - int len() { - this = TRevPartialNil() and result = 0 - or - this = TRevPartialCons(_, result) - } - } - - private class RevPartialAccessPathNil extends RevPartialAccessPath, TRevPartialNil { - override string toString() { result = "" } - } - - private class RevPartialAccessPathCons extends RevPartialAccessPath, TRevPartialCons { - override string toString() { - exists(Content c, int len | this = TRevPartialCons(c, len) | - if len = 1 - then result = "[" + c.toString() + "]" - else result = "[" + c.toString() + ", ... (" + len.toString() + ")]" - ) - } - } - - private newtype TSummaryCtx1 = - TSummaryCtx1None() or - TSummaryCtx1Param(ParameterNode p) - - private newtype TSummaryCtx2 = - TSummaryCtx2None() or - TSummaryCtx2Some(PartialAccessPath ap) - - private newtype TRevSummaryCtx1 = - TRevSummaryCtx1None() or - TRevSummaryCtx1Some(ReturnPosition pos) - - private newtype TRevSummaryCtx2 = - TRevSummaryCtx2None() or - TRevSummaryCtx2Some(RevPartialAccessPath ap) - - private newtype TPartialPathNode = - TPartialPathNodeFwd( - Node node, CallContext cc, TSummaryCtx1 sc1, TSummaryCtx2 sc2, PartialAccessPath ap, - Configuration config - ) { - config.isSource(node) and - cc instanceof CallContextAny and - sc1 = TSummaryCtx1None() and - sc2 = TSummaryCtx2None() and - ap = TPartialNil(getNodeType(node)) and - not fullBarrier(node, config) and - exists(config.explorationLimit()) - or - partialPathNodeMk0(node, cc, sc1, sc2, ap, config) and - distSrc(node.getEnclosingCallable(), config) <= config.explorationLimit() - } or - TPartialPathNodeRev( - Node node, TRevSummaryCtx1 sc1, TRevSummaryCtx2 sc2, RevPartialAccessPath ap, - Configuration config - ) { - config.isSink(node) and - sc1 = TRevSummaryCtx1None() and - sc2 = TRevSummaryCtx2None() and - ap = TRevPartialNil() and - not fullBarrier(node, config) and - 
exists(config.explorationLimit()) - or - exists(PartialPathNodeRev mid | - revPartialPathStep(mid, node, sc1, sc2, ap, config) and - not clearsContent(node, ap.getHead()) and - not fullBarrier(node, config) and - distSink(node.getEnclosingCallable(), config) <= config.explorationLimit() - ) - } - - pragma[nomagic] - private predicate partialPathNodeMk0( - Node node, CallContext cc, TSummaryCtx1 sc1, TSummaryCtx2 sc2, PartialAccessPath ap, - Configuration config - ) { - exists(PartialPathNodeFwd mid | - partialPathStep(mid, node, cc, sc1, sc2, ap, config) and - not fullBarrier(node, config) and - not clearsContent(node, ap.getHead().getContent()) and - if node instanceof CastingNode - then compatibleTypes(getNodeType(node), ap.getType()) - else any() - ) - } - - /** - * A `Node` augmented with a call context, an access path, and a configuration. - */ - class PartialPathNode extends TPartialPathNode { - /** Gets a textual representation of this element. */ - string toString() { result = this.getNode().toString() + this.ppAp() } - - /** - * Gets a textual representation of this element, including a textual - * representation of the call context. - */ - string toStringWithContext() { result = this.getNode().toString() + this.ppAp() + this.ppCtx() } - - /** - * Holds if this element is at the specified location. - * The location spans column `startcolumn` of line `startline` to - * column `endcolumn` of line `endline` in file `filepath`. - * For more information, see - * [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html). - */ - predicate hasLocationInfo( - string filepath, int startline, int startcolumn, int endline, int endcolumn - ) { - this.getNode().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) - } - - /** Gets the underlying `Node`. */ - Node getNode() { none() } - - /** Gets the associated configuration. */ - Configuration getConfiguration() { none() } - - /** Gets a successor of this node, if any. 
*/ - PartialPathNode getASuccessor() { none() } - - /** - * Gets the approximate distance to the nearest source measured in number - * of interprocedural steps. - */ - int getSourceDistance() { - result = distSrc(this.getNode().getEnclosingCallable(), this.getConfiguration()) - } - - /** - * Gets the approximate distance to the nearest sink measured in number - * of interprocedural steps. - */ - int getSinkDistance() { - result = distSink(this.getNode().getEnclosingCallable(), this.getConfiguration()) - } - - private string ppAp() { - exists(string s | - s = this.(PartialPathNodeFwd).getAp().toString() or - s = this.(PartialPathNodeRev).getAp().toString() - | - if s = "" then result = "" else result = " " + s - ) - } - - private string ppCtx() { - result = " <" + this.(PartialPathNodeFwd).getCallContext().toString() + ">" - } - - /** Holds if this is a source in a forward-flow path. */ - predicate isFwdSource() { this.(PartialPathNodeFwd).isSource() } - - /** Holds if this is a sink in a reverse-flow path. */ - predicate isRevSink() { this.(PartialPathNodeRev).isSink() } - } - - /** - * Provides the query predicates needed to include a graph in a path-problem query. - */ - module PartialPathGraph { - /** Holds if `(a,b)` is an edge in the graph of data flow path explanations. 
*/ - query predicate edges(PartialPathNode a, PartialPathNode b) { a.getASuccessor() = b } - } - - private class PartialPathNodeFwd extends PartialPathNode, TPartialPathNodeFwd { - Node node; - CallContext cc; - TSummaryCtx1 sc1; - TSummaryCtx2 sc2; - PartialAccessPath ap; - Configuration config; - - PartialPathNodeFwd() { this = TPartialPathNodeFwd(node, cc, sc1, sc2, ap, config) } - - override Node getNode() { result = node } - - CallContext getCallContext() { result = cc } - - TSummaryCtx1 getSummaryCtx1() { result = sc1 } - - TSummaryCtx2 getSummaryCtx2() { result = sc2 } - - PartialAccessPath getAp() { result = ap } - - override Configuration getConfiguration() { result = config } - - override PartialPathNodeFwd getASuccessor() { - partialPathStep(this, result.getNode(), result.getCallContext(), result.getSummaryCtx1(), - result.getSummaryCtx2(), result.getAp(), result.getConfiguration()) - } - - predicate isSource() { - config.isSource(node) and - cc instanceof CallContextAny and - sc1 = TSummaryCtx1None() and - sc2 = TSummaryCtx2None() and - ap instanceof TPartialNil - } - } - - private class PartialPathNodeRev extends PartialPathNode, TPartialPathNodeRev { - Node node; - TRevSummaryCtx1 sc1; - TRevSummaryCtx2 sc2; - RevPartialAccessPath ap; - Configuration config; - - PartialPathNodeRev() { this = TPartialPathNodeRev(node, sc1, sc2, ap, config) } - - override Node getNode() { result = node } - - TRevSummaryCtx1 getSummaryCtx1() { result = sc1 } - - TRevSummaryCtx2 getSummaryCtx2() { result = sc2 } - - RevPartialAccessPath getAp() { result = ap } - - override Configuration getConfiguration() { result = config } - - override PartialPathNodeRev getASuccessor() { - revPartialPathStep(result, this.getNode(), this.getSummaryCtx1(), this.getSummaryCtx2(), - this.getAp(), this.getConfiguration()) - } - - predicate isSink() { - config.isSink(node) and - sc1 = TRevSummaryCtx1None() and - sc2 = TRevSummaryCtx2None() and - ap = TRevPartialNil() - } - } - - private 
predicate partialPathStep( - PartialPathNodeFwd mid, Node node, CallContext cc, TSummaryCtx1 sc1, TSummaryCtx2 sc2, - PartialAccessPath ap, Configuration config - ) { - not isUnreachableInCall(node, cc.(CallContextSpecificCall).getCall()) and - ( - localFlowStep(mid.getNode(), node, config) and - cc = mid.getCallContext() and - sc1 = mid.getSummaryCtx1() and - sc2 = mid.getSummaryCtx2() and - ap = mid.getAp() and - config = mid.getConfiguration() - or - additionalLocalFlowStep(mid.getNode(), node, config) and - cc = mid.getCallContext() and - sc1 = mid.getSummaryCtx1() and - sc2 = mid.getSummaryCtx2() and - mid.getAp() instanceof PartialAccessPathNil and - ap = TPartialNil(getNodeType(node)) and - config = mid.getConfiguration() - ) - or - jumpStep(mid.getNode(), node, config) and - cc instanceof CallContextAny and - sc1 = TSummaryCtx1None() and - sc2 = TSummaryCtx2None() and - ap = mid.getAp() and - config = mid.getConfiguration() - or - additionalJumpStep(mid.getNode(), node, config) and - cc instanceof CallContextAny and - sc1 = TSummaryCtx1None() and - sc2 = TSummaryCtx2None() and - mid.getAp() instanceof PartialAccessPathNil and - ap = TPartialNil(getNodeType(node)) and - config = mid.getConfiguration() - or - partialPathStoreStep(mid, _, _, node, ap) and - cc = mid.getCallContext() and - sc1 = mid.getSummaryCtx1() and - sc2 = mid.getSummaryCtx2() and - config = mid.getConfiguration() - or - exists(PartialAccessPath ap0, TypedContent tc | - partialPathReadStep(mid, ap0, tc, node, cc, config) and - sc1 = mid.getSummaryCtx1() and - sc2 = mid.getSummaryCtx2() and - apConsFwd(ap, tc, ap0, config) and - compatibleTypes(ap.getType(), getNodeType(node)) - ) - or - partialPathIntoCallable(mid, node, _, cc, sc1, sc2, _, ap, config) - or - partialPathOutOfCallable(mid, node, cc, ap, config) and - sc1 = TSummaryCtx1None() and - sc2 = TSummaryCtx2None() - or - partialPathThroughCallable(mid, node, cc, ap, config) and - sc1 = mid.getSummaryCtx1() and - sc2 = 
mid.getSummaryCtx2() - } - - bindingset[result, i] - private int unbindInt(int i) { i <= result and i >= result } - - pragma[inline] - private predicate partialPathStoreStep( - PartialPathNodeFwd mid, PartialAccessPath ap1, TypedContent tc, Node node, PartialAccessPath ap2 - ) { - exists(Node midNode, DataFlowType contentType | - midNode = mid.getNode() and - ap1 = mid.getAp() and - store(midNode, tc, node, contentType) and - ap2.getHead() = tc and - ap2.len() = unbindInt(ap1.len() + 1) and - compatibleTypes(ap1.getType(), contentType) - ) - } - - pragma[nomagic] - private predicate apConsFwd( - PartialAccessPath ap1, TypedContent tc, PartialAccessPath ap2, Configuration config - ) { - exists(PartialPathNodeFwd mid | - partialPathStoreStep(mid, ap1, tc, _, ap2) and - config = mid.getConfiguration() - ) - } - - pragma[nomagic] - private predicate partialPathReadStep( - PartialPathNodeFwd mid, PartialAccessPath ap, TypedContent tc, Node node, CallContext cc, - Configuration config - ) { - exists(Node midNode | - midNode = mid.getNode() and - ap = mid.getAp() and - read(midNode, tc.getContent(), node) and - ap.getHead() = tc and - config = mid.getConfiguration() and - cc = mid.getCallContext() - ) - } - - private predicate partialPathOutOfCallable0( - PartialPathNodeFwd mid, ReturnPosition pos, CallContext innercc, PartialAccessPath ap, - Configuration config - ) { - pos = getReturnPosition(mid.getNode()) and - innercc = mid.getCallContext() and - innercc instanceof CallContextNoCall and - ap = mid.getAp() and - config = mid.getConfiguration() - } - - pragma[nomagic] - private predicate partialPathOutOfCallable1( - PartialPathNodeFwd mid, DataFlowCall call, ReturnKindExt kind, CallContext cc, - PartialAccessPath ap, Configuration config - ) { - exists(ReturnPosition pos, DataFlowCallable c, CallContext innercc | - partialPathOutOfCallable0(mid, pos, innercc, ap, config) and - c = pos.getCallable() and - kind = pos.getKind() and - resolveReturn(innercc, c, call) - | - 
if reducedViableImplInReturn(c, call) then cc = TReturn(c, call) else cc = TAnyCallContext() - ) - } - - private predicate partialPathOutOfCallable( - PartialPathNodeFwd mid, Node out, CallContext cc, PartialAccessPath ap, Configuration config - ) { - exists(ReturnKindExt kind, DataFlowCall call | - partialPathOutOfCallable1(mid, call, kind, cc, ap, config) - | - out = kind.getAnOutNode(call) - ) - } - - pragma[noinline] - private predicate partialPathIntoArg( - PartialPathNodeFwd mid, int i, CallContext cc, DataFlowCall call, PartialAccessPath ap, - Configuration config - ) { - exists(ArgumentNode arg | - arg = mid.getNode() and - cc = mid.getCallContext() and - arg.argumentOf(call, i) and - ap = mid.getAp() and - config = mid.getConfiguration() - ) - } - - pragma[nomagic] - private predicate partialPathIntoCallable0( - PartialPathNodeFwd mid, DataFlowCallable callable, int i, CallContext outercc, - DataFlowCall call, PartialAccessPath ap, Configuration config - ) { - partialPathIntoArg(mid, i, outercc, call, ap, config) and - callable = resolveCall(call, outercc) - } - - private predicate partialPathIntoCallable( - PartialPathNodeFwd mid, ParameterNode p, CallContext outercc, CallContextCall innercc, - TSummaryCtx1 sc1, TSummaryCtx2 sc2, DataFlowCall call, PartialAccessPath ap, - Configuration config - ) { - exists(int i, DataFlowCallable callable | - partialPathIntoCallable0(mid, callable, i, outercc, call, ap, config) and - p.isParameterOf(callable, i) and - sc1 = TSummaryCtx1Param(p) and - sc2 = TSummaryCtx2Some(ap) - | - if recordDataFlowCallSite(call, callable) - then innercc = TSpecificCall(call) - else innercc = TSomeCall() - ) - } - - pragma[nomagic] - private predicate paramFlowsThroughInPartialPath( - ReturnKindExt kind, CallContextCall cc, TSummaryCtx1 sc1, TSummaryCtx2 sc2, - PartialAccessPath ap, Configuration config - ) { - exists(PartialPathNodeFwd mid, ReturnNodeExt ret | - mid.getNode() = ret and - kind = ret.getKind() and - cc = 
mid.getCallContext() and - sc1 = mid.getSummaryCtx1() and - sc2 = mid.getSummaryCtx2() and - config = mid.getConfiguration() and - ap = mid.getAp() - ) - } - - pragma[noinline] - private predicate partialPathThroughCallable0( - DataFlowCall call, PartialPathNodeFwd mid, ReturnKindExt kind, CallContext cc, - PartialAccessPath ap, Configuration config - ) { - exists(ParameterNode p, CallContext innercc, TSummaryCtx1 sc1, TSummaryCtx2 sc2 | - partialPathIntoCallable(mid, p, cc, innercc, sc1, sc2, call, _, config) and - paramFlowsThroughInPartialPath(kind, innercc, sc1, sc2, ap, config) - ) - } - - private predicate partialPathThroughCallable( - PartialPathNodeFwd mid, Node out, CallContext cc, PartialAccessPath ap, Configuration config - ) { - exists(DataFlowCall call, ReturnKindExt kind | - partialPathThroughCallable0(call, mid, kind, cc, ap, config) and - out = kind.getAnOutNode(call) - ) - } - - private predicate revPartialPathStep( - PartialPathNodeRev mid, Node node, TRevSummaryCtx1 sc1, TRevSummaryCtx2 sc2, - RevPartialAccessPath ap, Configuration config - ) { - localFlowStep(node, mid.getNode(), config) and - sc1 = mid.getSummaryCtx1() and - sc2 = mid.getSummaryCtx2() and - ap = mid.getAp() and - config = mid.getConfiguration() - or - additionalLocalFlowStep(node, mid.getNode(), config) and - sc1 = mid.getSummaryCtx1() and - sc2 = mid.getSummaryCtx2() and - mid.getAp() instanceof RevPartialAccessPathNil and - ap = TRevPartialNil() and - config = mid.getConfiguration() - or - jumpStep(node, mid.getNode(), config) and - sc1 = TRevSummaryCtx1None() and - sc2 = TRevSummaryCtx2None() and - ap = mid.getAp() and - config = mid.getConfiguration() - or - additionalJumpStep(node, mid.getNode(), config) and - sc1 = TRevSummaryCtx1None() and - sc2 = TRevSummaryCtx2None() and - mid.getAp() instanceof RevPartialAccessPathNil and - ap = TRevPartialNil() and - config = mid.getConfiguration() - or - revPartialPathReadStep(mid, _, _, node, ap) and - sc1 = mid.getSummaryCtx1() 
and - sc2 = mid.getSummaryCtx2() and - config = mid.getConfiguration() - or - exists(RevPartialAccessPath ap0, Content c | - revPartialPathStoreStep(mid, ap0, c, node, config) and - sc1 = mid.getSummaryCtx1() and - sc2 = mid.getSummaryCtx2() and - apConsRev(ap, c, ap0, config) - ) - or - exists(ParameterNode p | - mid.getNode() = p and - viableParamArg(_, p, node) and - sc1 = mid.getSummaryCtx1() and - sc2 = mid.getSummaryCtx2() and - sc1 = TRevSummaryCtx1None() and - sc2 = TRevSummaryCtx2None() and - ap = mid.getAp() and - config = mid.getConfiguration() - ) - or - exists(ReturnPosition pos | - revPartialPathIntoReturn(mid, pos, sc1, sc2, _, ap, config) and - pos = getReturnPosition(node) - ) - or - revPartialPathThroughCallable(mid, node, ap, config) and - sc1 = mid.getSummaryCtx1() and - sc2 = mid.getSummaryCtx2() - } - - pragma[inline] - private predicate revPartialPathReadStep( - PartialPathNodeRev mid, RevPartialAccessPath ap1, Content c, Node node, RevPartialAccessPath ap2 - ) { - exists(Node midNode | - midNode = mid.getNode() and - ap1 = mid.getAp() and - read(node, c, midNode) and - ap2.getHead() = c and - ap2.len() = unbindInt(ap1.len() + 1) - ) - } - - pragma[nomagic] - private predicate apConsRev( - RevPartialAccessPath ap1, Content c, RevPartialAccessPath ap2, Configuration config - ) { - exists(PartialPathNodeRev mid | - revPartialPathReadStep(mid, ap1, c, _, ap2) and - config = mid.getConfiguration() - ) - } - - pragma[nomagic] - private predicate revPartialPathStoreStep( - PartialPathNodeRev mid, RevPartialAccessPath ap, Content c, Node node, Configuration config - ) { - exists(Node midNode, TypedContent tc | - midNode = mid.getNode() and - ap = mid.getAp() and - store(node, tc, midNode, _) and - ap.getHead() = c and - config = mid.getConfiguration() and - tc.getContent() = c - ) - } - - pragma[nomagic] - private predicate revPartialPathIntoReturn( - PartialPathNodeRev mid, ReturnPosition pos, TRevSummaryCtx1Some sc1, TRevSummaryCtx2Some sc2, - 
DataFlowCall call, RevPartialAccessPath ap, Configuration config - ) { - exists(Node out | - mid.getNode() = out and - viableReturnPosOut(call, pos, out) and - sc1 = TRevSummaryCtx1Some(pos) and - sc2 = TRevSummaryCtx2Some(ap) and - ap = mid.getAp() and - config = mid.getConfiguration() - ) - } - - pragma[nomagic] - private predicate revPartialPathFlowsThrough( - int pos, TRevSummaryCtx1Some sc1, TRevSummaryCtx2Some sc2, RevPartialAccessPath ap, - Configuration config - ) { - exists(PartialPathNodeRev mid, ParameterNode p | - mid.getNode() = p and - p.isParameterOf(_, pos) and - sc1 = mid.getSummaryCtx1() and - sc2 = mid.getSummaryCtx2() and - ap = mid.getAp() and - config = mid.getConfiguration() - ) - } - - pragma[nomagic] - private predicate revPartialPathThroughCallable0( - DataFlowCall call, PartialPathNodeRev mid, int pos, RevPartialAccessPath ap, - Configuration config - ) { - exists(TRevSummaryCtx1Some sc1, TRevSummaryCtx2Some sc2 | - revPartialPathIntoReturn(mid, _, sc1, sc2, call, _, config) and - revPartialPathFlowsThrough(pos, sc1, sc2, ap, config) - ) - } - - pragma[nomagic] - private predicate revPartialPathThroughCallable( - PartialPathNodeRev mid, ArgumentNode node, RevPartialAccessPath ap, Configuration config - ) { - exists(DataFlowCall call, int pos | - revPartialPathThroughCallable0(call, mid, pos, ap, config) and - node.argumentOf(call, pos) - ) - } -} - -import FlowExploration - -private predicate partialFlow( - PartialPathNode source, PartialPathNode node, Configuration configuration -) { - source.getConfiguration() = configuration and - source.isFwdSource() and - node = source.getASuccessor+() -} - -private predicate revPartialFlow( - PartialPathNode node, PartialPathNode sink, Configuration configuration -) { - sink.getConfiguration() = configuration and - sink.isRevSink() and - node.getASuccessor+() = sink -} From 4610b1b39208be5de2c7364e3481046c14548d11 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Wed, 24 Feb 2021 20:27:41 
+0100 Subject: [PATCH 23/25] Python: Use type back-tracking for keysize on key-generation Internal evaluation showed that this didn't perform better than normal (forward) type-tracking, but it feels more like the right approach. --- python/ql/src/semmle/python/Concepts.qll | 31 ++++++++++++------------ 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/python/ql/src/semmle/python/Concepts.qll b/python/ql/src/semmle/python/Concepts.qll index ffc3c0382e5..3729bee0466 100644 --- a/python/ql/src/semmle/python/Concepts.qll +++ b/python/ql/src/semmle/python/Concepts.qll @@ -562,34 +562,32 @@ module Cryptography { /** Provides classes for modeling new key-pair generation APIs. */ module KeyGeneration { - /** Gets a reference to an integer literal, as well as the origin of the integer literal. */ - private DataFlow::Node keysizeTracker( - DataFlow::TypeTracker t, int keySize, DataFlow::Node origin - ) { + /** Gets a back-reference to the keysize argument `arg` that was used to generate a new key-pair. 
*/ + DataFlow::LocalSourceNode keysizeBacktracker(DataFlow::TypeBackTracker t, DataFlow::Node arg) { t.start() and - result.asExpr().(IntegerLiteral).getValue() = keySize and - origin = result + arg = any(KeyGeneration::Range r).getKeySizeArg() and + result = arg.getALocalSource() or // Due to bad performance when using normal setup with we have inlined that code and forced a join - exists(DataFlow::TypeTracker t2 | + exists(DataFlow::TypeBackTracker t2 | exists(DataFlow::StepSummary summary | - keysizeTracker_first_join(t2, keySize, origin, result, summary) and - t = t2.append(summary) + keysizeBacktracker_first_join(t2, arg, result, summary) and + t = t2.prepend(summary) ) ) } pragma[nomagic] - private predicate keysizeTracker_first_join( - DataFlow::TypeTracker t2, int keySize, DataFlow::Node origin, DataFlow::Node res, + private predicate keysizeBacktracker_first_join( + DataFlow::TypeBackTracker t2, DataFlow::Node arg, DataFlow::Node res, DataFlow::StepSummary summary ) { - DataFlow::StepSummary::step(keysizeTracker(t2, keySize, origin), res, summary) + DataFlow::StepSummary::step(res, keysizeBacktracker(t2, arg), summary) } - /** Gets a reference to an integer literal, as well as the origin of the integer literal. */ - private DataFlow::Node keysizeTracker(int keySize, DataFlow::Node origin) { - result = keysizeTracker(DataFlow::TypeTracker::end(), keySize, origin) + /** Gets a back-reference to the keysize argument `arg` that was used to generate a new key-pair. */ + DataFlow::LocalSourceNode keysizeBacktracker(DataFlow::Node arg) { + result = keysizeBacktracker(DataFlow::TypeBackTracker::end(), arg) } /** @@ -610,7 +608,8 @@ module Cryptography { * explains how we obtained this specific key size. 
*/ int getKeySizeWithOrigin(DataFlow::Node origin) { - this.getKeySizeArg() = keysizeTracker(result, origin) + origin = keysizeBacktracker(this.getKeySizeArg()) and + result = origin.asExpr().(IntegerLiteral).getValue() } /** Gets the minimum key size (in bits) for this algorithm to be considered secure. */ From 472ff975611e8e9b547ca75646856417f63a031d Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 25 Feb 2021 11:26:24 +0100 Subject: [PATCH 24/25] Docs: Add crypto to supported Python frameworks --- docs/codeql/support/reusables/frameworks.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/codeql/support/reusables/frameworks.rst b/docs/codeql/support/reusables/frameworks.rst index 4dd155e2302..3963d82fcec 100644 --- a/docs/codeql/support/reusables/frameworks.rst +++ b/docs/codeql/support/reusables/frameworks.rst @@ -147,3 +147,6 @@ Python built-in support MySQLdb, Database psycopg2, Database sqlite3, Database + cryptography, Cryptography library + pycryptodome, Cryptography library + pycryptodomex, Cryptography library From 7b92012edf66aa1228fc6d790e3a8aa55777b950 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 18 Mar 2021 10:58:49 +0100 Subject: [PATCH 25/25] Python: Apply suggestions from code review Co-authored-by: yoff --- python/ql/src/semmle/python/Concepts.qll | 4 ++-- python/ql/src/semmle/python/frameworks/Cryptodome.qll | 4 ++-- python/ql/src/semmle/python/frameworks/Cryptography.qll | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/python/ql/src/semmle/python/Concepts.qll b/python/ql/src/semmle/python/Concepts.qll index 3729bee0466..0e5814d203b 100644 --- a/python/ql/src/semmle/python/Concepts.qll +++ b/python/ql/src/semmle/python/Concepts.qll @@ -545,7 +545,7 @@ module Cryptography { /** Gets the name of the cryptographic algorithm (for example `"RSA"` or `"AES"`). */ string getName() { result = range.getName() } - /** Gets the argument that specifies size of the key in bits, if available. 
*/ + /** Gets the argument that specifies the size of the key in bits, if available. */ DataFlow::Node getKeySizeArg() { result = range.getKeySizeArg() } /** @@ -600,7 +600,7 @@ module Cryptography { /** Gets the name of the cryptographic algorithm (for example `"RSA"`). */ abstract string getName(); - /** Gets the argument that specifies size of the key in bits, if available. */ + /** Gets the argument that specifies the size of the key in bits, if available. */ abstract DataFlow::Node getKeySizeArg(); /** diff --git a/python/ql/src/semmle/python/frameworks/Cryptodome.qll b/python/ql/src/semmle/python/frameworks/Cryptodome.qll index bd28da1067d..19db03f2e8e 100644 --- a/python/ql/src/semmle/python/frameworks/Cryptodome.qll +++ b/python/ql/src/semmle/python/frameworks/Cryptodome.qll @@ -77,12 +77,12 @@ private module CryptodomeModel { } /** Gets the argument that specifies the curve to use (a string). */ - DataFlow::Node getCurveArg() { result in [this.getArgByName("curve")] } + DataFlow::Node getCurveArg() { result = this.getArgByName("curve") } /** Gets the name of the curve to use, as well as the origin that explains how we obtained this name. 
*/ string getCurveWithOrigin(DataFlow::Node origin) { exists(StrConst str | origin = DataFlow::exprNode(str) | - origin.(DataFlow::LocalSourceNode).flowsTo(this.getCurveArg()) and + origin = this.getCurveArg().getALocalSource() and result = str.getText() ) } diff --git a/python/ql/src/semmle/python/frameworks/Cryptography.qll b/python/ql/src/semmle/python/frameworks/Cryptography.qll index 1c1e309fcce..ec929e78836 100644 --- a/python/ql/src/semmle/python/frameworks/Cryptography.qll +++ b/python/ql/src/semmle/python/frameworks/Cryptography.qll @@ -35,7 +35,7 @@ private module CryptographyModel { | // obtained by manually looking at source code in // https://github.com/pyca/cryptography/blob/cba69f1922803f4f29a3fde01741890d88b8e217/src/cryptography/hazmat/primitives/asymmetric/ec.py#L208-L300 - curveName = "SECT571R1" and keySize = 570 + curveName = "SECT571R1" and keySize = 570 // Indeed the numbers do not match. or curveName = "SECT409R1" and keySize = 409 or