mirror of https://github.com/github/codeql.git
Resolve merge conflict
@@ -31,7 +31,7 @@ updated to use a context manager.</p>
</example>
<references>

<li>Effbot: <a href="http://effbot.org/zone/python-with-statement.htm">Python with statement</a>.</li>
<li>Effbot: <a href="https://web.archive.org/web/20201012110738/http://effbot.org/zone/python-with-statement.htm">Python with statement</a>.</li>
<li>Python Standard Library: <a href="http://docs.python.org/library/stdtypes.html#context-manager-types">Context manager
</a>.</li>
<li>Python Language Reference: <a href="http://docs.python.org/2.7/reference/datamodel.html#with-statement-context-managers">
23 python/ql/src/Diagnostics/ExtractionErrors.ql Normal file
@@ -0,0 +1,23 @@
/**
 * @name Python extraction errors
 * @description List all extraction errors for Python files in the source code directory.
 * @kind diagnostic
 * @id py/diagnostics/extraction-errors
 */

import python

/**
 * Gets the SARIF severity for errors.
 *
 * See point 3.27.10 in https://docs.oasis-open.org/sarif/sarif/v2.0/sarif-v2.0.html for
 * what error means.
 */
int getErrorSeverity() { result = 2 }

from SyntaxError error, File file
where
  file = error.getFile() and
  exists(file.getRelativePath())
select error, "Extraction failed in " + file + " with error " + error.getMessage(),
  getErrorSeverity()
15 python/ql/src/Diagnostics/SuccessfullyExtractedFiles.ql Normal file
@@ -0,0 +1,15 @@
/**
 * @name Successfully extracted Python files
 * @description Lists all Python files in the source code directory that were extracted
 *              without encountering an error.
 * @kind diagnostic
 * @id py/diagnostics/successfully-extracted-files
 */

import python

from File file
where
  not exists(SyntaxError e | e.getFile() = file) and
  exists(file.getRelativePath())
select file, ""
@@ -4,18 +4,21 @@
 * @kind problem
 * @tags security
 *       correctness
 *       security/cwe/cwe-78
 * @problem.severity error
 * @security-severity 5.9
 * @sub-severity high
 * @precision high
 * @id py/use-of-input
 */

import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.ApiGraphs

from CallNode call, Context context, ControlFlowNode func
from DataFlow::CallCfgNode call
where
  context.getAVersion().includes(2, _) and
  call.getFunction() = func and
  func.pointsTo(context, Value::named("input"), _) and
  not func.pointsTo(context, Value::named("raw_input"), _)
  major_version() = 2 and
  call = API::builtin("input").getACall() and
  call != API::builtin("raw_input").getACall()
select call, "The unsafe built-in function 'input' is used in Python 2."
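For illustration, the behaviour the rewritten query targets: in Python 2, `input()` is roughly `eval(raw_input())`, so whatever the user types is evaluated as an expression. A minimal sketch (hypothetical, Python 2 only, not part of this commit):

```python
# Python 2 only: raw_input() does not exist in Python 3.
value = input("enter a number: ")      # BAD: typing __import__('os').system('id') executes code
text = raw_input("enter a number: ")   # GOOD: returns the typed characters as a plain string
number = int(text)                     # validate and convert explicitly instead
```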
@@ -36,7 +36,7 @@ function with a default of <code>default=None</code>, check if the parameter is
</example>
<references>

<li>Effbot: <a href="http://effbot.org/zone/default-values.htm">Default Parameter Values in Python</a>.</li>
<li>Effbot: <a href="https://web.archive.org/web/20201112004749/http://effbot.org/zone/default-values.htm">Default Parameter Values in Python</a>.</li>
<li>Python Language Reference: <a href="http://docs.python.org/2/reference/compound_stmts.html#function-definitions">Function definitions</a>.</li>
@@ -9,7 +9,7 @@ information being thrown away.</p>

<p>A return value is considered to be trivial if it is <code>None</code> or it is a parameter (parameters, usually <code>self</code> are often
returned to assist with method chaining, but can be ignored).
A return value is also assumed to be trivial if it is ignored for 75% or more of calls.
A return value is also assumed to be trivial if it is ignored for more than 25% of calls.
</p>

</overview>
@@ -29,7 +29,7 @@ import that.

<li>Python Language Reference: <a href="http://docs.python.org/2/reference/simple_stmts.html#import">The import statement</a>.</li>
<li>Python: <a href="http://docs.python.org/2/tutorial/modules.html">Modules</a>.</li>
<li> Effbot: <a href="http://effbot.org/zone/import-confusion.htm">Import Confusion</a>.</li>
<li> Effbot: <a href="https://web.archive.org/web/20200917011425/https://effbot.org/zone/import-confusion.htm">Import Confusion</a>.</li>

</references>

@@ -33,7 +33,7 @@ import that.

<li>Python Language Reference: <a href="http://docs.python.org/2/reference/simple_stmts.html#import">The import statement</a>.</li>
<li>Python: <a href="http://docs.python.org/2/tutorial/modules.html">Modules</a>.</li>
<li> Effbot: <a href="http://effbot.org/zone/import-confusion.htm">Import Confusion</a>.</li>
<li> Effbot: <a href="https://web.archive.org/web/20200917011425/https://effbot.org/zone/import-confusion.htm">Import Confusion</a>.</li>

</references>
@@ -49,7 +49,7 @@ so the general technique is quite widely applicable.

<li>
IBM developerWorks: <a href="http://www.ibm.com/developerworks/library/j-eaed6/">Evolutionary architecture and emergent design: Emergent design through metrics</a>.
IBM developerWorks: <a href="https://web.archive.org/web/20190919085934/https://www.ibm.com/developerworks/library/j-eaed6/">Evolutionary architecture and emergent design: Emergent design through metrics</a>.
</li>
<li>
R. Martin, <em>Agile Software Development: Principles, Patterns and Practices</em>. Pearson, 2011.

@@ -29,7 +29,7 @@ You can reduce efferent coupling by splitting up a module so that each part depe

<li>
IBM developerWorks: <a href="http://www.ibm.com/developerworks/library/j-eaed6/">Evolutionary architecture and emergent design: Emergent design through metrics</a>.
IBM developerWorks: <a href="https://web.archive.org/web/20190919085934/https://www.ibm.com/developerworks/library/j-eaed6/">Evolutionary architecture and emergent design: Emergent design through metrics</a>.
</li>
<li>
R. Martin, <em>Agile Software Development: Principles, Patterns and Practices</em>. Pearson, 2011.
@@ -4,7 +4,9 @@
|
||||
* and is therefore associated with security risks.
|
||||
* @kind problem
|
||||
* @tags security
|
||||
* external/cwe/cwe-200
|
||||
* @problem.severity error
|
||||
* @security-severity 3.6
|
||||
* @sub-severity low
|
||||
* @precision high
|
||||
* @id py/bind-socket-all-network-interfaces
|
||||
@@ -32,21 +34,7 @@ private DataFlow::LocalSourceNode vulnerableHostnameRef(DataFlow::TypeTracker t,
|
||||
result.asExpr() = allInterfacesStrConst
|
||||
)
|
||||
or
|
||||
// Due to bad performance when using normal setup with `vulnerableHostnameRef(t2, hostname).track(t2, t)`
|
||||
// we have inlined that code and forced a join
|
||||
exists(DataFlow::TypeTracker t2 |
|
||||
exists(DataFlow::StepSummary summary |
|
||||
vulnerableHostnameRef_first_join(t2, hostname, result, summary) and
|
||||
t = t2.append(summary)
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
pragma[nomagic]
|
||||
private predicate vulnerableHostnameRef_first_join(
|
||||
DataFlow::TypeTracker t2, string hostname, DataFlow::Node res, DataFlow::StepSummary summary
|
||||
) {
|
||||
DataFlow::StepSummary::step(vulnerableHostnameRef(t2, hostname), res, summary)
|
||||
exists(DataFlow::TypeTracker t2 | result = vulnerableHostnameRef(t2, hostname).track(t2, t))
|
||||
}
|
||||
|
||||
/** Gets a reference to a hostname that can be used to bind to all interfaces. */
|
||||
@@ -59,21 +47,7 @@ private DataFlow::LocalSourceNode vulnerableAddressTuple(DataFlow::TypeTracker t
|
||||
t.start() and
|
||||
result.asExpr() = any(Tuple tup | tup.getElt(0) = vulnerableHostnameRef(hostname).asExpr())
|
||||
or
|
||||
// Due to bad performance when using normal setup with `vulnerableAddressTuple(t2, hostname).track(t2, t)`
|
||||
// we have inlined that code and forced a join
|
||||
exists(DataFlow::TypeTracker t2 |
|
||||
exists(DataFlow::StepSummary summary |
|
||||
vulnerableAddressTuple_first_join(t2, hostname, result, summary) and
|
||||
t = t2.append(summary)
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
pragma[nomagic]
|
||||
private predicate vulnerableAddressTuple_first_join(
|
||||
DataFlow::TypeTracker t2, string hostname, DataFlow::Node res, DataFlow::StepSummary summary
|
||||
) {
|
||||
DataFlow::StepSummary::step(vulnerableAddressTuple(t2, hostname), res, summary)
|
||||
exists(DataFlow::TypeTracker t2 | result = vulnerableAddressTuple(t2, hostname).track(t2, t))
|
||||
}
|
||||
|
||||
/** Gets a reference to a tuple for which the first element is a hostname that can be used to bind to all interfaces. */
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
* @kind path-problem
|
||||
* @precision low
|
||||
* @problem.severity error
|
||||
* @security-severity 5.9
|
||||
* @tags security external/cwe/cwe-20
|
||||
*/
|
||||
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
* @description Matching a URL or hostname against a regular expression that contains an unescaped dot as part of the hostname might match more hostnames than expected.
|
||||
* @kind problem
|
||||
* @problem.severity warning
|
||||
* @security-severity 5.9
|
||||
* @precision high
|
||||
* @id py/incomplete-hostname-regexp
|
||||
* @tags correctness
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
* @description Security checks on the substrings of an unparsed URL are often vulnerable to bypassing.
|
||||
* @kind problem
|
||||
* @problem.severity warning
|
||||
* @security-severity 5.9
|
||||
* @precision high
|
||||
* @id py/incomplete-url-substring-sanitization
|
||||
* @tags correctness
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
* @description Accessing paths influenced by users can allow an attacker to access unexpected resources.
|
||||
* @kind path-problem
|
||||
* @problem.severity error
|
||||
* @security-severity 6.4
|
||||
* @sub-severity high
|
||||
* @precision high
|
||||
* @id py/path-injection
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
* @kind path-problem
|
||||
* @id py/tarslip
|
||||
* @problem.severity error
|
||||
* @security-severity 6.4
|
||||
* @precision medium
|
||||
* @tags security
|
||||
* external/cwe/cwe-022
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
* user to change the meaning of the command.
|
||||
* @kind path-problem
|
||||
* @problem.severity error
|
||||
* @security-severity 5.9
|
||||
* @sub-severity high
|
||||
* @precision high
|
||||
* @id py/command-line-injection
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
* cause a cross-site scripting vulnerability.
|
||||
* @kind problem
|
||||
* @problem.severity error
|
||||
* @security-severity 2.9
|
||||
* @precision medium
|
||||
* @id py/jinja2/autoescape-false
|
||||
* @tags security
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
* allows for a cross-site scripting vulnerability.
|
||||
* @kind path-problem
|
||||
* @problem.severity error
|
||||
* @security-severity 2.9
|
||||
* @sub-severity high
|
||||
* @precision high
|
||||
* @id py/reflective-xss
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
* malicious SQL code by the user.
|
||||
* @kind path-problem
|
||||
* @problem.severity error
|
||||
* @security-severity 6.4
|
||||
* @precision high
|
||||
* @id py/sql-injection
|
||||
* @tags security
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
* code execution.
|
||||
* @kind path-problem
|
||||
* @problem.severity error
|
||||
* @security-severity 10.0
|
||||
* @sub-severity high
|
||||
* @precision high
|
||||
* @id py/code-injection
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
* developing a subsequent exploit.
|
||||
* @kind path-problem
|
||||
* @problem.severity error
|
||||
* @security-severity 3.6
|
||||
* @precision high
|
||||
* @id py/stack-trace-exposure
|
||||
* @tags security
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
* @description Running a Flask app in debug mode may allow an attacker to run arbitrary code through the Werkzeug debugger.
|
||||
* @kind problem
|
||||
* @problem.severity error
|
||||
* @security-severity 6.4
|
||||
* @precision high
|
||||
* @id py/flask-debug
|
||||
* @tags security
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
* @description Accepting unknown host keys can allow man-in-the-middle attacks.
|
||||
* @kind problem
|
||||
* @problem.severity error
|
||||
* @security-severity 5.2
|
||||
* @precision high
|
||||
* @id py/paramiko-missing-host-key-validation
|
||||
* @tags security
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
* @description Making a request without certificate validation can allow man-in-the-middle attacks.
|
||||
* @kind problem
|
||||
* @problem.severity error
|
||||
* @security-severity 5.2
|
||||
* @precision medium
|
||||
* @id py/request-without-cert-validation
|
||||
* @tags security
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
* expose it to an attacker.
|
||||
* @kind path-problem
|
||||
* @problem.severity error
|
||||
* @security-severity 5.9
|
||||
* @precision high
|
||||
* @id py/clear-text-logging-sensitive-data
|
||||
* @tags security
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
* attacker.
|
||||
* @kind path-problem
|
||||
* @problem.severity error
|
||||
* @security-severity 5.9
|
||||
* @precision high
|
||||
* @id py/clear-text-storage-sensitive-data
|
||||
* @tags security
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
* @description Use of a cryptographic key that is too small may allow the encryption to be broken.
|
||||
* @kind problem
|
||||
* @problem.severity error
|
||||
* @security-severity 5.2
|
||||
* @precision high
|
||||
* @id py/weak-crypto-key
|
||||
* @tags security
|
||||
|
||||
@@ -15,22 +15,28 @@
|
||||
secure than it appears to be.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
This query alerts on any use of a weak cryptographic algorithm that is
not a hashing algorithm. Use of broken or weak cryptographic hash
functions is handled by the
|
||||
<code>py/weak-sensitive-data-hashing</code> query.
|
||||
</p>
|
||||
|
||||
</overview>
|
||||
<recommendation>
|
||||
|
||||
<p>
|
||||
Ensure that you use a strong, modern cryptographic
|
||||
algorithm. Use at least AES-128 or RSA-2048 for
|
||||
encryption, and SHA-2 or SHA-3 for secure hashing.
|
||||
algorithm, such as AES-128 or RSA-2048.
|
||||
</p>
|
||||
|
||||
</recommendation>
|
||||
<example>
|
||||
|
||||
<p>
|
||||
The following code uses the <code>pycrypto</code>
|
||||
The following code uses the <code>pycryptodome</code>
|
||||
library to encrypt some secret data. When you create a cipher using
|
||||
<code>pycrypto</code> you must specify the encryption
|
||||
<code>pycryptodome</code> you must specify the encryption
|
||||
algorithm to use. The first example uses DES, which is an
|
||||
older algorithm that is now considered weak. The second
|
||||
example uses AES, which is a stronger modern algorithm.
|
||||
@@ -39,8 +45,12 @@
|
||||
<sample src="examples/broken_crypto.py" />
|
||||
|
||||
<p>
|
||||
WARNING: Although the second example above is more robust,
|
||||
pycrypto is no longer actively maintained so we recommend using <code>cryptography</code> instead.
|
||||
NOTICE: the original
|
||||
<code><a href="https://pypi.org/project/pycrypto/">pycrypto</a></code>
|
||||
PyPI package that provided the <code>Crypto</code> module is no longer
|
||||
actively maintained, so you should use the
|
||||
<code><a href="https://pypi.org/project/pycryptodome/">pycryptodome</a></code>
|
||||
PyPI package instead (which has a compatible API).
|
||||
</p>
|
||||
|
||||
</example>
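The sample file `examples/broken_crypto.py` referenced above is not included in this hunk. A hedged sketch of the kind of code the example describes, using the pycryptodome `Crypto` package (the key sizes and cipher modes here are illustrative choices):

```python
from Crypto.Cipher import AES, DES      # provided by the pycryptodome PyPI package
from Crypto.Random import get_random_bytes

secret = b"some secret message"

# BAD: DES is an old, weak algorithm.
weak_cipher = DES.new(get_random_bytes(8), DES.MODE_OFB)
weak_ciphertext = weak_cipher.encrypt(secret)

# GOOD: AES (here AES-128 in EAX mode) is a strong, modern algorithm.
strong_cipher = AES.new(get_random_bytes(16), AES.MODE_EAX)
strong_ciphertext, tag = strong_cipher.encrypt_and_digest(secret)
```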
|
||||
|
||||
@@ -1,8 +1,9 @@
|
||||
/**
|
||||
* @name Use of a broken or weak cryptographic algorithm
|
||||
* @description Using broken or weak cryptographic algorithms can compromise security.
|
||||
* @kind path-problem
|
||||
* @kind problem
|
||||
* @problem.severity warning
|
||||
* @security-severity 5.2
|
||||
* @precision high
|
||||
* @id py/weak-cryptographic-algorithm
|
||||
* @tags security
|
||||
@@ -10,21 +11,15 @@
|
||||
*/
|
||||
|
||||
import python
|
||||
import semmle.python.security.Paths
|
||||
import semmle.python.security.SensitiveData
|
||||
import semmle.python.security.Crypto
|
||||
import semmle.python.Concepts
|
||||
|
||||
class BrokenCryptoConfiguration extends TaintTracking::Configuration {
|
||||
BrokenCryptoConfiguration() { this = "Broken crypto configuration" }
|
||||
|
||||
override predicate isSource(TaintTracking::Source source) {
|
||||
source instanceof SensitiveDataSource
|
||||
}
|
||||
|
||||
override predicate isSink(TaintTracking::Sink sink) { sink instanceof WeakCryptoSink }
|
||||
}
|
||||
|
||||
from BrokenCryptoConfiguration config, TaintedPathSource src, TaintedPathSink sink
|
||||
where config.hasFlowPath(src, sink)
|
||||
select sink.getSink(), src, sink, "$@ is used in a broken or weak cryptographic algorithm.",
|
||||
src.getSource(), "Sensitive data"
|
||||
from Cryptography::CryptographicOperation operation, Cryptography::CryptographicAlgorithm algorithm
|
||||
where
|
||||
algorithm = operation.getAlgorithm() and
|
||||
algorithm.isWeak() and
|
||||
// `Cryptography::HashingAlgorithm` and `Cryptography::PasswordHashingAlgorithm` are
|
||||
// handled by `py/weak-sensitive-data-hashing`
|
||||
algorithm instanceof Cryptography::EncryptionAlgorithm
|
||||
select operation,
|
||||
"The cryptographic algorithm " + algorithm.getName() +
|
||||
" is broken or weak, and should not be used."
|
||||
|
||||
103 python/ql/src/Security/CWE-327/FluentApiModel.qll Normal file
@@ -0,0 +1,103 @@
|
||||
private import python
|
||||
private import semmle.python.dataflow.new.DataFlow
|
||||
import TlsLibraryModel
|
||||
|
||||
/**
|
||||
* Configuration to determine the state of a context being used to create
|
||||
* a connection. There is one configuration for each pair of `TlsLibrary` and `ProtocolVersion`,
|
||||
* such that a single configuration only tracks contexts where a specific `ProtocolVersion` is allowed.
|
||||
*
|
||||
* The state is in terms of whether a specific protocol is allowed. This is
|
||||
* either true or false when the context is created and can then be modified
|
||||
* later by either restricting or unrestricting the protocol (see the predicates
|
||||
* `isRestriction` and `isUnrestriction`).
|
||||
*
|
||||
* Since we are interested in the final state, we want the flow to start from
|
||||
* the last unrestriction, so we disallow flow into unrestrictions. We also
|
||||
* model the creation as an unrestriction of everything it allows, to account
|
||||
* for the common case where the creation plays the role of "last unrestriction".
|
||||
*
|
||||
* Since we really want "the last unrestriction, not nullified by a restriction",
|
||||
* we also disallow flow into restrictions.
|
||||
*/
|
||||
class InsecureContextConfiguration extends DataFlow::Configuration {
|
||||
TlsLibrary library;
|
||||
ProtocolVersion tracked_version;
|
||||
|
||||
InsecureContextConfiguration() {
|
||||
this = library + "Allows" + tracked_version and
|
||||
tracked_version.isInsecure()
|
||||
}
|
||||
|
||||
ProtocolVersion getTrackedVersion() { result = tracked_version }
|
||||
|
||||
override predicate isSource(DataFlow::Node source) { this.isUnrestriction(source) }
|
||||
|
||||
override predicate isSink(DataFlow::Node sink) {
|
||||
sink = library.connection_creation().getContext()
|
||||
}
|
||||
|
||||
override predicate isBarrierIn(DataFlow::Node node) {
|
||||
this.isRestriction(node)
|
||||
or
|
||||
this.isUnrestriction(node)
|
||||
}
|
||||
|
||||
private predicate isRestriction(DataFlow::Node node) {
|
||||
exists(ProtocolRestriction r |
|
||||
r = library.protocol_restriction() and
|
||||
r.getRestriction() = tracked_version
|
||||
|
|
||||
node = r.getContext()
|
||||
)
|
||||
}
|
||||
|
||||
private predicate isUnrestriction(DataFlow::Node node) {
|
||||
exists(ProtocolUnrestriction pu |
|
||||
pu = library.protocol_unrestriction() and
|
||||
pu.getUnrestriction() = tracked_version
|
||||
|
|
||||
node = pu.getContext()
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `connectionCreation` marks the creation of a connection based on the context
|
||||
* found at `contextOrigin` and allowing `insecure_version`.
|
||||
*
|
||||
* `specific` is true iff the context is configured for a specific protocol version (`ssl.PROTOCOL_TLSv1_2`) rather
|
||||
* than for a family of protocols (`ssl.PROTOCOL_TLS`).
|
||||
*/
|
||||
predicate unsafe_connection_creation_with_context(
|
||||
DataFlow::Node connectionCreation, ProtocolVersion insecure_version, DataFlow::Node contextOrigin,
|
||||
boolean specific
|
||||
) {
|
||||
// Connection created from a context allowing `insecure_version`.
|
||||
exists(InsecureContextConfiguration c | c.hasFlow(contextOrigin, connectionCreation) |
|
||||
insecure_version = c.getTrackedVersion() and
|
||||
specific = false
|
||||
)
|
||||
or
|
||||
// Connection created from a context specifying `insecure_version`.
|
||||
exists(TlsLibrary l |
|
||||
connectionCreation = l.insecure_connection_creation(insecure_version) and
|
||||
contextOrigin = connectionCreation and
|
||||
specific = true
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `connectionCreation` marks the creation of a connection without reference to a context
|
||||
* and allowing `insecure_version`.
|
||||
*/
|
||||
predicate unsafe_connection_creation_without_context(
|
||||
DataFlow::CallCfgNode connectionCreation, string insecure_version
|
||||
) {
|
||||
exists(TlsLibrary l | connectionCreation = l.insecure_connection_creation(insecure_version))
|
||||
}
|
||||
|
||||
/** Holds if `contextCreation` is creating a context tied to a specific insecure version. */
|
||||
predicate unsafe_context_creation(DataFlow::CallCfgNode contextCreation, string insecure_version) {
|
||||
exists(TlsLibrary l | contextCreation = l.insecure_context_creation(insecure_version))
|
||||
}
|
||||
@@ -5,6 +5,7 @@
|
||||
* @id py/insecure-default-protocol
|
||||
* @kind problem
|
||||
* @problem.severity warning
|
||||
* @security-severity 5.2
|
||||
* @precision high
|
||||
* @tags security
|
||||
* external/cwe/cwe-327
|
||||
|
||||
@@ -13,8 +13,8 @@
|
||||
|
||||
<p>
|
||||
Ensure that a modern, strong protocol is used. All versions of SSL,
|
||||
and TLS 1.0 are known to be vulnerable to attacks. Using TLS 1.1 or
|
||||
above is strongly recommended.
|
||||
and TLS versions 1.0 and 1.1 are known to be vulnerable to attacks.
|
||||
Using TLS 1.2 or above is strongly recommended.
|
||||
</p>
|
||||
|
||||
</recommendation>
|
||||
@@ -30,20 +30,35 @@
|
||||
|
||||
<p>
|
||||
All cases should be updated to use a secure protocol, such as
|
||||
<code>PROTOCOL_TLSv1_1</code>.
|
||||
<code>PROTOCOL_TLSv1_2</code>.
|
||||
</p>
|
||||
<p>
|
||||
Note that <code>ssl.wrap_socket</code> has been deprecated in
|
||||
Python 3.7. A preferred alternative is to use
|
||||
<code>ssl.SSLContext</code>, which is supported in Python 2.7.9 and
|
||||
3.2 and later versions.
|
||||
Python 3.7. The recommended alternatives are:
|
||||
</p>
|
||||
<ul>
|
||||
<li><code>ssl.SSLContext</code> - supported in Python 2.7.9,
|
||||
3.2, and later versions</li>
|
||||
<li><code>ssl.create_default_context</code> - a convenience function,
|
||||
supported in Python 3.4 and later versions.</li>
|
||||
</ul>
|
||||
<p>
|
||||
Even when you use these alternatives, you should
|
||||
ensure that a safe protocol is used. The following code illustrates
|
||||
how to use flags (available since Python 3.2) or the `minimum_version`
|
||||
field (favored since Python 3.7) to restrict the protocols accepted when
|
||||
creating a connection.
|
||||
</p>
|
||||
|
||||
<sample src="examples/secure_default_protocol.py" />
|
||||
</example>
|
||||
|
||||
<references>
|
||||
<li>Wikipedia: <a href="https://en.wikipedia.org/wiki/Transport_Layer_Security"> Transport Layer Security</a>.</li>
|
||||
<li>Python 3 documentation: <a href="https://docs.python.org/3/library/ssl.html#ssl.SSLContext"> class ssl.SSLContext</a>.</li>
|
||||
<li>Python 3 documentation: <a href="https://docs.python.org/3/library/ssl.html#ssl.wrap_socket"> ssl.wrap_socket</a>.</li>
|
||||
<li>Python 3 documentation: <a href="https://docs.python.org/3/library/ssl.html#functions-constants-and-exceptions"> notes on context creation</a>.</li>
|
||||
<li>Python 3 documentation: <a href="https://docs.python.org/3/library/ssl.html#ssl-security"> notes on security considerations</a>.</li>
|
||||
<li>pyOpenSSL documentation: <a href="https://pyopenssl.org/en/stable/api/ssl.html"> An interface to the SSL-specific parts of OpenSSL</a>.</li>
|
||||
</references>
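The sample file `examples/secure_default_protocol.py` referenced above is not shown in this hunk. A hedged sketch of the two approaches the paragraph describes (protocol flags and the `minimum_version` field); the host name is a placeholder:

```python
import socket
import ssl

HOSTNAME = "example.com"  # placeholder

# Preferred since Python 3.7: require at least TLS 1.2 on the context.
context = ssl.create_default_context()
context.minimum_version = ssl.TLSVersion.TLSv1_2

# Alternative using flags (available since Python 3.2): disable the older protocols.
legacy_context = ssl.SSLContext(ssl.PROTOCOL_TLS)
legacy_context.options |= ssl.OP_NO_TLSv1 | ssl.OP_NO_TLSv1_1

with socket.create_connection((HOSTNAME, 443)) as sock:
    with context.wrap_socket(sock, server_hostname=HOSTNAME) as tls_sock:
        print(tls_sock.version())  # e.g. "TLSv1.2" or "TLSv1.3"
```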
|
||||
|
||||
|
||||
@@ -4,92 +4,84 @@
|
||||
* @id py/insecure-protocol
|
||||
* @kind problem
|
||||
* @problem.severity warning
|
||||
* @security-severity 5.2
|
||||
* @precision high
|
||||
* @tags security
|
||||
* external/cwe/cwe-327
|
||||
*/
|
||||
|
||||
import python
|
||||
import semmle.python.dataflow.new.DataFlow
|
||||
import FluentApiModel
|
||||
|
||||
private ModuleValue the_ssl_module() { result = Module::named("ssl") }
|
||||
|
||||
FunctionValue ssl_wrap_socket() { result = the_ssl_module().attr("wrap_socket") }
|
||||
|
||||
ClassValue ssl_Context_class() { result = the_ssl_module().attr("SSLContext") }
|
||||
|
||||
private ModuleValue the_pyOpenSSL_module() { result = Value::named("pyOpenSSL.SSL") }
|
||||
|
||||
ClassValue the_pyOpenSSL_Context_class() { result = Value::named("pyOpenSSL.SSL.Context") }
|
||||
|
||||
string insecure_version_name() {
|
||||
// For `pyOpenSSL.SSL`
|
||||
result = "SSLv2_METHOD" or
|
||||
result = "SSLv23_METHOD" or
|
||||
result = "SSLv3_METHOD" or
|
||||
result = "TLSv1_METHOD" or
|
||||
// For the `ssl` module
|
||||
result = "PROTOCOL_SSLv2" or
|
||||
result = "PROTOCOL_SSLv3" or
|
||||
result = "PROTOCOL_SSLv23" or
|
||||
result = "PROTOCOL_TLS" or
|
||||
result = "PROTOCOL_TLSv1"
|
||||
}
|
||||
|
||||
/*
|
||||
* A syntactic check for cases where points-to analysis cannot infer the presence of
|
||||
* a protocol constant, e.g. if it has been removed in later versions of the `ssl`
|
||||
* library.
|
||||
*/
|
||||
|
||||
bindingset[named_argument]
|
||||
predicate probable_insecure_ssl_constant(
|
||||
CallNode call, string insecure_version, string named_argument
|
||||
) {
|
||||
exists(ControlFlowNode arg |
|
||||
arg = call.getArgByName(named_argument) or
|
||||
arg = call.getArg(0)
|
||||
|
|
||||
arg.(AttrNode).getObject(insecure_version).pointsTo(the_ssl_module())
|
||||
// Helper for pretty printer `configName`.
|
||||
// This is a consequence of missing pretty printing.
|
||||
// We do not want to evaluate our bespoke pretty printer
|
||||
// for all `DataFlow::Node`s so we define a sub class of interesting ones.
|
||||
class ProtocolConfiguration extends DataFlow::Node {
|
||||
ProtocolConfiguration() {
|
||||
unsafe_connection_creation_with_context(_, _, this, _)
|
||||
or
|
||||
arg.(NameNode).getId() = insecure_version and
|
||||
exists(Import imp |
|
||||
imp.getAnImportedModuleName() = "ssl" and
|
||||
imp.getAName().getAsname().(Name).getId() = insecure_version
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
predicate unsafe_ssl_wrap_socket_call(
|
||||
CallNode call, string method_name, string insecure_version, string named_argument
|
||||
) {
|
||||
(
|
||||
call = ssl_wrap_socket().getACall() and
|
||||
method_name = "deprecated method ssl.wrap_socket" and
|
||||
named_argument = "ssl_version"
|
||||
unsafe_connection_creation_without_context(this, _)
|
||||
or
|
||||
call = ssl_Context_class().getACall() and
|
||||
named_argument = "protocol" and
|
||||
method_name = "ssl.SSLContext"
|
||||
) and
|
||||
insecure_version = insecure_version_name() and
|
||||
(
|
||||
call.getArgByName(named_argument).pointsTo(the_ssl_module().attr(insecure_version))
|
||||
unsafe_context_creation(this, _)
|
||||
}
|
||||
|
||||
AstNode getNode() { result = this.asCfgNode().(CallNode).getFunction().getNode() }
|
||||
}
|
||||
|
||||
// Helper for pretty printer `callName`.
|
||||
// This is a consequence of missing pretty printing.
|
||||
// We do not want to evaluate our bespoke pretty printer
|
||||
// for all `AstNode`s so we define a sub class of interesting ones.
|
||||
//
|
||||
// Note that AstNode is abstract and AstNode_ is a library class, so
|
||||
// we have to extend @py_ast_node.
|
||||
class Nameable extends @py_ast_node {
|
||||
Nameable() {
|
||||
this = any(ProtocolConfiguration pc).getNode()
|
||||
or
|
||||
probable_insecure_ssl_constant(call, insecure_version, named_argument)
|
||||
)
|
||||
exists(Nameable attr | this = attr.(Attribute).getObject())
|
||||
}
|
||||
|
||||
string toString() { result = "AstNode" }
|
||||
}
|
||||
|
||||
predicate unsafe_pyOpenSSL_Context_call(CallNode call, string insecure_version) {
|
||||
call = the_pyOpenSSL_Context_class().getACall() and
|
||||
insecure_version = insecure_version_name() and
|
||||
call.getArg(0).pointsTo(the_pyOpenSSL_module().attr(insecure_version))
|
||||
}
|
||||
|
||||
from CallNode call, string method_name, string insecure_version
|
||||
where
|
||||
unsafe_ssl_wrap_socket_call(call, method_name, insecure_version, _)
|
||||
string callName(Nameable call) {
|
||||
result = call.(Name).getId()
|
||||
or
|
||||
unsafe_pyOpenSSL_Context_call(call, insecure_version) and method_name = "pyOpenSSL.SSL.Context"
|
||||
select call,
|
||||
"Insecure SSL/TLS protocol version " + insecure_version + " specified in call to " + method_name +
|
||||
"."
|
||||
exists(Attribute a | a = call | result = callName(a.getObject()) + "." + a.getName())
|
||||
}
|
||||
|
||||
string configName(ProtocolConfiguration protocolConfiguration) {
|
||||
result =
|
||||
"call to " + callName(protocolConfiguration.asCfgNode().(CallNode).getFunction().getNode())
|
||||
or
|
||||
not protocolConfiguration.asCfgNode() instanceof CallNode and
|
||||
not protocolConfiguration instanceof ContextCreation and
|
||||
result = "context modification"
|
||||
}
|
||||
|
||||
string verb(boolean specific) {
|
||||
specific = true and result = "specified"
|
||||
or
|
||||
specific = false and result = "allowed"
|
||||
}
|
||||
|
||||
from
|
||||
DataFlow::Node connectionCreation, string insecure_version, DataFlow::Node protocolConfiguration,
|
||||
boolean specific
|
||||
where
|
||||
unsafe_connection_creation_with_context(connectionCreation, insecure_version,
|
||||
protocolConfiguration, specific)
|
||||
or
|
||||
unsafe_connection_creation_without_context(connectionCreation, insecure_version) and
|
||||
protocolConfiguration = connectionCreation and
|
||||
specific = true
|
||||
or
|
||||
unsafe_context_creation(protocolConfiguration, insecure_version) and
|
||||
connectionCreation = protocolConfiguration and
|
||||
specific = true
|
||||
select connectionCreation,
|
||||
"Insecure SSL/TLS protocol version " + insecure_version + " " + verb(specific) + " by $@ ",
|
||||
protocolConfiguration, configName(protocolConfiguration)
|
||||
|
||||
83 python/ql/src/Security/CWE-327/PyOpenSSL.qll Normal file
@@ -0,0 +1,83 @@
|
||||
/**
|
||||
* Provides modeling of SSL/TLS functionality of the `OpenSSL` module from the `pyOpenSSL` PyPI package.
|
||||
* See https://www.pyopenssl.org/en/stable/
|
||||
*/
|
||||
|
||||
private import python
|
||||
private import semmle.python.ApiGraphs
|
||||
import TlsLibraryModel
|
||||
|
||||
class PyOpenSSLContextCreation extends ContextCreation, DataFlow::CallCfgNode {
|
||||
PyOpenSSLContextCreation() {
|
||||
this = API::moduleImport("OpenSSL").getMember("SSL").getMember("Context").getACall()
|
||||
}
|
||||
|
||||
override string getProtocol() {
|
||||
exists(ControlFlowNode protocolArg, PyOpenSSL pyo |
|
||||
protocolArg in [node.getArg(0), node.getArgByName("method")]
|
||||
|
|
||||
protocolArg =
|
||||
[pyo.specific_version(result).getAUse(), pyo.unspecific_version(result).getAUse()]
|
||||
.asCfgNode()
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
class ConnectionCall extends ConnectionCreation, DataFlow::CallCfgNode {
|
||||
ConnectionCall() {
|
||||
this = API::moduleImport("OpenSSL").getMember("SSL").getMember("Connection").getACall()
|
||||
}
|
||||
|
||||
override DataFlow::CfgNode getContext() {
|
||||
result.getNode() in [node.getArg(0), node.getArgByName("context")]
|
||||
}
|
||||
}
|
||||
|
||||
// This cannot be used to unrestrict,
|
||||
// see https://www.pyopenssl.org/en/stable/api/ssl.html#OpenSSL.SSL.Context.set_options
|
||||
class SetOptionsCall extends ProtocolRestriction, DataFlow::CallCfgNode {
|
||||
SetOptionsCall() { node.getFunction().(AttrNode).getName() = "set_options" }
|
||||
|
||||
override DataFlow::CfgNode getContext() {
|
||||
result.getNode() = node.getFunction().(AttrNode).getObject()
|
||||
}
|
||||
|
||||
override ProtocolVersion getRestriction() {
|
||||
API::moduleImport("OpenSSL").getMember("SSL").getMember("OP_NO_" + result).getAUse().asCfgNode() in [
|
||||
node.getArg(0), node.getArgByName("options")
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
class UnspecificPyOpenSSLContextCreation extends PyOpenSSLContextCreation, UnspecificContextCreation {
|
||||
UnspecificPyOpenSSLContextCreation() { library instanceof PyOpenSSL }
|
||||
}
|
||||
|
||||
class PyOpenSSL extends TlsLibrary {
|
||||
PyOpenSSL() { this = "pyOpenSSL" }
|
||||
|
||||
override string specific_version_name(ProtocolVersion version) { result = version + "_METHOD" }
|
||||
|
||||
override string unspecific_version_name(ProtocolFamily family) {
|
||||
// `"TLS_METHOD"` is not actually available in pyOpenSSL yet, but should be coming soon..
|
||||
result = family + "_METHOD"
|
||||
}
|
||||
|
||||
override API::Node version_constants() { result = API::moduleImport("OpenSSL").getMember("SSL") }
|
||||
|
||||
override ContextCreation default_context_creation() { none() }
|
||||
|
||||
override ContextCreation specific_context_creation() {
|
||||
result instanceof PyOpenSSLContextCreation
|
||||
}
|
||||
|
||||
override DataFlow::Node insecure_connection_creation(ProtocolVersion version) { none() }
|
||||
|
||||
override ConnectionCreation connection_creation() { result instanceof ConnectionCall }
|
||||
|
||||
override ProtocolRestriction protocol_restriction() { result instanceof SetOptionsCall }
|
||||
|
||||
override ProtocolUnrestriction protocol_unrestriction() {
|
||||
result instanceof UnspecificPyOpenSSLContextCreation
|
||||
}
|
||||
}
|
||||
24 python/ql/src/Security/CWE-327/README.md Normal file
@@ -0,0 +1,24 @@
# Current status (Feb 2021)

This should be kept up to date; the world is moving fast and protocols are being broken.

## Protocols

- All versions of SSL are insecure
- TLS 1.0 and TLS 1.1 are insecure
- TLS 1.2 has some issues, but TLS 1.3 is not widely supported

## Connection methods

- `ssl.wrap_socket` creates insecure connections; use `SSLContext.wrap_socket` instead. [link](https://docs.python.org/3/library/ssl.html#ssl.wrap_socket)
  > Deprecated since version 3.7: Since Python 3.2 and 2.7.9, it is recommended to use the `SSLContext.wrap_socket()` instead of `wrap_socket()`. The top-level function is limited and creates an insecure client socket without server name indication or hostname matching.
- Default constructors are fine; a fluent API is used to constrain the possible protocols later.

## Current recommendation

TLS 1.2 or TLS 1.3

## Queries

- `InsecureProtocol` detects uses of insecure protocols.
- `InsecureDefaultProtocol` detects default constructions, which are no longer unsafe.
214 python/ql/src/Security/CWE-327/Ssl.qll Normal file
@@ -0,0 +1,214 @@
|
||||
/**
|
||||
* Provides modeling of SSL/TLS functionality of the `ssl` module from the standard library.
|
||||
* See https://docs.python.org/3.9/library/ssl.html
|
||||
*/
|
||||
|
||||
private import python
|
||||
private import semmle.python.ApiGraphs
|
||||
import TlsLibraryModel
|
||||
|
||||
class SSLContextCreation extends ContextCreation, DataFlow::CallCfgNode {
|
||||
SSLContextCreation() { this = API::moduleImport("ssl").getMember("SSLContext").getACall() }
|
||||
|
||||
override string getProtocol() {
|
||||
exists(ControlFlowNode protocolArg, Ssl ssl |
|
||||
protocolArg in [node.getArg(0), node.getArgByName("protocol")]
|
||||
|
|
||||
protocolArg =
|
||||
[ssl.specific_version(result).getAUse(), ssl.unspecific_version(result).getAUse()]
|
||||
.asCfgNode()
|
||||
)
|
||||
or
|
||||
not exists(node.getAnArg()) and
|
||||
result = "TLS"
|
||||
}
|
||||
}
|
||||
|
||||
class SSLDefaultContextCreation extends ContextCreation {
|
||||
SSLDefaultContextCreation() {
|
||||
this = API::moduleImport("ssl").getMember("create_default_context").getACall()
|
||||
}
|
||||
|
||||
// Allowed insecure versions are "TLSv1" and "TLSv1_1"
|
||||
// see https://docs.python.org/3/library/ssl.html#context-creation
|
||||
override string getProtocol() { result = "TLS" }
|
||||
}
|
||||
|
||||
/** Gets a reference to an `ssl.Context` instance. */
|
||||
API::Node sslContextInstance() {
|
||||
result = API::moduleImport("ssl").getMember(["SSLContext", "create_default_context"]).getReturn()
|
||||
}
|
||||
|
||||
class WrapSocketCall extends ConnectionCreation, DataFlow::CallCfgNode {
|
||||
WrapSocketCall() { this = sslContextInstance().getMember("wrap_socket").getACall() }
|
||||
|
||||
override DataFlow::Node getContext() {
|
||||
result = this.getFunction().(DataFlow::AttrRead).getObject()
|
||||
}
|
||||
}
|
||||
|
||||
class OptionsAugOr extends ProtocolRestriction, DataFlow::CfgNode {
|
||||
ProtocolVersion restriction;
|
||||
|
||||
OptionsAugOr() {
|
||||
exists(AugAssign aa, AttrNode attr, Expr flag |
|
||||
aa.getOperation().getOp() instanceof BitOr and
|
||||
aa.getTarget() = attr.getNode() and
|
||||
attr.getName() = "options" and
|
||||
attr.getObject() = node and
|
||||
flag = API::moduleImport("ssl").getMember("OP_NO_" + restriction).getAUse().asExpr() and
|
||||
(
|
||||
aa.getValue() = flag
|
||||
or
|
||||
impliesBitSet(aa.getValue(), flag, false, false)
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
override DataFlow::Node getContext() { result = this }
|
||||
|
||||
override ProtocolVersion getRestriction() { result = restriction }
|
||||
}
|
||||
|
||||
class OptionsAugAndNot extends ProtocolUnrestriction, DataFlow::CfgNode {
|
||||
ProtocolVersion restriction;
|
||||
|
||||
OptionsAugAndNot() {
|
||||
exists(AugAssign aa, AttrNode attr, Expr flag, UnaryExpr notFlag |
|
||||
aa.getOperation().getOp() instanceof BitAnd and
|
||||
aa.getTarget() = attr.getNode() and
|
||||
attr.getName() = "options" and
|
||||
attr.getObject() = node and
|
||||
notFlag.getOp() instanceof Invert and
|
||||
notFlag.getOperand() = flag and
|
||||
flag = API::moduleImport("ssl").getMember("OP_NO_" + restriction).getAUse().asExpr() and
|
||||
(
|
||||
aa.getValue() = notFlag
|
||||
or
|
||||
impliesBitSet(aa.getValue(), notFlag, true, true)
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
override DataFlow::Node getContext() { result = this }
|
||||
|
||||
override ProtocolVersion getUnrestriction() { result = restriction }
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if
|
||||
* for every bit, _b_:
|
||||
* `wholeHasBitSet` represents that _b_ is set in `whole`
|
||||
* implies
|
||||
* `partHasBitSet` represents that _b_ is set in `part`
|
||||
*
|
||||
* As an example take `whole` = `part1 & part2`. Then
|
||||
* `impliesBitSet(whole, part1, true, true)` holds
|
||||
* because for any bit in `whole`, if that bit is set it must also be set in `part1`.
|
||||
*
|
||||
* Similarly for `whole` = `part1 | part2`. Here
|
||||
* `impliesBitSet(whole, part1, false, false)` holds
|
||||
* because for any bit in `whole`, if that bit is not set, it cannot be set in `part1`.
|
||||
*/
|
||||
predicate impliesBitSet(BinaryExpr whole, Expr part, boolean partHasBitSet, boolean wholeHasBitSet) {
|
||||
whole.getOp() instanceof BitAnd and
|
||||
(
|
||||
wholeHasBitSet = true and partHasBitSet = true and part in [whole.getLeft(), whole.getRight()]
|
||||
or
|
||||
wholeHasBitSet = true and
|
||||
impliesBitSet([whole.getLeft(), whole.getRight()], part, partHasBitSet, wholeHasBitSet)
|
||||
)
|
||||
or
|
||||
whole.getOp() instanceof BitOr and
|
||||
(
|
||||
wholeHasBitSet = false and partHasBitSet = false and part in [whole.getLeft(), whole.getRight()]
|
||||
or
|
||||
wholeHasBitSet = false and
|
||||
impliesBitSet([whole.getLeft(), whole.getRight()], part, partHasBitSet, wholeHasBitSet)
|
||||
)
|
||||
}
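For reference, the `OptionsAugOr` and `OptionsAugAndNot` classes above match augmented assignments to a context's `options` attribute, and `impliesBitSet` lets the flag appear inside a larger bit expression. A minimal Python illustration of both patterns (not part of this commit):

```python
import ssl

context = ssl.SSLContext(ssl.PROTOCOL_TLS)

# Restriction (OptionsAugOr): the OP_NO_* flag is OR-ed into options,
# possibly combined with other flags in a single expression.
context.options |= ssl.OP_NO_TLSv1 | ssl.OP_NO_COMPRESSION

# Unrestriction (OptionsAugAndNot): the same flag is cleared again with AND-NOT.
context.options &= ~ssl.OP_NO_TLSv1
```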
|
||||
|
||||
class ContextSetVersion extends ProtocolRestriction, ProtocolUnrestriction, DataFlow::CfgNode {
|
||||
ProtocolVersion restriction;
|
||||
|
||||
ContextSetVersion() {
|
||||
exists(DataFlow::AttrWrite aw |
|
||||
aw.getObject().asCfgNode() = node and
|
||||
aw.getAttributeName() = "minimum_version" and
|
||||
aw.getValue() =
|
||||
API::moduleImport("ssl").getMember("TLSVersion").getMember(restriction).getAUse()
|
||||
)
|
||||
}
|
||||
|
||||
override DataFlow::Node getContext() { result = this }
|
||||
|
||||
override ProtocolVersion getRestriction() { result.lessThan(restriction) }
|
||||
|
||||
override ProtocolVersion getUnrestriction() {
|
||||
restriction = result or restriction.lessThan(result)
|
||||
}
|
||||
}
|
||||
|
||||
class UnspecificSSLContextCreation extends SSLContextCreation, UnspecificContextCreation {
|
||||
UnspecificSSLContextCreation() { library instanceof Ssl }
|
||||
|
||||
override ProtocolVersion getUnrestriction() {
|
||||
result = UnspecificContextCreation.super.getUnrestriction() and
|
||||
// These are turned off by default since Python 3.6
|
||||
// see https://docs.python.org/3.6/library/ssl.html#ssl.SSLContext
|
||||
not result in ["SSLv2", "SSLv3"]
|
||||
}
|
||||
}
|
||||
|
||||
class UnspecificSSLDefaultContextCreation extends SSLDefaultContextCreation, ProtocolUnrestriction {
|
||||
override DataFlow::Node getContext() { result = this }
|
||||
|
||||
// see https://docs.python.org/3/library/ssl.html#ssl.create_default_context
|
||||
override ProtocolVersion getUnrestriction() {
|
||||
result in ["TLSv1", "TLSv1_1", "TLSv1_2", "TLSv1_3"]
|
||||
}
|
||||
}
|
||||
|
||||
class Ssl extends TlsLibrary {
|
||||
Ssl() { this = "ssl" }
|
||||
|
||||
override string specific_version_name(ProtocolVersion version) { result = "PROTOCOL_" + version }
|
||||
|
||||
override string unspecific_version_name(ProtocolFamily family) {
|
||||
family = "SSLv23" and result = "PROTOCOL_" + family
|
||||
or
|
||||
family = "TLS" and result = "PROTOCOL_" + family + ["", "_CLIENT", "_SERVER"]
|
||||
}
|
||||
|
||||
override API::Node version_constants() { result = API::moduleImport("ssl") }
|
||||
|
||||
override ContextCreation default_context_creation() {
|
||||
result instanceof SSLDefaultContextCreation
|
||||
}
|
||||
|
||||
override ContextCreation specific_context_creation() { result instanceof SSLContextCreation }
|
||||
|
||||
override DataFlow::CallCfgNode insecure_connection_creation(ProtocolVersion version) {
|
||||
result = API::moduleImport("ssl").getMember("wrap_socket").getACall() and
|
||||
this.specific_version(version).getAUse() = result.getArgByName("ssl_version") and
|
||||
version.isInsecure()
|
||||
}
|
||||
|
||||
override ConnectionCreation connection_creation() { result instanceof WrapSocketCall }
|
||||
|
||||
override ProtocolRestriction protocol_restriction() {
|
||||
result instanceof OptionsAugOr
|
||||
or
|
||||
result instanceof ContextSetVersion
|
||||
}
|
||||
|
||||
override ProtocolUnrestriction protocol_unrestriction() {
|
||||
result instanceof OptionsAugAndNot
|
||||
or
|
||||
result instanceof ContextSetVersion
|
||||
or
|
||||
result instanceof UnspecificSSLContextCreation
|
||||
or
|
||||
result instanceof UnspecificSSLDefaultContextCreation
|
||||
}
|
||||
}
|
||||
137 python/ql/src/Security/CWE-327/TlsLibraryModel.qll Normal file
@@ -0,0 +1,137 @@
|
||||
private import python
|
||||
private import semmle.python.ApiGraphs
|
||||
import Ssl
|
||||
import PyOpenSSL
|
||||
|
||||
/**
|
||||
* A specific protocol version of SSL or TLS.
|
||||
*/
|
||||
class ProtocolVersion extends string {
|
||||
ProtocolVersion() { this in ["SSLv2", "SSLv3", "TLSv1", "TLSv1_1", "TLSv1_2", "TLSv1_3"] }
|
||||
|
||||
/** Gets a `ProtocolVersion` that is less than this `ProtocolVersion`, if any. */
|
||||
predicate lessThan(ProtocolVersion version) {
|
||||
this = "SSLv2" and version = "SSLv3"
|
||||
or
|
||||
this = "TLSv1" and version = ["TLSv1_1", "TLSv1_2", "TLSv1_3"]
|
||||
or
|
||||
this = ["TLSv1", "TLSv1_1"] and version = ["TLSv1_2", "TLSv1_3"]
|
||||
or
|
||||
this = ["TLSv1", "TLSv1_1", "TLSv1_2"] and version = "TLSv1_3"
|
||||
}
|
||||
|
||||
/** Holds if this protocol version is known to be insecure. */
|
||||
predicate isInsecure() { this in ["SSLv2", "SSLv3", "TLSv1", "TLSv1_1"] }
|
||||
}
|
||||
|
||||
/** An unspecific protocol version */
|
||||
class ProtocolFamily extends string {
|
||||
ProtocolFamily() { this in ["SSLv23", "TLS"] }
|
||||
}
|
||||
|
||||
/** The creation of a context. */
|
||||
abstract class ContextCreation extends DataFlow::Node {
|
||||
/** Gets the protocol version or family for this context. */
|
||||
abstract string getProtocol();
|
||||
}
|
||||
|
||||
/** The creation of a connection from a context. */
|
||||
abstract class ConnectionCreation extends DataFlow::Node {
|
||||
/** Gets the context used to create the connection. */
|
||||
abstract DataFlow::Node getContext();
|
||||
}
|
||||
|
||||
/** A context is being restricted on which protocols it can accept. */
|
||||
abstract class ProtocolRestriction extends DataFlow::Node {
|
||||
/** Gets the context being restricted. */
|
||||
abstract DataFlow::Node getContext();
|
||||
|
||||
/** Gets the protocol version being disallowed. */
|
||||
abstract ProtocolVersion getRestriction();
|
||||
}
|
||||
|
||||
/** A context is being relaxed on which protocols it can accept. */
|
||||
abstract class ProtocolUnrestriction extends DataFlow::Node {
|
||||
/** Gets the context being relaxed. */
|
||||
abstract DataFlow::Node getContext();
|
||||
|
||||
/** Gets the protocol version being allowed. */
|
||||
abstract ProtocolVersion getUnrestriction();
|
||||
}
|
||||
|
||||
/**
|
||||
* A context is being created with a range of allowed protocols.
|
||||
* This also serves as unrestricting these protocols.
|
||||
*/
|
||||
abstract class UnspecificContextCreation extends ContextCreation, ProtocolUnrestriction {
|
||||
TlsLibrary library;
|
||||
ProtocolFamily family;
|
||||
|
||||
UnspecificContextCreation() { this.getProtocol() = family }
|
||||
|
||||
override DataFlow::CfgNode getContext() { result = this }
|
||||
|
||||
override ProtocolVersion getUnrestriction() {
|
||||
// There is only one family, the two names are aliases in OpenSSL.
|
||||
// see https://github.com/openssl/openssl/blob/13888e797c5a3193e91d71e5f5a196a2d68d266f/include/openssl/ssl.h.in#L1953-L1955
|
||||
family in ["SSLv23", "TLS"] and
|
||||
// see https://docs.python.org/3/library/ssl.html#ssl-contexts
|
||||
result in ["SSLv2", "SSLv3", "TLSv1", "TLSv1_1", "TLSv1_2", "TLSv1_3"]
|
||||
}
|
||||
}
|
||||
|
||||
/** A model of an SSL/TLS library. */
|
||||
abstract class TlsLibrary extends string {
|
||||
bindingset[this]
|
||||
TlsLibrary() { any() }
|
||||
|
||||
/** The name of a specific protocol version. */
|
||||
abstract string specific_version_name(ProtocolVersion version);
|
||||
|
||||
/** Gets a name, which is a member of `version_constants`, that can be used to specify the protocol family `family`. */
|
||||
abstract string unspecific_version_name(ProtocolFamily family);
|
||||
|
||||
/** Gets an API node representing the module or class holding the version constants. */
|
||||
abstract API::Node version_constants();
|
||||
|
||||
/** Gets an API node representing a specific protocol version. */
|
||||
API::Node specific_version(ProtocolVersion version) {
|
||||
result = version_constants().getMember(specific_version_name(version))
|
||||
}
|
||||
|
||||
/** Gets an API node representing the protocol family `family`. */
|
||||
API::Node unspecific_version(ProtocolFamily family) {
|
||||
result = version_constants().getMember(unspecific_version_name(family))
|
||||
}
|
||||
|
||||
/** Gets a creation of a context with a default protocol. */
|
||||
abstract ContextCreation default_context_creation();
|
||||
|
||||
/** Gets a creation of a context with a specific protocol. */
|
||||
abstract ContextCreation specific_context_creation();
|
||||
|
||||
/** Gets a creation of a context with a specific protocol version, known to be insecure. */
|
||||
ContextCreation insecure_context_creation(ProtocolVersion version) {
|
||||
result in [specific_context_creation(), default_context_creation()] and
|
||||
result.getProtocol() = version and
|
||||
version.isInsecure()
|
||||
}
|
||||
|
||||
/** Gets a context that was created using `family`, known to have insecure instances. */
|
||||
ContextCreation unspecific_context_creation(ProtocolFamily family) {
|
||||
result in [specific_context_creation(), default_context_creation()] and
|
||||
result.getProtocol() = family
|
||||
}
|
||||
|
||||
/** Gets a dataflow node representing a connection being created in an insecure manner, not from a context. */
|
||||
abstract DataFlow::Node insecure_connection_creation(ProtocolVersion version);
|
||||
|
||||
/** Gets a dataflow node representing a connection being created from a context. */
|
||||
abstract ConnectionCreation connection_creation();
|
||||
|
||||
/** Gets a dataflow node representing a context being restricted on which protocols it can accept. */
|
||||
abstract ProtocolRestriction protocol_restriction();
|
||||
|
||||
/** Gets a dataflow node representing a context being relaxed on which protocols it can accept. */
|
||||
abstract ProtocolUnrestriction protocol_unrestriction();
|
||||
}
|
||||
104 python/ql/src/Security/CWE-327/WeakSensitiveDataHashing.qhelp Normal file
@@ -0,0 +1,104 @@
|
||||
<!DOCTYPE qhelp PUBLIC
|
||||
"-//Semmle//qhelp//EN"
|
||||
"qhelp.dtd">
|
||||
<qhelp>
|
||||
<overview>
|
||||
<p>
|
||||
Using a broken or weak cryptographic hash function can leave data
|
||||
vulnerable, and should not be used in security related code.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
A strong cryptographic hash function should be resistant to:
|
||||
</p>
|
||||
<ul>
|
||||
<li>
|
||||
pre-image attacks: if you know a hash value <code>h(x)</code>,
|
||||
you should not be able to easily find the input <code>x</code>.
|
||||
</li>
|
||||
<li>
|
||||
collision attacks: if you know a hash value <code>h(x)</code>,
|
||||
you should not be able to easily find a different input <code>y</code>
|
||||
with the same hash value <code>h(x) = h(y)</code>.
|
||||
</li>
|
||||
</ul>
|
||||
<p>
|
||||
In cases with a limited input space, such as for passwords, the hash
|
||||
function also needs to be computationally expensive to be resistant to
|
||||
brute-force attacks. Passwords should also have a unique salt applied
|
||||
before hashing, but that is not considered by this query.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
As an example, both MD5 and SHA-1 are known to be vulnerable to collision attacks.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
Since it's OK to use a weak cryptographic hash function in a non-security
|
||||
context, this query only alerts when these are used to hash sensitive
|
||||
data (such as passwords, certificates, usernames).
|
||||
</p>
|
||||
|
||||
<p>
|
||||
Use of broken or weak cryptographic algorithms that are not hashing algorithms is
|
||||
handled by the <code>py/weak-cryptographic-algorithm</code> query.
|
||||
</p>
|
||||
|
||||
</overview>
|
||||
<recommendation>
|
||||
|
||||
<p>
|
||||
Ensure that you use a strong, modern cryptographic hash function:
|
||||
</p>
|
||||
|
||||
<ul>
|
||||
<li>
|
||||
such as Argon2, scrypt, bcrypt, or PBKDF2 for passwords and other data with limited input space.
|
||||
</li>
|
||||
<li>
|
||||
such as SHA-2, or SHA-3 in other cases.
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
</recommendation>
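As a concrete sketch of the first recommendation, password hashing with the standard library's `hashlib.pbkdf2_hmac` and a per-password random salt (the iteration count below is an illustrative choice, not a value taken from this commit):

```python
import hashlib
import os

def hash_password(password: str) -> tuple:
    salt = os.urandom(16)
    digest = hashlib.pbkdf2_hmac("sha256", password.encode("utf-8"), salt, 600000)
    return salt, digest  # store both; recompute with the stored salt to verify
```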
|
||||
<example>
|
||||
|
||||
<p>
|
||||
The following example shows two functions for checking whether the hash
|
||||
of a certificate matches a known value -- to prevent tampering.
|
||||
|
||||
The first function uses MD5 that is known to be vulnerable to collision attacks.
|
||||
|
||||
The second function uses SHA-256 that is a strong cryptographic hashing function.
|
||||
</p>
|
||||
|
||||
<sample src="examples/weak_certificate_hashing.py" />
|
||||
|
||||
</example>
|
||||
<example>
|
||||
<p>
|
||||
The following example shows two functions for hashing passwords.
|
||||
|
||||
The first function uses SHA-256 to hash passwords. Although SHA-256 is a
|
||||
strong cryptographic hash function, it is not suitable for password
|
||||
hashing since it is not computationally expensive.
|
||||
</p>
|
||||
|
||||
<sample src="examples/weak_password_hashing_bad.py" />
|
||||
|
||||
|
||||
<p>
|
||||
The second function uses Argon2 (through the <code>argon2-cffi</code>
|
||||
PyPI package), which is a strong password hashing algorithm (and
|
||||
includes a per-password salt by default).
|
||||
</p>
|
||||
|
||||
<sample src="examples/weak_password_hashing_good.py" />
|
||||
|
||||
</example>
|
||||
|
||||
<references>
|
||||
<li>OWASP: <a href="https://cheatsheetseries.owasp.org/cheatsheets/Password_Storage_Cheat_Sheet.html">Password Storage Cheat Sheet</a></li>
|
||||
</references>
|
||||
|
||||
</qhelp>
|
||||
48 python/ql/src/Security/CWE-327/WeakSensitiveDataHashing.ql Normal file
@@ -0,0 +1,48 @@
|
||||
/**
|
||||
* @name Use of a broken or weak cryptographic hashing algorithm on sensitive data
|
||||
* @description Using broken or weak cryptographic hashing algorithms can compromise security.
|
||||
* @kind path-problem
|
||||
* @problem.severity warning
|
||||
* @security-severity 5.9
|
||||
* @precision high
|
||||
* @id py/weak-sensitive-data-hashing
|
||||
* @tags security
|
||||
* external/cwe/cwe-327
|
||||
* external/cwe/cwe-916
|
||||
*/
|
||||
|
||||
import python
|
||||
import semmle.python.security.dataflow.WeakSensitiveDataHashing
|
||||
import semmle.python.dataflow.new.DataFlow
|
||||
import semmle.python.dataflow.new.TaintTracking
|
||||
import DataFlow::PathGraph
|
||||
|
||||
from
|
||||
DataFlow::PathNode source, DataFlow::PathNode sink, string ending, string algorithmName,
|
||||
string classification
|
||||
where
|
||||
exists(NormalHashFunction::Configuration config |
|
||||
config.hasFlowPath(source, sink) and
|
||||
algorithmName = sink.getNode().(NormalHashFunction::Sink).getAlgorithmName() and
|
||||
classification = source.getNode().(NormalHashFunction::Source).getClassification() and
|
||||
ending = "."
|
||||
)
|
||||
or
|
||||
exists(ComputationallyExpensiveHashFunction::Configuration config |
|
||||
config.hasFlowPath(source, sink) and
|
||||
algorithmName = sink.getNode().(ComputationallyExpensiveHashFunction::Sink).getAlgorithmName() and
|
||||
classification =
|
||||
source.getNode().(ComputationallyExpensiveHashFunction::Source).getClassification() and
|
||||
(
|
||||
sink.getNode().(ComputationallyExpensiveHashFunction::Sink).isComputationallyExpensive() and
|
||||
ending = "."
|
||||
or
|
||||
not sink.getNode().(ComputationallyExpensiveHashFunction::Sink).isComputationallyExpensive() and
|
||||
ending =
|
||||
" for " + classification +
|
||||
" hashing, since it is not a computationally expensive hash function."
|
||||
)
|
||||
)
|
||||
select sink.getNode(), source, sink,
|
||||
"$@ is used in a hashing algorithm (" + algorithmName + ") that is insecure" + ending,
|
||||
source.getNode(), "Sensitive data (" + classification + ")"
|
||||
@@ -0,0 +1,9 @@
|
||||
import hashlib
|
||||
|
||||
def certificate_matches_known_hash_bad(certificate, known_hash):
|
||||
hash = hashlib.md5(certificate).hexdigest() # BAD
|
||||
return hash == known_hash
|
||||
|
||||
def certificate_matches_known_hash_good(certificate, known_hash):
|
||||
hash = hashlib.sha256(certificate).hexdigest() # GOOD
|
||||
return hash == known_hash
|
||||
@@ -0,0 +1,4 @@
|
||||
import hashlib
|
||||
|
||||
def get_password_hash(password: str, salt: str):
|
||||
return hashlib.sha256(password + salt).hexdigest() # BAD
|
||||
@@ -0,0 +1,9 @@
|
||||
from argon2 import PasswordHasher
|
||||
|
||||
def get_initial_hash(password: str):
|
||||
ph = PasswordHasher()
|
||||
return ph.hash(password) # GOOD
|
||||
|
||||
def check_password(password: str, known_hash):
|
||||
ph = PasswordHasher()
|
||||
return ph.verify(known_hash, password) # GOOD
|
||||
@@ -4,6 +4,7 @@
|
||||
* @kind problem
|
||||
* @id py/insecure-temporary-file
|
||||
* @problem.severity error
|
||||
* @security-severity 5.9
|
||||
* @sub-severity high
|
||||
* @precision high
|
||||
* @tags external/cwe/cwe-377
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
* @kind path-problem
|
||||
* @id py/unsafe-deserialization
|
||||
* @problem.severity error
|
||||
* @security-severity 5.9
|
||||
* @sub-severity high
|
||||
* @precision high
|
||||
* @tags external/cwe/cwe-502
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
* may cause redirection to malicious web sites.
|
||||
* @kind path-problem
|
||||
* @problem.severity error
|
||||
* @security-severity 2.7
|
||||
* @sub-severity low
|
||||
* @id py/url-redirection
|
||||
* @tags security
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
* @kind problem
|
||||
* @id py/overly-permissive-file
|
||||
* @problem.severity warning
|
||||
* @security-severity 5.9
|
||||
* @sub-severity high
|
||||
* @precision medium
|
||||
* @tags external/cwe/cwe-732
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
* @description Credentials are hard coded in the source code of the application.
|
||||
* @kind path-problem
|
||||
* @problem.severity error
|
||||
* @security-severity 5.9
|
||||
* @precision medium
|
||||
* @id py/hardcoded-credentials
|
||||
* @tags security
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
* @tags security
|
||||
* correctness
|
||||
* @problem.severity error
|
||||
* @security-severity 4.2
|
||||
* @sub-severity high
|
||||
* @precision low
|
||||
* @id py/use-of-exec
|
||||
|
||||
13
python/ql/src/Summary/LinesOfCode.ql
Normal file
@@ -0,0 +1,13 @@
|
||||
/**
|
||||
* @name Total lines of Python code in the database
|
||||
* @description The total number of lines of Python code across all files, including
|
||||
* external libraries and auto-generated files. This is a useful metric of the size of a
|
||||
* database. This query counts the lines of code, excluding whitespace or comments.
|
||||
* @kind metric
|
||||
* @tags summary
|
||||
* @id py/summary/lines-of-code
|
||||
*/
|
||||
|
||||
import python
|
||||
|
||||
select sum(Module m | | m.getMetrics().getNumberOfLinesOfCode())
|
||||
22
python/ql/src/Summary/LinesOfUserCode.ql
Normal file
@@ -0,0 +1,22 @@
|
||||
/**
|
||||
* @name Total lines of user written Python code in the database
|
||||
* @description The total number of lines of Python code from the source code directory,
|
||||
* excluding auto-generated files. This query counts the lines of code, excluding
|
||||
* whitespace or comments. Note: If external libraries are included in the codebase
|
||||
* either in a checked-in virtual environment or as vendored code, that will currently
|
||||
* be counted as user written code.
|
||||
* @kind metric
|
||||
* @tags summary
|
||||
* lines-of-code
|
||||
* @id py/summary/lines-of-user-code
|
||||
*/
|
||||
|
||||
import python
|
||||
import semmle.python.filters.GeneratedCode
|
||||
|
||||
select sum(Module m |
|
||||
exists(m.getFile().getRelativePath()) and
|
||||
not m.getFile() instanceof GeneratedFile
|
||||
|
|
||||
m.getMetrics().getNumberOfLinesOfCode()
|
||||
)
|
||||
@@ -26,9 +26,8 @@ variable should be renamed to make the code easier to interpret.</p>
|
||||
</example>
|
||||
<references>
|
||||
|
||||
<li>J. Lusth, <i>The Art and Craft of Programming - Python Edition</i>, Section: Scope. University of Alabama, 2012. (<a href="http://troll.cs.ua.edu/ACP-PY/index_13.html">Published online</a>).</li>
|
||||
<li>New Mexico Tech Computer Center: <a href="http://infohost.nmt.edu/tcc/help/pubs/python/web/global-statement.html">The global
|
||||
statement: Declare access to a global name</a>.</li>
|
||||
<li>J. Lusth, <i>The Art and Craft of Programming - Python Edition</i>, Section: Scope. University of Alabama, 2012. (<a href="https://web.archive.org/web/20190919091129/http://troll.cs.ua.edu/ACP-PY/index_13.html">Published online</a>).</li>
|
||||
<li>Python Language Reference: <a href="http://docs.python.org/reference/simple_stmts.html#the-global-statement">The global statement</a>.</li>
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,30 @@
|
||||
<!DOCTYPE qhelp PUBLIC
|
||||
"-//Semmle//qhelp//EN"
|
||||
"qhelp.dtd">
|
||||
<qhelp>
|
||||
|
||||
|
||||
<overview>
|
||||
<p>A class name that begins with a lowercase letter does not follow standard
|
||||
naming conventions. This decreases code readability. For example, <code>class background</code>.
|
||||
</p>
|
||||
|
||||
</overview>
|
||||
<recommendation>
|
||||
|
||||
<p>
|
||||
Write the class name beginning with an uppercase letter. For example, <code>class Background</code>.
|
||||
</p>
|
||||
|
||||
</recommendation>
|
||||
|
||||
<references>
|
||||
|
||||
<li>
|
||||
Guido van Rossum, Barry Warsaw, Nick Coghlan <em>PEP 8 -- Style Guide for Python Code</em>
|
||||
<a href="https://www.python.org/dev/peps/pep-0008/#class-names">Python Class Names</a>
|
||||
</li>
|
||||
|
||||
</references>
|
||||
|
||||
</qhelp>
|
||||
@@ -0,0 +1,28 @@
|
||||
/**
|
||||
* @name Misnamed class
|
||||
* @description A class name that begins with a lowercase letter decreases readability.
|
||||
* @kind problem
|
||||
* @problem.severity recommendation
|
||||
* @id py/misnamed-class
|
||||
* @tags maintainability
|
||||
*/
|
||||
|
||||
import python
|
||||
|
||||
predicate lower_case_class(Class c) {
|
||||
exists(string first_char |
|
||||
first_char = c.getName().prefix(1) and
|
||||
not first_char = first_char.toUpperCase()
|
||||
)
|
||||
}
|
||||
|
||||
from Class c
|
||||
where
|
||||
c.inSource() and
|
||||
lower_case_class(c) and
|
||||
not exists(Class c1 |
|
||||
c1 != c and
|
||||
c1.getLocation().getFile() = c.getLocation().getFile() and
|
||||
lower_case_class(c1)
|
||||
)
|
||||
select c, "Class names should start in uppercase."
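As a rough illustration of the query's behavior (the file names are hypothetical): a lowercase-named class is only reported when it is the sole such class in its file, so a file that consistently uses lowercase class names is treated as following a deliberate local convention and left alone.

```python
# flagged.py - `background` is the only lowercase-named class here, so the
# query reports it and suggests renaming it to `Background`.
class background:
    pass

class Sprite:
    pass
```

```python
# not_flagged.py - every class name here starts in lowercase, so the query
# stays silent rather than flooding the file with alerts.
class background:
    pass

class sprite:
    pass
```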
|
||||
@@ -0,0 +1,30 @@
|
||||
<!DOCTYPE qhelp PUBLIC
|
||||
"-//Semmle//qhelp//EN"
|
||||
"qhelp.dtd">
|
||||
<qhelp>
|
||||
|
||||
|
||||
<overview>
|
||||
<p>A function name that begins with an uppercase letter does not follow standard
|
||||
naming conventions. This decreases code readability. For example, <code>Jump</code>.
|
||||
</p>
|
||||
|
||||
</overview>
|
||||
<recommendation>
|
||||
|
||||
<p>
|
||||
Write the function name beginning with a lowercase letter. For example, <code>jump</code>.
|
||||
</p>
|
||||
|
||||
</recommendation>
|
||||
|
||||
<references>
|
||||
|
||||
<li>
|
||||
Guido van Rossum, Barry Warsaw, Nick Coghlan <em>PEP 8 -- Style Guide for Python Code</em>
|
||||
<a href="https://www.python.org/dev/peps/pep-0008/#function-and-variable-names">Python Function and Variable Names</a>
|
||||
</li>
|
||||
|
||||
</references>
|
||||
|
||||
</qhelp>
|
||||
@@ -0,0 +1,28 @@
|
||||
/**
|
||||
* @name Misnamed function
|
||||
* @description A function name that begins with an uppercase letter decreases readability.
|
||||
* @kind problem
|
||||
* @problem.severity recommendation
|
||||
* @id py/misnamed-function
|
||||
* @tags maintainability
|
||||
*/
|
||||
|
||||
import python
|
||||
|
||||
predicate upper_case_function(Function func) {
|
||||
exists(string first_char |
|
||||
first_char = func.getName().prefix(1) and
|
||||
not first_char = first_char.toLowerCase()
|
||||
)
|
||||
}
|
||||
|
||||
from Function func
|
||||
where
|
||||
func.inSource() and
|
||||
upper_case_function(func) and
|
||||
not exists(Function func1 |
|
||||
func1 != func and
|
||||
func1.getLocation().getFile() = func.getLocation().getFile() and
|
||||
upper_case_function(func1)
|
||||
)
|
||||
select func, "Function names should start in lowercase."
|
||||
@@ -0,0 +1,28 @@
|
||||
/**
|
||||
* @name OLD QUERY: Use of a broken or weak cryptographic algorithm
|
||||
* @description Using broken or weak cryptographic algorithms can compromise security.
|
||||
* @kind path-problem
|
||||
* @problem.severity warning
|
||||
* @id py/old/weak-cryptographic-algorithm
|
||||
* @deprecated
|
||||
*/
|
||||
|
||||
import python
|
||||
import semmle.python.security.Paths
|
||||
import semmle.python.security.SensitiveData
|
||||
import semmle.python.security.Crypto
|
||||
|
||||
class BrokenCryptoConfiguration extends TaintTracking::Configuration {
|
||||
BrokenCryptoConfiguration() { this = "Broken crypto configuration" }
|
||||
|
||||
override predicate isSource(TaintTracking::Source source) {
|
||||
source instanceof SensitiveDataSource
|
||||
}
|
||||
|
||||
override predicate isSink(TaintTracking::Sink sink) { sink instanceof WeakCryptoSink }
|
||||
}
|
||||
|
||||
from BrokenCryptoConfiguration config, TaintedPathSource src, TaintedPathSink sink
|
||||
where config.hasFlowPath(src, sink)
|
||||
select sink.getSink(), src, sink, "$@ is used in a broken or weak cryptographic algorithm.",
|
||||
src.getSource(), "Sensitive data"
|
||||
@@ -0,0 +1,50 @@
|
||||
<!DOCTYPE qhelp PUBLIC
|
||||
"-//Semmle//qhelp//EN"
|
||||
"qhelp.dtd">
|
||||
<qhelp>
|
||||
<overview>
|
||||
<p>If an LDAP query or DN is built using string concatenation or string formatting, and the
components of the concatenation include user input without proper sanitization, an attacker
may be able to run malicious LDAP queries.</p>
|
||||
</overview>
|
||||
|
||||
<recommendation>
|
||||
<p>If user input must be included in an LDAP query or DN, it should be escaped to
prevent a malicious user from providing special characters that change the meaning
of the query. In Python 2, user input should be escaped with <code>ldap.dn.escape_dn_chars</code>
or <code>ldap.filter.escape_filter_chars</code>, while in Python 3 it should be escaped with
<code>ldap3.utils.dn.escape_rdn</code> or <code>ldap3.utils.conv.escape_filter_chars</code>,
depending on which component is tainted by the user. A good practice is to escape the filter characters
that could change the meaning of the query (see RFC 4515, https://tools.ietf.org/search/rfc4515#section-3).</p>
|
||||
</recommendation>
|
||||
|
||||
<example>
|
||||
<p>In the following examples, the code accepts both <code>username</code> and <code>dc</code> from the user,
which it then uses to build an LDAP query and DN.</p>

<p>The first and second examples use the unsanitized user input directly
in the search filter and DN for the LDAP query.
A malicious user could provide special characters to change the meaning of these
components, and search for a completely different set of values.</p>

<sample src="examples/example_bad1.py" />
<sample src="examples/example_bad2.py" />

<p>In the third and fourth examples, the input provided by the user is sanitized before it is included in the search filter or DN.
This ensures the meaning of the query cannot be changed by a malicious user.</p>
|
||||
|
||||
<sample src="examples/example_good1.py" />
|
||||
<sample src="examples/example_good2.py" />
|
||||
</example>
|
||||
|
||||
<references>
|
||||
<li>OWASP: <a href="https://cheatsheetseries.owasp.org/cheatsheets/LDAP_Injection_Prevention_Cheat_Sheet.html">LDAP Injection Prevention Cheat Sheet</a>.</li>
|
||||
<li>OWASP: <a href="https://owasp.org/www-community/attacks/LDAP_Injection">LDAP Injection</a>.</li>
|
||||
<li>SonarSource: <a href="https://rules.sonarsource.com/python/RSPEC-2078">RSPEC-2078</a>.</li>
|
||||
<li>Python2: <a href="https://www.python-ldap.org/en/python-ldap-3.3.0/reference/ldap.html">LDAP Documentation</a>.</li>
|
||||
<li>Python3: <a href="https://ldap3.readthedocs.io/en/latest/">LDAP Documentation</a>.</li>
|
||||
<li>Wikipedia: <a href="https://en.wikipedia.org/wiki/LDAP_injection">LDAP injection</a>.</li>
|
||||
<li>BlackHat: <a href="https://www.blackhat.com/presentations/bh-europe-08/Alonso-Parada/Whitepaper/bh-eu-08-alonso-parada-WP.pdf">LDAP Injection and Blind LDAP Injection</a>.</li>
|
||||
<li>LDAP: <a href="https://ldap.com/2018/05/04/understanding-and-defending-against-ldap-injection-attacks/">Understanding and Defending Against LDAP Injection Attacks</a>.</li>
|
||||
</references>
|
||||
</qhelp>
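To see why the escaping matters, consider the following small, self-contained sketch (the attacker-controlled value is invented for illustration). An unescaped value containing `)` and `*` turns a single equality test into a much broader filter, whereas `ldap3.utils.conv.escape_filter_chars` neutralizes the meta-characters.

```python
from ldap3.utils.conv import escape_filter_chars

# A hypothetical attacker-controlled value.
malicious_username = "*)(objectClass=*"

unsafe_filter = "(user={})".format(malicious_username)
print(unsafe_filter)   # (user=*)(objectClass=*)  -- no longer a single equality test

safe_filter = "(user={})".format(escape_filter_chars(malicious_username))
print(safe_filter)     # the meta-characters are escaped, so the filter means what it says
```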
|
||||
21
python/ql/src/experimental/Security/CWE-090/LDAPInjection.ql
Normal file
@@ -0,0 +1,21 @@
|
||||
/**
|
||||
* @name LDAP query built from user-controlled sources
|
||||
* @description Building an LDAP query from user-controlled sources is vulnerable to insertion of
|
||||
* malicious LDAP code by the user.
|
||||
* @kind path-problem
|
||||
* @problem.severity error
|
||||
* @id py/ldap-injection
|
||||
* @tags experimental
|
||||
* security
|
||||
* external/cwe/cwe-090
|
||||
*/
|
||||
|
||||
// Determine precision above
|
||||
import python
|
||||
import experimental.semmle.python.security.injection.LDAP
|
||||
import DataFlow::PathGraph
|
||||
|
||||
from LDAPInjectionFlowConfig config, DataFlow::PathNode source, DataFlow::PathNode sink
|
||||
where config.hasFlowPath(source, sink)
|
||||
select sink.getNode(), source, sink, "$@ LDAP query parameter comes from $@.", sink.getNode(),
|
||||
"This", source.getNode(), "a user-provided value"
|
||||
@@ -0,0 +1,15 @@
|
||||
from flask import request, Flask
import ldap

app = Flask(__name__)


@app.route("/normal")
def normal():
    unsafe_dc = request.args['dc']
    unsafe_filter = request.args['username']

    dn = "dc={}".format(unsafe_dc)
    search_filter = "(user={})".format(unsafe_filter)

    ldap_connection = ldap.initialize("ldap://127.0.0.1")
    user = ldap_connection.search_s(
        dn, ldap.SCOPE_SUBTREE, search_filter)
|
||||
@@ -0,0 +1,15 @@
|
||||
from flask import request, Flask
import ldap3

app = Flask(__name__)


@app.route("/normal")
def normal():
    unsafe_dc = request.args['dc']
    unsafe_filter = request.args['username']

    dn = "dc={}".format(unsafe_dc)
    search_filter = "(user={})".format(unsafe_filter)

    srv = ldap3.Server('ldap://127.0.0.1')
    conn = ldap3.Connection(srv, user=dn, auto_bind=True)
    conn.search(dn, search_filter)
|
||||
@@ -0,0 +1,20 @@
|
||||
from flask import request, Flask
import ldap
import ldap.filter
import ldap.dn

app = Flask(__name__)


@app.route("/normal")
def normal():
    unsafe_dc = request.args['dc']
    unsafe_filter = request.args['username']

    safe_dc = ldap.dn.escape_dn_chars(unsafe_dc)
    safe_filter = ldap.filter.escape_filter_chars(unsafe_filter)

    dn = "dc={}".format(safe_dc)
    search_filter = "(user={})".format(safe_filter)

    ldap_connection = ldap.initialize("ldap://127.0.0.1")
    user = ldap_connection.search_s(
        dn, ldap.SCOPE_SUBTREE, search_filter)
|
||||
@@ -0,0 +1,20 @@
|
||||
from flask import request, Flask
import ldap3
from ldap3.utils.dn import escape_rdn
from ldap3.utils.conv import escape_filter_chars

app = Flask(__name__)


@app.route("/normal")
def normal():
    unsafe_dc = request.args['dc']
    unsafe_filter = request.args['username']

    safe_dc = escape_rdn(unsafe_dc)
    safe_filter = escape_filter_chars(unsafe_filter)

    dn = "dc={}".format(safe_dc)
    search_filter = "(user={})".format(safe_filter)

    srv = ldap3.Server('ldap://127.0.0.1')
    conn = ldap3.Connection(srv, user=dn, auto_bind=True)
    conn.search(dn, search_filter)
|
||||
@@ -0,0 +1,45 @@
|
||||
<!DOCTYPE qhelp PUBLIC
|
||||
"-//Semmle//qhelp//EN"
|
||||
"qhelp.dtd">
|
||||
<qhelp>
|
||||
<overview>
|
||||
<p>
Constructing a regular expression with unsanitized user input is dangerous, as a malicious user may
be able to modify the meaning of the expression. In particular, such a user may be able to provide
a regular expression fragment that requires exponential time to match in the worst case, and use it to
perform a denial-of-service attack.
</p>
|
||||
</overview>
|
||||
|
||||
<recommendation>
|
||||
<p>
Before embedding user input into a regular expression, use a sanitization function such as
<code>re.escape</code> to escape meta-characters that have a special meaning in
regular expression syntax.
</p>
|
||||
</recommendation>
|
||||
|
||||
<example>
|
||||
<p>
|
||||
The following examples are based on a simple Flask web server environment.
|
||||
</p>
|
||||
<p>
The following example shows an HTTP request parameter that is used to construct a regular expression
without sanitizing it first:
</p>
|
||||
<sample src="re_bad.py" />
|
||||
<p>
|
||||
Instead, the request parameter should be sanitized first, for example using the function
|
||||
<code>re.escape</code>. This ensures that the user cannot insert characters which have a
|
||||
special meaning in regular expressions.
|
||||
</p>
|
||||
<sample src="re_good.py" />
|
||||
</example>
|
||||
|
||||
<references>
|
||||
<li>OWASP: <a href="https://www.owasp.org/index.php/Regular_expression_Denial_of_Service_-_ReDoS">Regular expression Denial of Service - ReDoS</a>.</li>
|
||||
<li>Wikipedia: <a href="https://en.wikipedia.org/wiki/ReDoS">ReDoS</a>.</li>
|
||||
<li>Python docs: <a href="https://docs.python.org/3/library/re.html">re</a>.</li>
|
||||
<li>SonarSource: <a href="https://rules.sonarsource.com/python/type/Vulnerability/RSPEC-2631">RSPEC-2631</a>.</li>
|
||||
</references>
|
||||
</qhelp>
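The exponential worst case mentioned in the overview can be demonstrated directly. The following stand-alone sketch (not one of the query's sample files) times the classic catastrophic-backtracking pattern `(a+)+$` against inputs that almost match; the matching time roughly doubles with every extra character.

```python
import re
import time

evil_pattern = "(a+)+$"   # the kind of pattern an attacker might inject

for n in range(16, 25, 2):
    subject = "a" * n + "!"            # almost matches, forcing heavy backtracking
    start = time.perf_counter()
    re.match(evil_pattern, subject)    # fails, but only after an exponential search
    print(n, f"{time.perf_counter() - start:.3f}s")
```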
|
||||
@@ -0,0 +1,29 @@
|
||||
/**
|
||||
* @name Regular expression injection
|
||||
* @description User input should not be used in regular expressions without first being escaped,
|
||||
* otherwise a malicious user may be able to inject an expression that could require
|
||||
* exponential time on certain inputs.
|
||||
* @kind path-problem
|
||||
* @problem.severity error
|
||||
* @id py/regex-injection
|
||||
* @tags security
|
||||
* external/cwe/cwe-730
|
||||
* external/cwe/cwe-400
|
||||
*/
|
||||
|
||||
// determine precision above
|
||||
import python
|
||||
import experimental.semmle.python.security.injection.RegexInjection
|
||||
import DataFlow::PathGraph
|
||||
|
||||
from
|
||||
RegexInjectionFlowConfig config, DataFlow::PathNode source, DataFlow::PathNode sink,
|
||||
RegexInjectionSink regexInjectionSink, Attribute methodAttribute
|
||||
where
|
||||
config.hasFlowPath(source, sink) and
|
||||
regexInjectionSink = sink.getNode() and
|
||||
methodAttribute = regexInjectionSink.getRegexMethod()
|
||||
select sink.getNode(), source, sink,
|
||||
"$@ regular expression is constructed from a $@ and executed by $@.", sink.getNode(), "This",
|
||||
source.getNode(), "user-provided value", methodAttribute,
|
||||
regexInjectionSink.getRegexModule() + "." + methodAttribute.getName()
|
||||
15
python/ql/src/experimental/Security/CWE-730/re_bad.py
Normal file
@@ -0,0 +1,15 @@
|
||||
from flask import request, Flask
import re

app = Flask(__name__)


@app.route("/direct")
def direct():
    unsafe_pattern = request.args["pattern"]
    re.search(unsafe_pattern, "")


@app.route("/compile")
def compile():
    unsafe_pattern = request.args["pattern"]
    compiled_pattern = re.compile(unsafe_pattern)
    compiled_pattern.search("")
|
||||
17
python/ql/src/experimental/Security/CWE-730/re_good.py
Normal file
@@ -0,0 +1,17 @@
|
||||
from flask import request, Flask
import re

app = Flask(__name__)


@app.route("/direct")
def direct():
    unsafe_pattern = request.args['pattern']
    safe_pattern = re.escape(unsafe_pattern)
    re.search(safe_pattern, "")


@app.route("/compile")
def compile():
    unsafe_pattern = request.args['pattern']
    safe_pattern = re.escape(unsafe_pattern)
    compiled_pattern = re.compile(safe_pattern)
    compiled_pattern.search("")
|
||||
@@ -14,6 +14,139 @@ private import semmle.python.dataflow.new.RemoteFlowSources
|
||||
private import semmle.python.dataflow.new.TaintTracking
|
||||
private import experimental.semmle.python.Frameworks
|
||||
|
||||
/** Provides classes for modeling Regular Expression-related APIs. */
|
||||
module RegexExecution {
|
||||
/**
|
||||
* A data-flow node that executes a regular expression.
|
||||
*
|
||||
* Extend this class to model new APIs. If you want to refine existing API models,
|
||||
* extend `RegexExecution` instead.
|
||||
*/
|
||||
abstract class Range extends DataFlow::Node {
|
||||
/**
|
||||
* Gets the argument containing the executed expression.
|
||||
*/
|
||||
abstract DataFlow::Node getRegexNode();
|
||||
|
||||
/**
|
||||
* Gets the library used to execute the regular expression.
|
||||
*/
|
||||
abstract string getRegexModule();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A data-flow node that executes a regular expression.
|
||||
*
|
||||
* Extend this class to refine existing API models. If you want to model new APIs,
|
||||
* extend `RegexExecution::Range` instead.
|
||||
*/
|
||||
class RegexExecution extends DataFlow::Node {
|
||||
RegexExecution::Range range;
|
||||
|
||||
RegexExecution() { this = range }
|
||||
|
||||
DataFlow::Node getRegexNode() { result = range.getRegexNode() }
|
||||
|
||||
string getRegexModule() { result = range.getRegexModule() }
|
||||
}
|
||||
|
||||
/** Provides classes for modeling Regular Expression escape-related APIs. */
|
||||
module RegexEscape {
|
||||
/**
|
||||
* A data-flow node that escapes a regular expression.
|
||||
*
|
||||
* Extend this class to model new APIs. If you want to refine existing API models,
|
||||
* extend `RegexEscape` instead.
|
||||
*/
|
||||
abstract class Range extends DataFlow::Node {
|
||||
/**
|
||||
* Gets the argument containing the escaped expression.
|
||||
*/
|
||||
abstract DataFlow::Node getRegexNode();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A data-flow node that escapes a regular expression.
|
||||
*
|
||||
* Extend this class to refine existing API models. If you want to model new APIs,
|
||||
* extend `RegexEscape::Range` instead.
|
||||
*/
|
||||
class RegexEscape extends DataFlow::Node {
|
||||
RegexEscape::Range range;
|
||||
|
||||
RegexEscape() { this = range }
|
||||
|
||||
DataFlow::Node getRegexNode() { result = range.getRegexNode() }
|
||||
}
|
||||
|
||||
/** Provides classes for modeling LDAP query execution-related APIs. */
|
||||
module LDAPQuery {
|
||||
/**
|
||||
* A data-flow node that collects methods executing an LDAP query.
|
||||
*
|
||||
* Extend this class to model new APIs. If you want to refine existing API models,
|
||||
* extend `LDAPQuery` instead.
|
||||
*/
|
||||
abstract class Range extends DataFlow::Node {
|
||||
/**
|
||||
* Gets the argument containing the executed expression.
|
||||
*/
|
||||
abstract DataFlow::Node getQuery();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A data-flow node that collects methods executing an LDAP query.
|
||||
*
|
||||
* Extend this class to refine existing API models. If you want to model new APIs,
|
||||
* extend `LDAPQuery::Range` instead.
|
||||
*/
|
||||
class LDAPQuery extends DataFlow::Node {
|
||||
LDAPQuery::Range range;
|
||||
|
||||
LDAPQuery() { this = range }
|
||||
|
||||
/**
|
||||
* Gets the argument containing the executed expression.
|
||||
*/
|
||||
DataFlow::Node getQuery() { result = range.getQuery() }
|
||||
}
|
||||
|
||||
/** Provides classes for modeling LDAP components escape-related APIs. */
|
||||
module LDAPEscape {
|
||||
/**
|
||||
* A data-flow node that collects functions escaping LDAP components.
|
||||
*
|
||||
* Extend this class to model new APIs. If you want to refine existing API models,
|
||||
* extend `LDAPEscape` instead.
|
||||
*/
|
||||
abstract class Range extends DataFlow::Node {
|
||||
/**
|
||||
* Gets the argument containing the escaped expression.
|
||||
*/
|
||||
abstract DataFlow::Node getAnInput();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A data-flow node that collects functions escaping LDAP components.
|
||||
*
|
||||
* Extend this class to refine existing API models. If you want to model new APIs,
|
||||
* extend `LDAPEscape::Range` instead.
|
||||
*/
|
||||
class LDAPEscape extends DataFlow::Node {
|
||||
LDAPEscape::Range range;
|
||||
|
||||
LDAPEscape() { this = range }
|
||||
|
||||
/**
|
||||
* Gets the argument containing the escaped expression.
|
||||
*/
|
||||
DataFlow::Node getAnInput() { result = range.getAnInput() }
|
||||
}
|
||||
|
||||
/** Provides classes for modeling HTTP Header APIs. */
|
||||
module HeaderDeclaration {
|
||||
/**
|
||||
|
||||
@@ -6,3 +6,4 @@ private import experimental.semmle.python.frameworks.Stdlib
|
||||
private import experimental.semmle.python.frameworks.Flask
|
||||
private import experimental.semmle.python.frameworks.Django
|
||||
private import experimental.semmle.python.frameworks.Werkzeug
|
||||
private import experimental.semmle.python.frameworks.LDAP
|
||||
|
||||
153
python/ql/src/experimental/semmle/python/frameworks/LDAP.qll
Normal file
@@ -0,0 +1,153 @@
|
||||
/**
|
||||
* Provides classes modeling security-relevant aspects of the LDAP libraries.
|
||||
*/
|
||||
|
||||
private import python
|
||||
private import semmle.python.dataflow.new.DataFlow
|
||||
private import semmle.python.dataflow.new.TaintTracking
|
||||
private import semmle.python.dataflow.new.RemoteFlowSources
|
||||
private import experimental.semmle.python.Concepts
|
||||
private import semmle.python.ApiGraphs
|
||||
|
||||
/**
|
||||
* Provides models for Python's ldap-related libraries.
|
||||
*/
|
||||
private module LDAP {
|
||||
/**
|
||||
* Provides models for the `python-ldap` PyPI package (imported as `ldap`).
|
||||
*
|
||||
* See https://www.python-ldap.org/en/python-ldap-3.3.0/index.html
|
||||
*/
|
||||
private module LDAP2 {
|
||||
/**
|
||||
* List of `ldap` methods used to execute a query.
|
||||
*
|
||||
* See https://www.python-ldap.org/en/python-ldap-3.3.0/reference/ldap.html#functions
|
||||
*/
|
||||
private class LDAP2QueryMethods extends string {
|
||||
LDAP2QueryMethods() {
|
||||
this in ["search", "search_s", "search_st", "search_ext", "search_ext_s"]
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A class to find `ldap` methods executing a query.
|
||||
*
|
||||
* See `LDAP2QueryMethods`
|
||||
*/
|
||||
private class LDAP2Query extends DataFlow::CallCfgNode, LDAPQuery::Range {
|
||||
DataFlow::Node ldapQuery;
|
||||
|
||||
LDAP2Query() {
|
||||
exists(DataFlow::AttrRead searchMethod |
|
||||
this.getFunction() = searchMethod and
|
||||
API::moduleImport("ldap").getMember("initialize").getACall() =
|
||||
searchMethod.getObject().getALocalSource() and
|
||||
searchMethod.getAttributeName() instanceof LDAP2QueryMethods and
|
||||
(
|
||||
ldapQuery = this.getArg(0)
|
||||
or
|
||||
(
|
||||
ldapQuery = this.getArg(2) or
|
||||
ldapQuery = this.getArgByName("filterstr")
|
||||
)
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
override DataFlow::Node getQuery() { result = ldapQuery }
|
||||
}
|
||||
|
||||
/**
|
||||
* A class to find calls to `ldap.dn.escape_dn_chars`.
|
||||
*
|
||||
* See https://github.com/python-ldap/python-ldap/blob/7ce471e238cdd9a4dd8d17baccd1c9e05e6f894a/Lib/ldap/dn.py#L17
|
||||
*/
|
||||
private class LDAP2EscapeDNCall extends DataFlow::CallCfgNode, LDAPEscape::Range {
|
||||
LDAP2EscapeDNCall() {
|
||||
this = API::moduleImport("ldap").getMember("dn").getMember("escape_dn_chars").getACall()
|
||||
}
|
||||
|
||||
override DataFlow::Node getAnInput() { result = this.getArg(0) }
|
||||
}
|
||||
|
||||
/**
|
||||
* A class to find calls to `ldap.filter.escape_filter_chars`.
|
||||
*
|
||||
* See https://www.python-ldap.org/en/python-ldap-3.3.0/reference/ldap-filter.html#ldap.filter.escape_filter_chars
|
||||
*/
|
||||
private class LDAP2EscapeFilterCall extends DataFlow::CallCfgNode, LDAPEscape::Range {
|
||||
LDAP2EscapeFilterCall() {
|
||||
this =
|
||||
API::moduleImport("ldap").getMember("filter").getMember("escape_filter_chars").getACall()
|
||||
}
|
||||
|
||||
override DataFlow::Node getAnInput() { result = this.getArg(0) }
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Provides models for the `ldap3` PyPI package
|
||||
*
|
||||
* See https://pypi.org/project/ldap3/
|
||||
*/
|
||||
private module LDAP3 {
|
||||
/**
|
||||
* A class to find `ldap3` methods executing a query.
|
||||
*/
|
||||
private class LDAP3Query extends DataFlow::CallCfgNode, LDAPQuery::Range {
|
||||
DataFlow::Node ldapQuery;
|
||||
|
||||
LDAP3Query() {
|
||||
exists(DataFlow::AttrRead searchMethod |
|
||||
this.getFunction() = searchMethod and
|
||||
API::moduleImport("ldap3").getMember("Connection").getACall() =
|
||||
searchMethod.getObject().getALocalSource() and
|
||||
searchMethod.getAttributeName() = "search" and
|
||||
(
|
||||
ldapQuery = this.getArg(0) or
|
||||
ldapQuery = this.getArg(1)
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
override DataFlow::Node getQuery() { result = ldapQuery }
|
||||
}
|
||||
|
||||
/**
|
||||
* A class to find calls to `ldap3.utils.dn.escape_rdn`.
|
||||
*
|
||||
* See https://github.com/cannatag/ldap3/blob/4d33166f0869b929f59c6e6825a1b9505eb99967/ldap3/utils/dn.py#L390
|
||||
*/
|
||||
private class LDAP3EscapeDNCall extends DataFlow::CallCfgNode, LDAPEscape::Range {
|
||||
LDAP3EscapeDNCall() {
|
||||
this =
|
||||
API::moduleImport("ldap3")
|
||||
.getMember("utils")
|
||||
.getMember("dn")
|
||||
.getMember("escape_rdn")
|
||||
.getACall()
|
||||
}
|
||||
|
||||
override DataFlow::Node getAnInput() { result = this.getArg(0) }
|
||||
}
|
||||
|
||||
/**
|
||||
* A class to find calls to `ldap3.utils.conv.escape_filter_chars`.
|
||||
*
|
||||
* See https://github.com/cannatag/ldap3/blob/4d33166f0869b929f59c6e6825a1b9505eb99967/ldap3/utils/conv.py#L91
|
||||
*/
|
||||
private class LDAP3EscapeFilterCall extends DataFlow::CallCfgNode, LDAPEscape::Range {
|
||||
LDAP3EscapeFilterCall() {
|
||||
this =
|
||||
API::moduleImport("ldap3")
|
||||
.getMember("utils")
|
||||
.getMember("conv")
|
||||
.getMember("escape_filter_chars")
|
||||
.getACall()
|
||||
}
|
||||
|
||||
override DataFlow::Node getAnInput() { result = this.getArg(0) }
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -9,3 +9,92 @@ private import semmle.python.dataflow.new.TaintTracking
|
||||
private import semmle.python.dataflow.new.RemoteFlowSources
|
||||
private import experimental.semmle.python.Concepts
|
||||
private import semmle.python.ApiGraphs
|
||||
|
||||
/**
|
||||
* Provides models for Python's `re` library.
|
||||
*
|
||||
* See https://docs.python.org/3/library/re.html
|
||||
*/
|
||||
private module Re {
|
||||
/**
|
||||
* List of `re` methods immediately executing an expression.
|
||||
*
|
||||
* See https://docs.python.org/3/library/re.html#module-contents
|
||||
*/
|
||||
private class RegexExecutionMethods extends string {
|
||||
RegexExecutionMethods() {
|
||||
this in ["match", "fullmatch", "search", "split", "findall", "finditer", "sub", "subn"]
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A class to find `re` methods immediately executing an expression.
|
||||
*
|
||||
* See `RegexExecutionMethods`
|
||||
*/
|
||||
private class DirectRegex extends DataFlow::CallCfgNode, RegexExecution::Range {
|
||||
DataFlow::Node regexNode;
|
||||
|
||||
DirectRegex() {
|
||||
this = API::moduleImport("re").getMember(any(RegexExecutionMethods m)).getACall() and
|
||||
regexNode = this.getArg(0)
|
||||
}
|
||||
|
||||
override DataFlow::Node getRegexNode() { result = regexNode }
|
||||
|
||||
override string getRegexModule() { result = "re" }
|
||||
}
|
||||
|
||||
/**
|
||||
* A class to find `re` methods immediately executing a compiled expression by `re.compile`.
|
||||
*
|
||||
* Given the following example:
|
||||
*
|
||||
* ```py
|
||||
* pattern = re.compile(input)
|
||||
* pattern.match(s)
|
||||
* ```
|
||||
*
|
||||
* This class will identify that `re.compile` compiles `input` and afterwards
|
||||
* executes `re`'s `match`. As a result, `this` will refer to `pattern.match(s)`
|
||||
* and `this.getRegexNode()` will return the node for `input` (`re.compile`'s first argument)
|
||||
*
|
||||
*
|
||||
* See `RegexExecutionMethods`
|
||||
*
|
||||
* See https://docs.python.org/3/library/re.html#regular-expression-objects
|
||||
*/
|
||||
private class CompiledRegex extends DataFlow::CallCfgNode, RegexExecution::Range {
|
||||
DataFlow::Node regexNode;
|
||||
|
||||
CompiledRegex() {
|
||||
exists(DataFlow::CallCfgNode patternCall, DataFlow::AttrRead reMethod |
|
||||
this.getFunction() = reMethod and
|
||||
patternCall = API::moduleImport("re").getMember("compile").getACall() and
|
||||
patternCall.flowsTo(reMethod.getObject()) and
|
||||
reMethod.getAttributeName() instanceof RegexExecutionMethods and
|
||||
regexNode = patternCall.getArg(0)
|
||||
)
|
||||
}
|
||||
|
||||
override DataFlow::Node getRegexNode() { result = regexNode }
|
||||
|
||||
override string getRegexModule() { result = "re" }
|
||||
}
|
||||
|
||||
/**
|
||||
* A class to find `re` methods escaping an expression.
|
||||
*
|
||||
* See https://docs.python.org/3/library/re.html#re.escape
|
||||
*/
|
||||
class ReEscape extends DataFlow::CallCfgNode, RegexEscape::Range {
|
||||
DataFlow::Node regexNode;
|
||||
|
||||
ReEscape() {
|
||||
this = API::moduleImport("re").getMember("escape").getACall() and
|
||||
regexNode = this.getArg(0)
|
||||
}
|
||||
|
||||
override DataFlow::Node getRegexNode() { result = regexNode }
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,24 @@
|
||||
/**
|
||||
* Provides a taint-tracking configuration for detecting LDAP injection vulnerabilities
|
||||
*/
|
||||
|
||||
import python
|
||||
import experimental.semmle.python.Concepts
|
||||
import semmle.python.dataflow.new.DataFlow
|
||||
import semmle.python.dataflow.new.TaintTracking
|
||||
import semmle.python.dataflow.new.RemoteFlowSources
|
||||
|
||||
/**
|
||||
* A taint-tracking configuration for detecting LDAP injections.
|
||||
*/
|
||||
class LDAPInjectionFlowConfig extends TaintTracking::Configuration {
|
||||
LDAPInjectionFlowConfig() { this = "LDAPInjectionFlowConfig" }
|
||||
|
||||
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
|
||||
|
||||
override predicate isSink(DataFlow::Node sink) { sink = any(LDAPQuery ldapQuery).getQuery() }
|
||||
|
||||
override predicate isSanitizer(DataFlow::Node sanitizer) {
|
||||
sanitizer = any(LDAPEscape ldapEsc).getAnInput()
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,53 @@
|
||||
/**
|
||||
* Provides a taint-tracking configuration for detecting regular expression injection
|
||||
* vulnerabilities.
|
||||
*/
|
||||
|
||||
import python
|
||||
import experimental.semmle.python.Concepts
|
||||
import semmle.python.dataflow.new.DataFlow
|
||||
import semmle.python.dataflow.new.TaintTracking
|
||||
import semmle.python.dataflow.new.RemoteFlowSources
|
||||
|
||||
/**
|
||||
* A class to find methods executing regular expressions.
|
||||
*
|
||||
* See `RegexExecution`
|
||||
*/
|
||||
class RegexInjectionSink extends DataFlow::Node {
|
||||
string regexModule;
|
||||
Attribute regexMethod;
|
||||
|
||||
RegexInjectionSink() {
|
||||
exists(RegexExecution reExec |
|
||||
this = reExec.getRegexNode() and
|
||||
regexModule = reExec.getRegexModule() and
|
||||
regexMethod = reExec.(DataFlow::CallCfgNode).getFunction().asExpr().(Attribute)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the name of the module used to execute the regular expression.
|
||||
*/
|
||||
string getRegexModule() { result = regexModule }
|
||||
|
||||
/**
|
||||
* Gets the method used to execute the regular expression.
|
||||
*/
|
||||
Attribute getRegexMethod() { result = regexMethod }
|
||||
}
|
||||
|
||||
/**
|
||||
* A taint-tracking configuration for detecting regular expression injections.
|
||||
*/
|
||||
class RegexInjectionFlowConfig extends TaintTracking::Configuration {
|
||||
RegexInjectionFlowConfig() { this = "RegexInjectionFlowConfig" }
|
||||
|
||||
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
|
||||
|
||||
override predicate isSink(DataFlow::Node sink) { sink instanceof RegexInjectionSink }
|
||||
|
||||
override predicate isSanitizer(DataFlow::Node sanitizer) {
|
||||
sanitizer = any(RegexEscape reEscape).getRegexNode()
|
||||
}
|
||||
}
|
||||
@@ -25,7 +25,7 @@ duplicate classes.</p>
|
||||
</recommendation>
|
||||
<references>
|
||||
|
||||
<li>E. Juergens, F. Deissenboeck, B. Hummel and S. Wagner, <em>Do Code Clones Matter?</em>, 2009. (<a href="http://www4.in.tum.de/~juergens/publications/ICSE2009_RP_0110_juergens.pdf">available online</a>).</li>
|
||||
<li>E. Juergens, F. Deissenboeck, B. Hummel and S. Wagner, <em>Do Code Clones Matter?</em>, 2009. (<a href="https://wwwbroy.in.tum.de/~juergens/publications/ICSE2009_RP_0110_juergens.pdf">available online</a>).</li>
|
||||
|
||||
</references>
|
||||
</qhelp>
|
||||
|
||||
@@ -25,7 +25,7 @@ importing that module into the original module.</p>
|
||||
</recommendation>
|
||||
<references>
|
||||
|
||||
<li>E. Juergens, F. Deissenboeck, B. Hummel and S. Wagner, <em>Do Code Clones Matter?</em>, 2009. (<a href="http://www4.in.tum.de/~juergens/publications/ICSE2009_RP_0110_juergens.pdf">available online</a>).</li>
|
||||
<li>E. Juergens, F. Deissenboeck, B. Hummel and S. Wagner, <em>Do Code Clones Matter?</em>, 2009. (<a href="https://wwwbroy.in.tum.de/~juergens/publications/ICSE2009_RP_0110_juergens.pdf">available online</a>).</li>
|
||||
|
||||
</references>
|
||||
</qhelp>
|
||||
|
||||
@@ -19,7 +19,7 @@ of the shared code into its own module and import that module into the original.
|
||||
</recommendation>
|
||||
<references>
|
||||
|
||||
<li>E. Juergens, F. Deissenboeck, B. Hummel and S. Wagner, <em>Do Code Clones Matter?</em>, 2009. (<a href="http://www4.in.tum.de/~juergens/publications/ICSE2009_RP_0110_juergens.pdf">available online</a>).</li>
|
||||
<li>E. Juergens, F. Deissenboeck, B. Hummel and S. Wagner, <em>Do Code Clones Matter?</em>, 2009. (<a href="https://wwwbroy.in.tum.de/~juergens/publications/ICSE2009_RP_0110_juergens.pdf">available online</a>).</li>
|
||||
|
||||
</references>
|
||||
</qhelp>
|
||||
|
||||
2
python/ql/src/external/SimilarFunction.qhelp
vendored
@@ -25,7 +25,7 @@ almost all of their lines are the same, then consider extracting the same lines
|
||||
</recommendation>
|
||||
<references>
|
||||
|
||||
<li>E. Juergens, F. Deissenboeck, B. Hummel and S. Wagner, <em>Do Code Clones Matter?</em>, 2009. (<a href="http://www4.in.tum.de/~juergens/publications/ICSE2009_RP_0110_juergens.pdf">available online</a>).</li>
|
||||
<li>E. Juergens, F. Deissenboeck, B. Hummel and S. Wagner, <em>Do Code Clones Matter?</em>, 2009. (<a href="https://wwwbroy.in.tum.de/~juergens/publications/ICSE2009_RP_0110_juergens.pdf">available online</a>).</li>
|
||||
|
||||
</references>
|
||||
</qhelp>
|
||||
|
||||
@@ -1,174 +1,3 @@
|
||||
/**
|
||||
* Provides classes modeling cryptographic algorithms, separated into strong and weak variants.
|
||||
*
|
||||
* The classification into strong and weak are based on Wikipedia, OWASP and google (2017).
|
||||
*/
|
||||
/** DEPRECATED: Use `semmle.python.concepts.CryptoAlgorithms` instead. */
|
||||
|
||||
/**
|
||||
* Names of cryptographic algorithms, separated into strong and weak variants.
|
||||
*
|
||||
* The names are normalized: upper-case, no spaces, dashes or underscores.
|
||||
*
|
||||
* The names are inspired by the names used in real world crypto libraries.
|
||||
*
|
||||
* The classification into strong and weak are based on Wikipedia, OWASP and google (2017).
|
||||
*/
|
||||
private module AlgorithmNames {
|
||||
predicate isStrongHashingAlgorithm(string name) {
|
||||
name = "DSA" or
|
||||
name = "ED25519" or
|
||||
name = "ES256" or
|
||||
name = "ECDSA256" or
|
||||
name = "ES384" or
|
||||
name = "ECDSA384" or
|
||||
name = "ES512" or
|
||||
name = "ECDSA512" or
|
||||
name = "SHA2" or
|
||||
name = "SHA224" or
|
||||
name = "SHA256" or
|
||||
name = "SHA384" or
|
||||
name = "SHA512" or
|
||||
name = "SHA3"
|
||||
}
|
||||
|
||||
predicate isWeakHashingAlgorithm(string name) {
|
||||
name = "HAVEL128" or
|
||||
name = "MD2" or
|
||||
name = "MD4" or
|
||||
name = "MD5" or
|
||||
name = "PANAMA" or
|
||||
name = "RIPEMD" or
|
||||
name = "RIPEMD128" or
|
||||
name = "RIPEMD256" or
|
||||
name = "RIPEMD160" or
|
||||
name = "RIPEMD320" or
|
||||
name = "SHA0" or
|
||||
name = "SHA1"
|
||||
}
|
||||
|
||||
predicate isStrongEncryptionAlgorithm(string name) {
|
||||
name = "AES" or
|
||||
name = "AES128" or
|
||||
name = "AES192" or
|
||||
name = "AES256" or
|
||||
name = "AES512" or
|
||||
name = "RSA" or
|
||||
name = "RABBIT" or
|
||||
name = "BLOWFISH"
|
||||
}
|
||||
|
||||
predicate isWeakEncryptionAlgorithm(string name) {
|
||||
name = "DES" or
|
||||
name = "3DES" or
|
||||
name = "TRIPLEDES" or
|
||||
name = "TDEA" or
|
||||
name = "TRIPLEDEA" or
|
||||
name = "ARC2" or
|
||||
name = "RC2" or
|
||||
name = "ARC4" or
|
||||
name = "RC4" or
|
||||
name = "ARCFOUR" or
|
||||
name = "ARC5" or
|
||||
name = "RC5"
|
||||
}
|
||||
|
||||
predicate isStrongPasswordHashingAlgorithm(string name) {
|
||||
name = "ARGON2" or
|
||||
name = "PBKDF2" or
|
||||
name = "BCRYPT" or
|
||||
name = "SCRYPT"
|
||||
}
|
||||
|
||||
predicate isWeakPasswordHashingAlgorithm(string name) { none() }
|
||||
}
|
||||
|
||||
private import AlgorithmNames
|
||||
|
||||
/**
|
||||
* A cryptographic algorithm.
|
||||
*/
|
||||
private newtype TCryptographicAlgorithm =
|
||||
MkHashingAlgorithm(string name, boolean isWeak) {
|
||||
isStrongHashingAlgorithm(name) and isWeak = false
|
||||
or
|
||||
isWeakHashingAlgorithm(name) and isWeak = true
|
||||
} or
|
||||
MkEncryptionAlgorithm(string name, boolean isWeak) {
|
||||
isStrongEncryptionAlgorithm(name) and isWeak = false
|
||||
or
|
||||
isWeakEncryptionAlgorithm(name) and isWeak = true
|
||||
} or
|
||||
MkPasswordHashingAlgorithm(string name, boolean isWeak) {
|
||||
isStrongPasswordHashingAlgorithm(name) and isWeak = false
|
||||
or
|
||||
isWeakPasswordHashingAlgorithm(name) and isWeak = true
|
||||
}
|
||||
|
||||
/**
|
||||
* A cryptographic algorithm.
|
||||
*/
|
||||
abstract class CryptographicAlgorithm extends TCryptographicAlgorithm {
|
||||
/** Gets a textual representation of this element. */
|
||||
string toString() { result = getName() }
|
||||
|
||||
/**
|
||||
* Gets the normalized name of this algorithm (upper-case, no spaces, dashes or underscores).
|
||||
*/
|
||||
abstract string getName();
|
||||
|
||||
/**
|
||||
* Holds if the name of this algorithm matches `name` modulo case,
|
||||
* white space, dashes, and underscores.
|
||||
*/
|
||||
bindingset[name]
|
||||
predicate matchesName(string name) {
|
||||
name.toUpperCase().regexpReplaceAll("[-_ ]", "") = getName()
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if this algorithm is weak.
|
||||
*/
|
||||
abstract predicate isWeak();
|
||||
}
|
||||
|
||||
/**
|
||||
* A hashing algorithm such as `MD5` or `SHA512`.
|
||||
*/
|
||||
class HashingAlgorithm extends MkHashingAlgorithm, CryptographicAlgorithm {
|
||||
string name;
|
||||
boolean isWeak;
|
||||
|
||||
HashingAlgorithm() { this = MkHashingAlgorithm(name, isWeak) }
|
||||
|
||||
override string getName() { result = name }
|
||||
|
||||
override predicate isWeak() { isWeak = true }
|
||||
}
|
||||
|
||||
/**
|
||||
* An encryption algorithm such as `DES` or `AES512`.
|
||||
*/
|
||||
class EncryptionAlgorithm extends MkEncryptionAlgorithm, CryptographicAlgorithm {
|
||||
string name;
|
||||
boolean isWeak;
|
||||
|
||||
EncryptionAlgorithm() { this = MkEncryptionAlgorithm(name, isWeak) }
|
||||
|
||||
override string getName() { result = name }
|
||||
|
||||
override predicate isWeak() { isWeak = true }
|
||||
}
|
||||
|
||||
/**
|
||||
* A password hashing algorithm such as `PBKDF2` or `SCRYPT`.
|
||||
*/
|
||||
class PasswordHashingAlgorithm extends MkPasswordHashingAlgorithm, CryptographicAlgorithm {
|
||||
string name;
|
||||
boolean isWeak;
|
||||
|
||||
PasswordHashingAlgorithm() { this = MkPasswordHashingAlgorithm(name, isWeak) }
|
||||
|
||||
override string getName() { result = name }
|
||||
|
||||
override predicate isWeak() { isWeak = true }
|
||||
}
|
||||
import semmle.python.concepts.CryptoAlgorithms
|
||||
|
||||
@@ -97,6 +97,11 @@ module API {
|
||||
*/
|
||||
Node getASubclass() { result = getASuccessor(Label::subclass()) }
|
||||
|
||||
/**
|
||||
* Gets a node representing the result from awaiting this node.
|
||||
*/
|
||||
Node getAwaited() { result = getASuccessor(Label::await()) }
|
||||
|
||||
/**
|
||||
* Gets a string representation of the lexicographically least among all shortest access paths
|
||||
* from the root to this node.
|
||||
@@ -349,22 +354,95 @@ module API {
|
||||
)
|
||||
}
|
||||
|
||||
private import semmle.python.types.Builtins as Builtins
|
||||
/** Gets the name of a known built-in. */
|
||||
private string getBuiltInName() {
|
||||
// These lists were created by inspecting the `builtins` and `__builtin__` modules in
|
||||
// Python 3 and 2 respectively, using the `dir` built-in.
|
||||
// Built-in functions and exceptions shared between Python 2 and 3
|
||||
result in [
|
||||
"abs", "all", "any", "bin", "bool", "bytearray", "callable", "chr", "classmethod",
|
||||
"compile", "complex", "delattr", "dict", "dir", "divmod", "enumerate", "eval", "filter",
|
||||
"float", "format", "frozenset", "getattr", "globals", "hasattr", "hash", "help", "hex",
|
||||
"id", "input", "int", "isinstance", "issubclass", "iter", "len", "list", "locals", "map",
|
||||
"max", "memoryview", "min", "next", "object", "oct", "open", "ord", "pow", "print",
|
||||
"property", "range", "repr", "reversed", "round", "set", "setattr", "slice", "sorted",
|
||||
"staticmethod", "str", "sum", "super", "tuple", "type", "vars", "zip", "__import__",
|
||||
// Exceptions
|
||||
"ArithmeticError", "AssertionError", "AttributeError", "BaseException", "BufferError",
|
||||
"BytesWarning", "DeprecationWarning", "EOFError", "EnvironmentError", "Exception",
|
||||
"FloatingPointError", "FutureWarning", "GeneratorExit", "IOError", "ImportError",
|
||||
"ImportWarning", "IndentationError", "IndexError", "KeyError", "KeyboardInterrupt",
|
||||
"LookupError", "MemoryError", "NameError", "NotImplemented", "NotImplementedError",
|
||||
"OSError", "OverflowError", "PendingDeprecationWarning", "ReferenceError", "RuntimeError",
|
||||
"RuntimeWarning", "StandardError", "StopIteration", "SyntaxError", "SyntaxWarning",
|
||||
"SystemError", "SystemExit", "TabError", "TypeError", "UnboundLocalError",
|
||||
"UnicodeDecodeError", "UnicodeEncodeError", "UnicodeError", "UnicodeTranslateError",
|
||||
"UnicodeWarning", "UserWarning", "ValueError", "Warning", "ZeroDivisionError",
|
||||
// Added for compatibility
|
||||
"exec"
|
||||
]
|
||||
or
|
||||
// Built-in constants shared between Python 2 and 3
|
||||
result in ["False", "True", "None", "NotImplemented", "Ellipsis", "__debug__"]
|
||||
or
|
||||
// Python 3 only
|
||||
result in [
|
||||
"ascii", "breakpoint", "bytes", "exec",
|
||||
// Exceptions
|
||||
"BlockingIOError", "BrokenPipeError", "ChildProcessError", "ConnectionAbortedError",
|
||||
"ConnectionError", "ConnectionRefusedError", "ConnectionResetError", "FileExistsError",
|
||||
"FileNotFoundError", "InterruptedError", "IsADirectoryError", "ModuleNotFoundError",
|
||||
"NotADirectoryError", "PermissionError", "ProcessLookupError", "RecursionError",
|
||||
"ResourceWarning", "StopAsyncIteration", "TimeoutError"
|
||||
]
|
||||
or
|
||||
// Python 2 only
|
||||
result in [
|
||||
"basestring", "cmp", "execfile", "file", "long", "raw_input", "reduce", "reload",
|
||||
"unichr", "unicode", "xrange"
|
||||
]
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a data flow node that is likely to refer to a built-in with the name `name`.
|
||||
*
|
||||
* Currently this is an over-approximation, and does not account for things like overwriting a
|
||||
* Currently this is an over-approximation, and may not account for things like overwriting a
|
||||
* built-in with a different value.
|
||||
*/
|
||||
private DataFlow::Node likely_builtin(string name) {
|
||||
result.asCfgNode() =
|
||||
any(NameNode n |
|
||||
n.isGlobal() and
|
||||
n.isLoad() and
|
||||
name = n.getId() and
|
||||
name = any(Builtins::Builtin b).getName()
|
||||
)
|
||||
exists(Module m |
|
||||
result.asCfgNode() =
|
||||
any(NameNode n |
|
||||
possible_builtin_accessed_in_module(n, name, m) and
|
||||
not possible_builtin_defined_in_module(name, m)
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if a global variable called `name` (which is also the name of a built-in) is assigned
|
||||
* a value in the module `m`.
|
||||
*/
|
||||
private predicate possible_builtin_defined_in_module(string name, Module m) {
|
||||
exists(NameNode n |
|
||||
not exists(LocalVariable v | n.defines(v)) and
|
||||
n.isStore() and
|
||||
name = n.getId() and
|
||||
name = getBuiltInName() and
|
||||
m = n.getEnclosingModule()
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `n` is an access of a global variable called `name` (which is also the name of a
|
||||
* built-in) inside the module `m`.
|
||||
*/
|
||||
private predicate possible_builtin_accessed_in_module(NameNode n, string name, Module m) {
|
||||
n.isGlobal() and
|
||||
n.isLoad() and
|
||||
name = n.getId() and
|
||||
name = getBuiltInName() and
|
||||
m = n.getEnclosingModule()
|
||||
}
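As a small illustration of what the two helper predicates above distinguish (the module names are hypothetical): in the first module a load of `input` is still treated as a likely reference to the built-in, while in the second the module-level assignment means later loads of `input` are no longer considered uses of the built-in.

```python
# plain_use.py: no global named `input` is ever assigned in this module,
# so the heuristic treats this load of `input` as the built-in.
value = input("> ")
```

```python
# shadowed.py: the module defines its own global `input`, so the heuristic
# no longer treats loads of `input` in this module as the built-in.
def input(prompt):
    return "always the same answer"

value = input("> ")
```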
|
||||
|
||||
/**
|
||||
@@ -396,6 +474,14 @@ module API {
|
||||
exists(DataFlow::Node superclass | pred.flowsTo(superclass) |
|
||||
ref.asExpr().(ClassExpr).getABase() = superclass.asExpr()
|
||||
)
|
||||
or
|
||||
// awaiting
|
||||
exists(Await await, DataFlow::Node awaitedValue |
|
||||
lbl = Label::await() and
|
||||
ref.asExpr() = await and
|
||||
await.getValue() = awaitedValue.asExpr() and
|
||||
pred.flowsTo(awaitedValue)
|
||||
)
|
||||
)
|
||||
or
|
||||
// Built-ins, treated as members of the module `builtins`
|
||||
@@ -422,9 +508,9 @@ module API {
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a data-flow node to which `nd`, which is a use of an API-graph node, flows.
|
||||
* Gets a data-flow node to which `src`, which is a use of an API-graph node, flows.
|
||||
*
|
||||
* The flow from `nd` to that node may be inter-procedural.
|
||||
* The flow from `src` to that node may be inter-procedural.
|
||||
*/
|
||||
private DataFlow::LocalSourceNode trackUseNode(
|
||||
DataFlow::LocalSourceNode src, DataFlow::TypeTracker t
|
||||
@@ -433,30 +519,26 @@ module API {
|
||||
use(_, src) and
|
||||
result = src
|
||||
or
|
||||
// Due to bad performance when using `trackUseNode(t2, attr_name).track(t2, t)`
|
||||
// we have inlined that code and forced a join
|
||||
exists(DataFlow::StepSummary summary |
|
||||
t = trackUseNode_first_join(src, result, summary).append(summary)
|
||||
)
|
||||
}
|
||||
|
||||
pragma[nomagic]
|
||||
private DataFlow::TypeTracker trackUseNode_first_join(
|
||||
DataFlow::LocalSourceNode src, DataFlow::LocalSourceNode res, DataFlow::StepSummary summary
|
||||
) {
|
||||
DataFlow::StepSummary::step(trackUseNode(src, result), res, summary)
|
||||
exists(DataFlow::TypeTracker t2 | result = trackUseNode(src, t2).track(t2, t))
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a data-flow node to which `src`, which is a use of an API-graph node, flows.
|
||||
*
|
||||
* The flow from `src` to that node may be inter-procedural.
|
||||
*/
|
||||
cached
|
||||
DataFlow::LocalSourceNode trackUseNode(DataFlow::LocalSourceNode src) {
|
||||
result = trackUseNode(src, DataFlow::TypeTracker::end())
|
||||
result = trackUseNode(src, DataFlow::TypeTracker::end()) and
|
||||
// We exclude module variable nodes, as these do not correspond to real uses.
|
||||
not result instanceof DataFlow::ModuleVariableNode
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if there is an edge from `pred` to `succ` in the API graph that is labeled with `lbl`.
|
||||
*/
|
||||
cached
|
||||
predicate edge(Node pred, string lbl, Node succ) {
|
||||
predicate edge(TApiNode pred, string lbl, TApiNode succ) {
|
||||
/* There's an edge from the root node for each imported module. */
|
||||
exists(string m |
|
||||
pred = MkRoot() and
|
||||
@@ -516,5 +598,9 @@ private module Label {
|
||||
/** Gets the `return` edge label. */
|
||||
string return() { result = "getReturn()" }
|
||||
|
||||
/** Gets the `subclass` edge label. */
|
||||
string subclass() { result = "getASubclass()" }
|
||||
|
||||
/** Gets the `await` edge label. */
|
||||
string await() { result = "getAwaited()" }
|
||||
}
|
||||
|
||||
@@ -527,7 +527,14 @@ module HTTP {
|
||||
}
|
||||
}
|
||||
|
||||
/** Provides models for cryptographic things. */
|
||||
/**
|
||||
* Provides models for cryptographic things.
|
||||
*
|
||||
* Note: The `CryptographicAlgorithm` class currently doesn't take weak keys into
|
||||
* consideration for the `isWeak` member predicate. So RSA is always considered
|
||||
* secure, although using a low number of bits will actually make it insecure. We plan
|
||||
* to improve our libraries in the future to more precisely capture this aspect.
|
||||
*/
|
||||
module Cryptography {
|
||||
/** Provides models for public-key cryptography, also called asymmetric cryptography. */
|
||||
module PublicKey {
|
||||
@@ -570,21 +577,7 @@ module Cryptography {
|
||||
arg = any(KeyGeneration::Range r).getKeySizeArg() and
|
||||
result = arg.getALocalSource()
|
||||
or
|
||||
// Due to bad performance when using normal setup with we have inlined that code and forced a join
|
||||
exists(DataFlow::TypeBackTracker t2 |
|
||||
exists(DataFlow::StepSummary summary |
|
||||
keysizeBacktracker_first_join(t2, arg, result, summary) and
|
||||
t = t2.prepend(summary)
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
pragma[nomagic]
|
||||
private predicate keysizeBacktracker_first_join(
|
||||
DataFlow::TypeBackTracker t2, DataFlow::Node arg, DataFlow::Node res,
|
||||
DataFlow::StepSummary summary
|
||||
) {
|
||||
DataFlow::StepSummary::step(res, keysizeBacktracker(t2, arg), summary)
|
||||
exists(DataFlow::TypeBackTracker t2 | result = keysizeBacktracker(t2, arg).backtrack(t2, t))
|
||||
}
|
||||
|
||||
/** Gets a back-reference to the keysize argument `arg` that was used to generate a new key-pair. */
|
||||
@@ -640,4 +633,43 @@ module Cryptography {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
import semmle.python.concepts.CryptoAlgorithms
|
||||
|
||||
/**
|
||||
* A data-flow node that is an application of a cryptographic algorithm. For example,
|
||||
* encryption, decryption, signature-validation.
|
||||
*
|
||||
* Extend this class to refine existing API models. If you want to model new APIs,
|
||||
* extend `CryptographicOperation::Range` instead.
|
||||
*/
|
||||
class CryptographicOperation extends DataFlow::Node {
|
||||
CryptographicOperation::Range range;
|
||||
|
||||
CryptographicOperation() { this = range }
|
||||
|
||||
/** Gets the algorithm used, if it matches a known `CryptographicAlgorithm`. */
|
||||
CryptographicAlgorithm getAlgorithm() { result = range.getAlgorithm() }
|
||||
|
||||
/** Gets an input the algorithm is used on, for example the plain text input to be encrypted. */
|
||||
DataFlow::Node getAnInput() { result = range.getAnInput() }
|
||||
}

/** Provides classes for modeling new applications of cryptographic algorithms. */
module CryptographicOperation {
/**
* A data-flow node that is an application of a cryptographic algorithm. For example,
* encryption, decryption, signature-validation.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `CryptographicOperation` instead.
*/
abstract class Range extends DataFlow::Node {
/** Gets the algorithm used, if it matches a known `CryptographicAlgorithm`. */
abstract CryptographicAlgorithm getAlgorithm();

/** Gets an input the algorithm is used on, for example the plain text input to be encrypted. */
abstract DataFlow::Node getAnInput();
}
}
}

@@ -72,6 +72,33 @@ class File extends Container {
* are specified to be extracted.
*/
string getContents() { file_contents(this, result) }

/** Holds if this file is likely to get executed directly, and thus act as an entry point for execution. */
predicate isPossibleEntryPoint() {
// Only consider files in the source code, and not things like the standard library
exists(this.getRelativePath()) and
(
// The file doesn't have the extension `.py` but still contains Python statements
not this.getExtension().matches("py%") and
exists(Stmt s | s.getLocation().getFile() = this)
or
// The file contains the usual `if __name__ == '__main__':` construction
exists(If i, Name name, StrConst main, Cmpop op |
i.getScope().(Module).getFile() = this and
op instanceof Eq and
i.getTest().(Compare).compares(name, op, main) and
name.getId() = "__name__" and
main.getText() = "__main__"
)
or
// The file contains a `#!` line referencing the python interpreter
exists(Comment c |
c.getLocation().getFile() = this and
c.getLocation().getStartLine() = 1 and
c.getText().regexpMatch("^#! */.*python(2|3)?[ \\\\t]*$")
)
)
}
}
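// For example, a file like the following (the file itself is hypothetical) would
// satisfy `isPossibleEntryPoint()`, both via the shebang line and via the
// `__main__` check:
//
// ```python
// #!/usr/bin/env python3
// def main():
//     print("hello")
//
// if __name__ == '__main__':
//     main()
// ```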

private predicate occupied_line(File f, int n) {

@@ -2,17 +2,27 @@
* Helper file that imports all framework modeling.
*/

// If you add modeling of a new framework/library, remember to add it to the docs in
// `docs/codeql/support/reusables/frameworks.rst`
private import semmle.python.frameworks.Aioch
private import semmle.python.frameworks.Aiohttp
private import semmle.python.frameworks.ClickhouseDriver
private import semmle.python.frameworks.Cryptodome
private import semmle.python.frameworks.Cryptography
private import semmle.python.frameworks.Dill
private import semmle.python.frameworks.Django
private import semmle.python.frameworks.Fabric
private import semmle.python.frameworks.Flask
private import semmle.python.frameworks.Idna
private import semmle.python.frameworks.Invoke
private import semmle.python.frameworks.MysqlConnectorPython
private import semmle.python.frameworks.Multidict
private import semmle.python.frameworks.Mysql
private import semmle.python.frameworks.MySQLdb
private import semmle.python.frameworks.Psycopg2
private import semmle.python.frameworks.PyMySQL
private import semmle.python.frameworks.Simplejson
private import semmle.python.frameworks.Stdlib
private import semmle.python.frameworks.Tornado
private import semmle.python.frameworks.Ujson
private import semmle.python.frameworks.Yaml
private import semmle.python.frameworks.Yarl

@@ -205,11 +205,38 @@ private string moduleNameFromBase(Container file) {
file instanceof File and result = file.getStem()
}

/**
* Holds if `file` may be transitively imported from a file that may serve as the entry point of
* the execution.
*/
private predicate transitively_imported_from_entry_point(File file) {
file.getExtension().matches("%py%") and
exists(File importer |
// Only consider files that are in the source archive
exists(importer.getRelativePath()) and
importer.getParent() = file.getParent() and
exists(ImportExpr i |
i.getLocation().getFile() = importer and
i.getName() = file.getStem() and
// Disregard relative imports
i.getLevel() = 0
)
|
importer.isPossibleEntryPoint() or transitively_imported_from_entry_point(importer)
)
}
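// As an illustration (file names are hypothetical): if `main.py` below is a
// possible entry point, then its sibling `helper.py` is transitively imported from
// an entry point and may be referred to simply as `helper`:
//
// ```python
// # main.py
// import helper            # non-relative import of a sibling file
//
// if __name__ == '__main__':
//     helper.run()
// ```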

string moduleNameFromFile(Container file) {
exists(string basename |
basename = moduleNameFromBase(file) and
legalShortName(basename) and
legalShortName(basename)
|
result = moduleNameFromFile(file.getParent()) + "." + basename
or
// If `file` is a transitive import of a file that's executed directly, we allow references
// to it by its `basename`.
transitively_imported_from_entry_point(file) and
result = basename
)
or
isPotentialSourcePackage(file) and

174
python/ql/src/semmle/python/concepts/CryptoAlgorithms.qll
Normal file
@@ -0,0 +1,174 @@
/**
* Provides classes modeling cryptographic algorithms, separated into strong and weak variants.
*
* The classification into strong and weak is based on Wikipedia, OWASP and Google (2017).
*/

/**
* Names of cryptographic algorithms, separated into strong and weak variants.
*
* The names are normalized: upper-case, no spaces, dashes or underscores.
*
* The names are inspired by the names used in real world crypto libraries.
*
* The classification into strong and weak is based on Wikipedia, OWASP and Google (2017).
*/
private module AlgorithmNames {
predicate isStrongHashingAlgorithm(string name) {
name = "DSA" or
name = "ED25519" or
name = "ES256" or
name = "ECDSA256" or
name = "ES384" or
name = "ECDSA384" or
name = "ES512" or
name = "ECDSA512" or
name = "SHA2" or
name = "SHA224" or
name = "SHA256" or
name = "SHA384" or
name = "SHA512" or
name = "SHA3"
}

predicate isWeakHashingAlgorithm(string name) {
name = "HAVEL128" or
name = "MD2" or
name = "MD4" or
name = "MD5" or
name = "PANAMA" or
name = "RIPEMD" or
name = "RIPEMD128" or
name = "RIPEMD256" or
name = "RIPEMD160" or
name = "RIPEMD320" or
name = "SHA0" or
name = "SHA1"
}

predicate isStrongEncryptionAlgorithm(string name) {
name = "AES" or
name = "AES128" or
name = "AES192" or
name = "AES256" or
name = "AES512" or
name = "RSA" or
name = "RABBIT" or
name = "BLOWFISH"
}

predicate isWeakEncryptionAlgorithm(string name) {
name = "DES" or
name = "3DES" or
name = "TRIPLEDES" or
name = "TDEA" or
name = "TRIPLEDEA" or
name = "ARC2" or
name = "RC2" or
name = "ARC4" or
name = "RC4" or
name = "ARCFOUR" or
name = "ARC5" or
name = "RC5"
}

predicate isStrongPasswordHashingAlgorithm(string name) {
name = "ARGON2" or
name = "PBKDF2" or
name = "BCRYPT" or
name = "SCRYPT"
}

predicate isWeakPasswordHashingAlgorithm(string name) { none() }
}

private import AlgorithmNames

/**
* A cryptographic algorithm.
*/
private newtype TCryptographicAlgorithm =
MkHashingAlgorithm(string name, boolean isWeak) {
isStrongHashingAlgorithm(name) and isWeak = false
or
isWeakHashingAlgorithm(name) and isWeak = true
} or
MkEncryptionAlgorithm(string name, boolean isWeak) {
isStrongEncryptionAlgorithm(name) and isWeak = false
or
isWeakEncryptionAlgorithm(name) and isWeak = true
} or
MkPasswordHashingAlgorithm(string name, boolean isWeak) {
isStrongPasswordHashingAlgorithm(name) and isWeak = false
or
isWeakPasswordHashingAlgorithm(name) and isWeak = true
}

/**
* A cryptographic algorithm.
*/
abstract class CryptographicAlgorithm extends TCryptographicAlgorithm {
/** Gets a textual representation of this element. */
string toString() { result = getName() }

/**
* Gets the normalized name of this algorithm (upper-case, no spaces, dashes or underscores).
*/
abstract string getName();

/**
* Holds if the name of this algorithm matches `name` modulo case,
* white space, dashes, and underscores.
*/
bindingset[name]
predicate matchesName(string name) {
name.toUpperCase().regexpReplaceAll("[-_ ]", "") = getName()
}

/**
* Holds if this algorithm is weak.
*/
abstract predicate isWeak();
}
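// The normalization used by `matchesName` corresponds to the following Python
// sketch (a re-implementation for illustration only):
//
// ```python
// import re
//
// def normalize(name):
//     # upper-case, then strip spaces, dashes and underscores
//     return re.sub(r"[-_ ]", "", name.upper())
//
// assert normalize("sha-256") == "SHA256"
// assert normalize("Triple DES") == "TRIPLEDES"
// ```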

/**
* A hashing algorithm such as `MD5` or `SHA512`.
*/
class HashingAlgorithm extends MkHashingAlgorithm, CryptographicAlgorithm {
string name;
boolean isWeak;

HashingAlgorithm() { this = MkHashingAlgorithm(name, isWeak) }

override string getName() { result = name }

override predicate isWeak() { isWeak = true }
}

/**
* An encryption algorithm such as `DES` or `AES512`.
*/
class EncryptionAlgorithm extends MkEncryptionAlgorithm, CryptographicAlgorithm {
string name;
boolean isWeak;

EncryptionAlgorithm() { this = MkEncryptionAlgorithm(name, isWeak) }

override string getName() { result = name }

override predicate isWeak() { isWeak = true }
}

/**
* A password hashing algorithm such as `PBKDF2` or `SCRYPT`.
*/
class PasswordHashingAlgorithm extends MkPasswordHashingAlgorithm, CryptographicAlgorithm {
string name;
boolean isWeak;

PasswordHashingAlgorithm() { this = MkPasswordHashingAlgorithm(name, isWeak) }

override string getName() { result = name }

override predicate isWeak() { isWeak = true }
}
@@ -0,0 +1,265 @@
/**
* Provides an extension point for modeling sensitive data, such as secrets, certificates, or passwords.
* Sensitive data can be interesting to use as data-flow sources in security queries.
*/

private import python
private import semmle.python.dataflow.new.DataFlow
// Need to import `semmle.python.Frameworks` since frameworks can extend `SensitiveDataSource::Range`
private import semmle.python.Frameworks
private import semmle.python.security.internal.SensitiveDataHeuristics as SensitiveDataHeuristics

// We export these explicitly, so we don't also export the `HeuristicNames` module.
class SensitiveDataClassification = SensitiveDataHeuristics::SensitiveDataClassification;

module SensitiveDataClassification = SensitiveDataHeuristics::SensitiveDataClassification;

/**
* A data flow source of sensitive data, such as secrets, certificates, or passwords.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `SensitiveDataSource::Range` instead.
*/
class SensitiveDataSource extends DataFlow::Node {
SensitiveDataSource::Range range;

SensitiveDataSource() { this = range }

/**
* Gets the classification of the sensitive data.
*/
SensitiveDataClassification getClassification() { result = range.getClassification() }
}

/** Provides a class for modeling new sources of sensitive data, such as secrets, certificates, or passwords. */
module SensitiveDataSource {
/**
* A data flow source of sensitive data, such as secrets, certificates, or passwords.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `SensitiveDataSource` instead.
*/
abstract class Range extends DataFlow::Node {
/**
* Gets the classification of the sensitive data.
*/
abstract SensitiveDataClassification getClassification();
}
}

/** Actual sensitive data modeling */
private module SensitiveDataModeling {
private import SensitiveDataHeuristics::HeuristicNames

/**
* Gets a reference to a function that is considered to be a sensitive source of
* `classification`.
*/
private DataFlow::LocalSourceNode sensitiveFunction(
DataFlow::TypeTracker t, SensitiveDataClassification classification
) {
t.start() and
exists(Function f |
nameIndicatesSensitiveData(f.getName(), classification) and
result.asExpr() = f.getDefinition()
)
or
exists(DataFlow::TypeTracker t2 | result = sensitiveFunction(t2, classification).track(t2, t))
}

/**
* Gets a reference to a function that is considered to be a sensitive source of
* `classification`.
*/
DataFlow::Node sensitiveFunction(SensitiveDataClassification classification) {
sensitiveFunction(DataFlow::TypeTracker::end(), classification).flowsTo(result)
}

/**
* Gets a reference to a string constant that, if used as the key in a lookup,
* indicates the presence of sensitive data with `classification`.
*/
private DataFlow::LocalSourceNode sensitiveLookupStringConst(
DataFlow::TypeTracker t, SensitiveDataClassification classification
) {
t.start() and
nameIndicatesSensitiveData(result.asExpr().(StrConst).getText(), classification)
or
exists(DataFlow::TypeTracker t2 |
result = sensitiveLookupStringConst(t2, classification).track(t2, t)
)
}

/**
* Gets a reference to a string constant that, if used as the key in a lookup,
* indicates the presence of sensitive data with `classification`.
*
* Also see `extraStepForCalls`.
*/
DataFlow::Node sensitiveLookupStringConst(SensitiveDataClassification classification) {
sensitiveLookupStringConst(DataFlow::TypeTracker::end(), classification).flowsTo(result)
}

/** A function call that is considered a source of sensitive data. */
class SensitiveFunctionCall extends SensitiveDataSource::Range, DataFlow::CallCfgNode {
SensitiveDataClassification classification;

SensitiveFunctionCall() {
this.getFunction() = sensitiveFunction(classification)
or
// to cover functions that we don't have the definition for, and where the
// reference to the function has not already been marked as being sensitive
nameIndicatesSensitiveData(this.getFunction().asCfgNode().(NameNode).getId(), classification)
}

override SensitiveDataClassification getClassification() { result = classification }
}
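// For example (all names are hypothetical), both calls below would be treated as
// `SensitiveFunctionCall` sources with a password classification: the first because
// the called function's definition has a sensitive name, the second because the
// call goes directly to a sensitively named identifier with no known definition:
//
// ```python
// def get_password():
//     return vault.read("db")   # `vault` is an assumed helper
//
// pw = get_password()
// pw2 = fetch_password()        # no definition available; the name alone suffices
// ```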

/**
* Tracks any modeled source of sensitive data (with any classification),
* to limit the scope of `extraStepForCalls`. See its QLDoc for more context.
*/
private DataFlow::LocalSourceNode possibleSensitiveCallable(DataFlow::TypeTracker t) {
t.start() and
result instanceof SensitiveDataSource
or
exists(DataFlow::TypeTracker t2 | result = possibleSensitiveCallable(t2).track(t2, t))
}

/**
* Tracks any modeled source of sensitive data (with any classification),
* to limit the scope of `extraStepForCalls`. See its QLDoc for more context.
*/
private DataFlow::Node possibleSensitiveCallable() {
possibleSensitiveCallable(DataFlow::TypeTracker::end()).flowsTo(result)
}

/**
* Holds if the step from `nodeFrom` to `nodeTo` should be considered a
* taint-flow step for sensitive data, to ensure calls are handled correctly.
*
* To handle calls properly, while preserving a good source for path explanations,
* you need to include this predicate as an additional taint step in your taint-tracking
* configurations.
*
* The core problem can be illustrated by the example below. If we consider the
* `print` call a sink, what path and what source do we want to show? My initial approach
* would be to use type-tracking to propagate from the `not_found.get_passwd` attribute
* lookup, to the use of `non_sensitive_name`, and then create a new `SensitiveDataSource::Range`
* like `SensitiveFunctionCall`. Although that seems likely to work, it will also end up
* with a non-optimal path, which starts at _bad source_, and therefore doesn't show
* how we figured out that `non_sensitive_name`
* could be a function that returns a password (and in cases where there are many calls to
* `my_func` it will be annoying for someone to figure this out manually).
*
* By including this additional taint-step in the taint-tracking configuration, it's possible
* to get a path explanation going from _good source_ to the sink.
*
* ```python
* def my_func(non_sensitive_name):
*     x = non_sensitive_name() # <-- bad source
*     print(x) # <-- sink
*
* import not_found
* f = not_found.get_passwd # <-- good source
* my_func(f)
* ```
*/
predicate extraStepForCalls(DataFlow::Node nodeFrom, DataFlow::CallCfgNode nodeTo) {
// However, we do still use the type-tracking approach to limit the size of this
// predicate.
nodeTo.getFunction() = nodeFrom and
nodeFrom = possibleSensitiveCallable()
}

/**
* Any kind of variable assignment (also including with/for) where the name indicates
* it contains sensitive data.
*
* Note: We _could_ make any access to a variable with a sensitive name a source of
* sensitive data, but to make path explanations in data-flow/taint-tracking good,
* we don't want that, since it works against allowing users to understand the flow
* in the program (which is the whole point).
*
* Note: To make data-flow/taint-tracking work, the expression that is _assigned_ to
* the variable is marked as the source (as compared to marking the variable as the
* source).
*/
class SensitiveVariableAssignment extends SensitiveDataSource::Range {
SensitiveDataClassification classification;

SensitiveVariableAssignment() {
exists(DefinitionNode def |
nameIndicatesSensitiveData(def.(NameNode).getId(), classification) and
(
this.asCfgNode() = def.getValue()
or
this.asCfgNode() = def.getValue().(ForNode).getSequence()
) and
not this.asExpr() instanceof FunctionExpr and
not this.asExpr() instanceof ClassExpr
)
or
exists(With with |
nameIndicatesSensitiveData(with.getOptionalVars().(Name).getId(), classification) and
this.asExpr() = with.getContextExpr()
)
}

override SensitiveDataClassification getClassification() { result = classification }
}
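// Illustrative examples (all helper names are hypothetical); in each case it is the
// right-hand side or context expression that is marked as the source:
//
// ```python
// password = load_config()["db"]        # plain assignment
//
// for token in fetch_tokens():          # for-loop: the sequence is the source
//     use(token)
//
// with open_keyfile() as private_key:   # with: the context expression is the source
//     sign(private_key)
// ```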

/** An attribute access that is considered a source of sensitive data. */
class SensitiveAttributeAccess extends SensitiveDataSource::Range {
SensitiveDataClassification classification;

SensitiveAttributeAccess() {
// Things like `foo.<sensitive-name>` or `from <module> import <sensitive-name>`
// I considered excluding any `from ... import something_sensitive`, but then realized that
// we should flag up `from ... import password as ...` as a password
nameIndicatesSensitiveData(this.(DataFlow::AttrRead).getAttributeName(), classification)
or
// Things like `getattr(foo, <reference-to-string>)`
this.(DataFlow::AttrRead).getAttributeNameExpr() = sensitiveLookupStringConst(classification)
}

override SensitiveDataClassification getClassification() { result = classification }
}

/** A subscript, where the key indicates the result will be sensitive data. */
class SensitiveSubscript extends SensitiveDataSource::Range {
SensitiveDataClassification classification;

SensitiveSubscript() {
this.asCfgNode().(SubscriptNode).getIndex() =
sensitiveLookupStringConst(classification).asCfgNode()
}

override SensitiveDataClassification getClassification() { result = classification }
}

/** A call to `get` on an object, where the key indicates the result will be sensitive data. */
class SensitiveGetCall extends SensitiveDataSource::Range, DataFlow::CallCfgNode {
SensitiveDataClassification classification;

SensitiveGetCall() {
this.getFunction().asCfgNode().(AttrNode).getName() = "get" and
this.getArg(0) = sensitiveLookupStringConst(classification)
}

override SensitiveDataClassification getClassification() { result = classification }
}
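// Typical shapes matched by the three classes above (illustrative only; `settings`
// and `config` are assumed application objects):
//
// ```python
// import os
//
// secret = settings.api_key               # SensitiveAttributeAccess
// secret = getattr(settings, "api_key")   # SensitiveAttributeAccess via getattr
// secret = config["password"]             # SensitiveSubscript
// secret = os.environ.get("DB_PASSWORD")  # SensitiveGetCall
// ```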

/** A parameter where the name indicates it will receive sensitive data. */
class SensitiveParameter extends SensitiveDataSource::Range, DataFlow::ParameterNode {
SensitiveDataClassification classification;

SensitiveParameter() {
nameIndicatesSensitiveData(this.getParameter().getName(), classification)
}

override SensitiveDataClassification getClassification() { result = classification }
}
}

predicate sensitiveDataExtraStepForCalls = SensitiveDataModeling::extraStepForCalls/2;
@@ -1,173 +1,16 @@
|
||||
/** Step Summaries and Type Tracking */
|
||||
/**
|
||||
* This file acts as a wrapper for `internal.TypeTracker`, exposing some of the functionality with
|
||||
* names that are more appropriate for Python.
|
||||
*/
|
||||
|
||||
private import python
|
||||
private import internal.DataFlowPublic
|
||||
private import internal.DataFlowPrivate
|
||||
private import internal.TypeTracker as Internal
|
||||
|
||||
/** Any string that may appear as the name of an attribute or access path. */
|
||||
class AttributeName extends string {
|
||||
AttributeName() { this = any(AttrRef a).getAttributeName() }
|
||||
}
|
||||
class AttributeName = Internal::ContentName;
|
||||
|
||||
/** Either an attribute name, or the empty string (representing no attribute). */
|
||||
class OptionalAttributeName extends string {
|
||||
OptionalAttributeName() { this instanceof AttributeName or this = "" }
|
||||
}
|
||||
|
||||
/**
|
||||
* A description of a step on an inter-procedural data flow path.
|
||||
*/
|
||||
private newtype TStepSummary =
|
||||
LevelStep() or
|
||||
CallStep() or
|
||||
ReturnStep() or
|
||||
StoreStep(AttributeName attr) or
|
||||
LoadStep(AttributeName attr)
|
||||
|
||||
/**
|
||||
* INTERNAL: Use `TypeTracker` or `TypeBackTracker` instead.
|
||||
*
|
||||
* A description of a step on an inter-procedural data flow path.
|
||||
*/
|
||||
class StepSummary extends TStepSummary {
|
||||
/** Gets a textual representation of this step summary. */
|
||||
string toString() {
|
||||
this instanceof LevelStep and result = "level"
|
||||
or
|
||||
this instanceof CallStep and result = "call"
|
||||
or
|
||||
this instanceof ReturnStep and result = "return"
|
||||
or
|
||||
exists(string attr | this = StoreStep(attr) | result = "store " + attr)
|
||||
or
|
||||
exists(string attr | this = LoadStep(attr) | result = "load " + attr)
|
||||
}
|
||||
}
|
||||
|
||||
/** Provides predicates for updating step summaries (`StepSummary`s). */
|
||||
module StepSummary {
|
||||
/**
|
||||
* Gets the summary that corresponds to having taken a forwards
|
||||
* heap and/or inter-procedural step from `nodeFrom` to `nodeTo`.
|
||||
*/
|
||||
cached
|
||||
predicate step(LocalSourceNode nodeFrom, Node nodeTo, StepSummary summary) {
|
||||
exists(Node mid | typePreservingStep*(nodeFrom, mid) and smallstep(mid, nodeTo, summary))
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the summary that corresponds to having taken a forwards
|
||||
* local, heap and/or inter-procedural step from `nodeFrom` to `nodeTo`.
|
||||
*
|
||||
* Unlike `StepSummary::step`, this predicate does not compress
|
||||
* type-preserving steps.
|
||||
*/
|
||||
predicate smallstep(Node nodeFrom, Node nodeTo, StepSummary summary) {
|
||||
typePreservingStep(nodeFrom, nodeTo) and
|
||||
summary = LevelStep()
|
||||
or
|
||||
callStep(nodeFrom, nodeTo) and summary = CallStep()
|
||||
or
|
||||
returnStep(nodeFrom, nodeTo) and
|
||||
summary = ReturnStep()
|
||||
or
|
||||
exists(string attr |
|
||||
basicStoreStep(nodeFrom, nodeTo, attr) and
|
||||
summary = StoreStep(attr)
|
||||
or
|
||||
basicLoadStep(nodeFrom, nodeTo, attr) and summary = LoadStep(attr)
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/** Holds if it's reasonable to expect the data flow step from `nodeFrom` to `nodeTo` to preserve types. */
|
||||
private predicate typePreservingStep(Node nodeFrom, Node nodeTo) {
|
||||
simpleLocalFlowStep(nodeFrom, nodeTo) or
|
||||
jumpStep(nodeFrom, nodeTo)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a callable for the call where `nodeFrom` is used as the `i`'th argument.
|
||||
*
|
||||
* Helper predicate to avoid bad join order experienced in `callStep`.
|
||||
* This happened when `isParameterOf` was joined _before_ `getCallable`.
|
||||
*/
|
||||
pragma[nomagic]
|
||||
private DataFlowCallable getCallableForArgument(ArgumentNode nodeFrom, int i) {
|
||||
exists(DataFlowCall call |
|
||||
nodeFrom.argumentOf(call, i) and
|
||||
result = call.getCallable()
|
||||
)
|
||||
}
|
||||
|
||||
/** Holds if `nodeFrom` steps to `nodeTo` by being passed as a parameter in a call. */
|
||||
predicate callStep(ArgumentNode nodeFrom, ParameterNode nodeTo) {
|
||||
// TODO: Support special methods?
|
||||
exists(DataFlowCallable callable, int i |
|
||||
callable = getCallableForArgument(nodeFrom, i) and
|
||||
nodeTo.isParameterOf(callable, i)
|
||||
)
|
||||
}
|
||||
|
||||
/** Holds if `nodeFrom` steps to `nodeTo` by being returned from a call. */
|
||||
predicate returnStep(ReturnNode nodeFrom, Node nodeTo) {
|
||||
exists(DataFlowCall call |
|
||||
nodeFrom.getEnclosingCallable() = call.getCallable() and nodeTo.asCfgNode() = call.getNode()
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `nodeFrom` is being written to the `attr` attribute of the object in `nodeTo`.
|
||||
*
|
||||
* Note that the choice of `nodeTo` does not have to make sense "chronologically".
|
||||
* All we care about is whether the `attr` attribute of `nodeTo` can have a specific type,
|
||||
* and the assumption is that if a specific type appears here, then any access of that
|
||||
* particular attribute can yield something of that particular type.
|
||||
*
|
||||
* Thus, in an example such as
|
||||
*
|
||||
* ```python
|
||||
* def foo(y):
|
||||
* x = Foo()
|
||||
* bar(x)
|
||||
* x.attr = y
|
||||
* baz(x)
|
||||
*
|
||||
* def bar(x):
|
||||
* z = x.attr
|
||||
* ```
|
||||
* for the attribute write `x.attr = y`, we will have `attr` being the literal string `"attr"`,
|
||||
* `nodeFrom` will be `y`, and `nodeTo` will be the object `Foo()` created on the first line of the
|
||||
* function. This means we will track the fact that `x.attr` can have the type of `y` into the
|
||||
* assignment to `z` inside `bar`, even though this attribute write happens _after_ `bar` is called.
|
||||
*/
|
||||
predicate basicStoreStep(Node nodeFrom, LocalSourceNode nodeTo, string attr) {
|
||||
exists(AttrWrite a |
|
||||
a.mayHaveAttributeName(attr) and
|
||||
nodeFrom = a.getValue() and
|
||||
nodeTo.flowsTo(a.getObject())
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `nodeTo` is the result of accessing the `attr` attribute of `nodeFrom`.
|
||||
*/
|
||||
predicate basicLoadStep(Node nodeFrom, Node nodeTo, string attr) {
|
||||
exists(AttrRead a |
|
||||
a.mayHaveAttributeName(attr) and
|
||||
nodeFrom = a.getObject() and
|
||||
nodeTo = a
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* A utility class that is equivalent to `boolean` but does not require type joining.
|
||||
*/
|
||||
private class Boolean extends boolean {
|
||||
Boolean() { this = true or this = false }
|
||||
}
|
||||
|
||||
private newtype TTypeTracker = MkTypeTracker(Boolean hasCall, OptionalAttributeName attr)
|
||||
class OptionalAttributeName = Internal::OptionalContentName;
|
||||
|
||||
/**
|
||||
* Summary of the steps needed to track a value to a given dataflow node.
|
||||
@@ -179,8 +22,8 @@ private newtype TTypeTracker = MkTypeTracker(Boolean hasCall, OptionalAttributeN
|
||||
*
|
||||
* It is recommended that all uses of this type are written in the following form,
|
||||
* for tracking some type `myType`:
|
||||
* ```
|
||||
* private DataFlow::LocalSourceNode myType(DataFlow::TypeTracker t) {
|
||||
* ```ql
|
||||
* DataFlow::LocalSourceNode myType(DataFlow::TypeTracker t) {
|
||||
* t.start() and
|
||||
* result = < source of myType >
|
||||
* or
|
||||
@@ -189,279 +32,34 @@ private newtype TTypeTracker = MkTypeTracker(Boolean hasCall, OptionalAttributeN
|
||||
* )
|
||||
* }
|
||||
*
|
||||
* DataFlow::Node myType() { myType(DataFlow::TypeTracker::end()).flowsTo(result) }
|
||||
* DataFlow::LocalSourceNode myType() { myType(DataFlow::TypeTracker::end()) }
|
||||
* ```
|
||||
*
|
||||
* Instead of `result = myType(t2).track(t2, t)`, you can also use the equivalent
|
||||
* `t = t2.step(myType(t2), result)`. If you additionally want to track individual
|
||||
* intra-procedural steps, use `t = t2.smallstep(myCallback(t2), result)`.
|
||||
*/
|
||||
class TypeTracker extends TTypeTracker {
|
||||
Boolean hasCall;
|
||||
OptionalAttributeName attr;
|
||||
|
||||
TypeTracker() { this = MkTypeTracker(hasCall, attr) }
|
||||
|
||||
/** Gets the summary resulting from appending `step` to this type-tracking summary. */
|
||||
cached
|
||||
TypeTracker append(StepSummary step) {
|
||||
step = LevelStep() and result = this
|
||||
or
|
||||
step = CallStep() and result = MkTypeTracker(true, attr)
|
||||
or
|
||||
step = ReturnStep() and hasCall = false and result = this
|
||||
or
|
||||
step = LoadStep(attr) and result = MkTypeTracker(hasCall, "")
|
||||
or
|
||||
exists(string p | step = StoreStep(p) and attr = "" and result = MkTypeTracker(hasCall, p))
|
||||
}
|
||||
|
||||
/** Gets a textual representation of this summary. */
|
||||
string toString() {
|
||||
exists(string withCall, string withAttr |
|
||||
(if hasCall = true then withCall = "with" else withCall = "without") and
|
||||
(if attr != "" then withAttr = " with attribute " + attr else withAttr = "") and
|
||||
result = "type tracker " + withCall + " call steps" + withAttr
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if this is the starting point of type tracking.
|
||||
*/
|
||||
predicate start() { hasCall = false and attr = "" }
|
||||
|
||||
class TypeTracker extends Internal::TypeTracker {
|
||||
/**
|
||||
* Holds if this is the starting point of type tracking, and the value starts in the attribute named `attrName`.
|
||||
* The type tracking only ends after the attribute has been loaded.
|
||||
*/
|
||||
predicate startInAttr(AttributeName attrName) { hasCall = false and attr = attrName }
|
||||
|
||||
/**
|
||||
* Holds if this is the starting point of type tracking
|
||||
* when tracking a parameter into a call, but not out of it.
|
||||
*/
|
||||
predicate call() { hasCall = true and attr = "" }
|
||||
|
||||
/**
|
||||
* Holds if this is the end point of type tracking.
|
||||
*/
|
||||
predicate end() { attr = "" }
|
||||
|
||||
/**
|
||||
* INTERNAL. DO NOT USE.
|
||||
*
|
||||
* Holds if this type has been tracked into a call.
|
||||
*/
|
||||
boolean hasCall() { result = hasCall }
|
||||
predicate startInAttr(string attrName) { this.startInContent(attrName) }
|
||||
|
||||
/**
|
||||
* INTERNAL. DO NOT USE.
|
||||
*
|
||||
* Gets the attribute associated with this type tracker.
|
||||
*/
|
||||
string getAttr() { result = attr }
|
||||
|
||||
/**
|
||||
* Gets a type tracker that starts where this one has left off to allow continued
|
||||
* tracking.
|
||||
*
|
||||
* This predicate is only defined if the type has not been tracked into an attribute.
|
||||
*/
|
||||
TypeTracker continue() { attr = "" and result = this }
|
||||
|
||||
/**
|
||||
* Gets the summary that corresponds to having taken a forwards
|
||||
* heap and/or inter-procedural step from `nodeFrom` to `nodeTo`.
|
||||
*/
|
||||
pragma[inline]
|
||||
TypeTracker step(LocalSourceNode nodeFrom, Node nodeTo) {
|
||||
exists(StepSummary summary |
|
||||
StepSummary::step(nodeFrom, nodeTo, summary) and
|
||||
result = this.append(summary)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the summary that corresponds to having taken a forwards
|
||||
* local, heap and/or inter-procedural step from `nodeFrom` to `nodeTo`.
|
||||
*
|
||||
* Unlike `TypeTracker::step`, this predicate exposes all edges
|
||||
* in the flow graph, and not just the edges between `Node`s.
|
||||
* It may therefore be less performant.
|
||||
*
|
||||
* Type tracking predicates using small steps typically take the following form:
|
||||
* ```ql
|
||||
* DataFlow::Node myType(DataFlow::TypeTracker t) {
|
||||
* t.start() and
|
||||
* result = < source of myType >
|
||||
* or
|
||||
* exists (DataFlow::TypeTracker t2 |
|
||||
* t = t2.smallstep(myType(t2), result)
|
||||
* )
|
||||
* }
|
||||
*
|
||||
* DataFlow::Node myType() {
|
||||
* result = myType(DataFlow::TypeTracker::end())
|
||||
* }
|
||||
* ```
|
||||
*/
|
||||
pragma[inline]
|
||||
TypeTracker smallstep(Node nodeFrom, Node nodeTo) {
|
||||
exists(StepSummary summary |
|
||||
StepSummary::smallstep(nodeFrom, nodeTo, summary) and
|
||||
result = this.append(summary)
|
||||
)
|
||||
or
|
||||
typePreservingStep(nodeFrom, nodeTo) and
|
||||
result = this
|
||||
}
|
||||
string getAttr() { result = this.getContent() }
|
||||
}
|
||||
|
||||
/** Provides predicates for implementing custom `TypeTracker`s. */
|
||||
module TypeTracker {
|
||||
/**
|
||||
* Gets a valid end point of type tracking.
|
||||
*/
|
||||
TypeTracker end() { result.end() }
|
||||
}
|
||||
module TypeTracker = Internal::TypeTracker;
|
||||
|
||||
private newtype TTypeBackTracker = MkTypeBackTracker(Boolean hasReturn, OptionalAttributeName attr)
|
||||
class StepSummary = Internal::StepSummary;
|
||||
|
||||
/**
|
||||
* Summary of the steps needed to back-track a use of a value to a given dataflow node.
|
||||
*
|
||||
* This can for example be used to track callbacks that are passed to a certain API,
|
||||
* so we can model specific parameters of that callback as having a certain type.
|
||||
*
|
||||
* Note that type back-tracking does not provide a source/sink relation, that is,
|
||||
* it may determine that a node will be used in an API call somewhere, but it won't
|
||||
* determine exactly where that use was, or the path that led to the use.
|
||||
*
|
||||
* It is recommended that all uses of this type are written in the following form,
|
||||
* for back-tracking some callback type `myCallback`:
|
||||
*
|
||||
* ```
|
||||
* private DataFlow::LocalSourceNode myCallback(DataFlow::TypeBackTracker t) {
|
||||
* t.start() and
|
||||
* result = (< some API call >).getArgument(< n >).getALocalSource()
|
||||
* or
|
||||
* exists (DataFlow::TypeBackTracker t2 |
|
||||
* result = myCallback(t2).backtrack(t2, t)
|
||||
* )
|
||||
* }
|
||||
*
|
||||
* DataFlow::LocalSourceNode myCallback() { result = myCallback(DataFlow::TypeBackTracker::end()) }
|
||||
* ```
|
||||
*
|
||||
* Instead of `result = myCallback(t2).backtrack(t2, t)`, you can also use the equivalent
|
||||
* `t2 = t.step(result, myCallback(t2))`. If you additionally want to track individual
|
||||
* intra-procedural steps, use `t2 = t.smallstep(result, myCallback(t2))`.
|
||||
*/
|
||||
class TypeBackTracker extends TTypeBackTracker {
|
||||
Boolean hasReturn;
|
||||
string attr;
|
||||
module StepSummary = Internal::StepSummary;
|
||||
|
||||
TypeBackTracker() { this = MkTypeBackTracker(hasReturn, attr) }
|
||||
class TypeBackTracker = Internal::TypeBackTracker;
|
||||
|
||||
/** Gets the summary resulting from prepending `step` to this type-tracking summary. */
|
||||
TypeBackTracker prepend(StepSummary step) {
|
||||
step = LevelStep() and result = this
|
||||
or
|
||||
step = CallStep() and hasReturn = false and result = this
|
||||
or
|
||||
step = ReturnStep() and result = MkTypeBackTracker(true, attr)
|
||||
or
|
||||
exists(string p | step = LoadStep(p) and attr = "" and result = MkTypeBackTracker(hasReturn, p))
|
||||
or
|
||||
step = StoreStep(attr) and result = MkTypeBackTracker(hasReturn, "")
|
||||
}
|
||||
|
||||
/** Gets a textual representation of this summary. */
|
||||
string toString() {
|
||||
exists(string withReturn, string withAttr |
|
||||
(if hasReturn = true then withReturn = "with" else withReturn = "without") and
|
||||
(if attr != "" then withAttr = " with attribute " + attr else withAttr = "") and
|
||||
result = "type back-tracker " + withReturn + " return steps" + withAttr
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if this is the starting point of type tracking.
|
||||
*/
|
||||
predicate start() { hasReturn = false and attr = "" }
|
||||
|
||||
/**
|
||||
* Holds if this is the end point of type tracking.
|
||||
*/
|
||||
predicate end() { attr = "" }
|
||||
|
||||
/**
|
||||
* INTERNAL. DO NOT USE.
|
||||
*
|
||||
* Holds if this type has been back-tracked into a call through return edge.
|
||||
*/
|
||||
boolean hasReturn() { result = hasReturn }
|
||||
|
||||
/**
|
||||
* Gets a type tracker that starts where this one has left off to allow continued
|
||||
* tracking.
|
||||
*
|
||||
* This predicate is only defined if the type has not been tracked into an attribute.
|
||||
*/
|
||||
TypeBackTracker continue() { attr = "" and result = this }
|
||||
|
||||
/**
|
||||
* Gets the summary that corresponds to having taken a backwards
|
||||
* heap and/or inter-procedural step from `nodeTo` to `nodeFrom`.
|
||||
*/
|
||||
pragma[inline]
|
||||
TypeBackTracker step(LocalSourceNode nodeFrom, LocalSourceNode nodeTo) {
|
||||
exists(StepSummary summary |
|
||||
StepSummary::step(nodeFrom, nodeTo, summary) and
|
||||
this = result.prepend(summary)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the summary that corresponds to having taken a backwards
|
||||
* local, heap and/or inter-procedural step from `nodeTo` to `nodeFrom`.
|
||||
*
|
||||
* Unlike `TypeBackTracker::step`, this predicate exposes all edges
|
||||
* in the flow graph, and not just the edges between
|
||||
* `LocalSourceNode`s. It may therefore be less performant.
|
||||
*
|
||||
* Type tracking predicates using small steps typically take the following form:
|
||||
* ```ql
|
||||
* DataFlow::Node myType(DataFlow::TypeBackTracker t) {
|
||||
* t.start() and
|
||||
* result = < some API call >.getArgument(< n >)
|
||||
* or
|
||||
* exists (DataFlow::TypeBackTracker t2 |
|
||||
* t = t2.smallstep(result, myType(t2))
|
||||
* )
|
||||
* }
|
||||
*
|
||||
* DataFlow::Node myType() {
|
||||
* result = myType(DataFlow::TypeBackTracker::end())
|
||||
* }
|
||||
* ```
|
||||
*/
|
||||
pragma[inline]
|
||||
TypeBackTracker smallstep(Node nodeFrom, Node nodeTo) {
|
||||
exists(StepSummary summary |
|
||||
StepSummary::smallstep(nodeFrom, nodeTo, summary) and
|
||||
this = result.prepend(summary)
|
||||
)
|
||||
or
|
||||
typePreservingStep(nodeFrom, nodeTo) and
|
||||
this = result
|
||||
}
|
||||
}
|
||||
|
||||
/** Provides predicates for implementing custom `TypeBackTracker`s. */
|
||||
module TypeBackTracker {
|
||||
/**
|
||||
* Gets a valid end point of type back-tracking.
|
||||
*/
|
||||
TypeBackTracker end() { result.end() }
|
||||
}
|
||||
module TypeBackTracker = Internal::TypeBackTracker;
|
||||
|
||||
File diff suppressed because it is too large
File diff suppressed because it is too large
File diff suppressed because it is too large
File diff suppressed because it is too large
@@ -31,26 +31,26 @@ predicate accessPathCostLimits(int apLimit, int tupleLimit) {
|
||||
* currently excludes read-steps, store-steps, and flow-through.
|
||||
*
|
||||
* The analysis uses non-linear recursion: When computing a flow path in or out
|
||||
* of a call, we use the results of the analysis recursively to resolve lamba
|
||||
* of a call, we use the results of the analysis recursively to resolve lambda
|
||||
* calls. For this reason, we cannot reuse the code from `DataFlowImpl.qll` directly.
|
||||
*/
|
||||
private module LambdaFlow {
|
||||
private predicate viableParamNonLambda(DataFlowCall call, int i, ParameterNode p) {
|
||||
private predicate viableParamNonLambda(DataFlowCall call, int i, ParamNode p) {
|
||||
p.isParameterOf(viableCallable(call), i)
|
||||
}
|
||||
|
||||
private predicate viableParamLambda(DataFlowCall call, int i, ParameterNode p) {
|
||||
private predicate viableParamLambda(DataFlowCall call, int i, ParamNode p) {
|
||||
p.isParameterOf(viableCallableLambda(call, _), i)
|
||||
}
|
||||
|
||||
private predicate viableParamArgNonLambda(DataFlowCall call, ParameterNode p, ArgumentNode arg) {
|
||||
private predicate viableParamArgNonLambda(DataFlowCall call, ParamNode p, ArgNode arg) {
|
||||
exists(int i |
|
||||
viableParamNonLambda(call, i, p) and
|
||||
arg.argumentOf(call, i)
|
||||
)
|
||||
}
|
||||
|
||||
private predicate viableParamArgLambda(DataFlowCall call, ParameterNode p, ArgumentNode arg) {
|
||||
private predicate viableParamArgLambda(DataFlowCall call, ParamNode p, ArgNode arg) {
|
||||
exists(int i |
|
||||
viableParamLambda(call, i, p) and
|
||||
arg.argumentOf(call, i)
|
||||
@@ -118,8 +118,8 @@ private module LambdaFlow {
|
||||
boolean toJump, DataFlowCallOption lastCall
|
||||
) {
|
||||
revLambdaFlow0(lambdaCall, kind, node, t, toReturn, toJump, lastCall) and
|
||||
if node instanceof CastNode or node instanceof ArgumentNode or node instanceof ReturnNode
|
||||
then compatibleTypes(t, getNodeType(node))
|
||||
if castNode(node) or node instanceof ArgNode or node instanceof ReturnNode
|
||||
then compatibleTypes(t, getNodeDataFlowType(node))
|
||||
else any()
|
||||
}
|
||||
|
||||
@@ -129,7 +129,7 @@ private module LambdaFlow {
|
||||
boolean toJump, DataFlowCallOption lastCall
|
||||
) {
|
||||
lambdaCall(lambdaCall, kind, node) and
|
||||
t = getNodeType(node) and
|
||||
t = getNodeDataFlowType(node) and
|
||||
toReturn = false and
|
||||
toJump = false and
|
||||
lastCall = TDataFlowCallNone()
|
||||
@@ -146,7 +146,7 @@ private module LambdaFlow {
|
||||
getNodeEnclosingCallable(node) = getNodeEnclosingCallable(mid)
|
||||
|
|
||||
preservesValue = false and
|
||||
t = getNodeType(node)
|
||||
t = getNodeDataFlowType(node)
|
||||
or
|
||||
preservesValue = true and
|
||||
t = t0
|
||||
@@ -160,7 +160,7 @@ private module LambdaFlow {
|
||||
toJump = true and
|
||||
lastCall = TDataFlowCallNone()
|
||||
|
|
||||
jumpStep(node, mid) and
|
||||
jumpStepCached(node, mid) and
|
||||
t = t0
|
||||
or
|
||||
exists(boolean preservesValue |
|
||||
@@ -168,7 +168,7 @@ private module LambdaFlow {
|
||||
getNodeEnclosingCallable(node) != getNodeEnclosingCallable(mid)
|
||||
|
|
||||
preservesValue = false and
|
||||
t = getNodeType(node)
|
||||
t = getNodeDataFlowType(node)
|
||||
or
|
||||
preservesValue = true and
|
||||
t = t0
|
||||
@@ -176,7 +176,7 @@ private module LambdaFlow {
|
||||
)
|
||||
or
|
||||
// flow into a callable
|
||||
exists(ParameterNode p, DataFlowCallOption lastCall0, DataFlowCall call |
|
||||
exists(ParamNode p, DataFlowCallOption lastCall0, DataFlowCall call |
|
||||
revLambdaFlowIn(lambdaCall, kind, p, t, toJump, lastCall0) and
|
||||
(
|
||||
if lastCall0 = TDataFlowCallNone() and toJump = false
|
||||
@@ -227,7 +227,7 @@ private module LambdaFlow {
|
||||
|
||||
pragma[nomagic]
|
||||
predicate revLambdaFlowIn(
|
||||
DataFlowCall lambdaCall, LambdaCallKind kind, ParameterNode p, DataFlowType t, boolean toJump,
|
||||
DataFlowCall lambdaCall, LambdaCallKind kind, ParamNode p, DataFlowType t, boolean toJump,
|
||||
DataFlowCallOption lastCall
|
||||
) {
|
||||
revLambdaFlow(lambdaCall, kind, p, t, false, toJump, lastCall)
|
||||
@@ -242,6 +242,89 @@ private DataFlowCallable viableCallableExt(DataFlowCall call) {
|
||||
|
||||
cached
|
||||
private module Cached {
|
||||
/**
|
||||
* If needed, call this predicate from `DataFlowImplSpecific.qll` in order to
|
||||
* force a stage-dependency on the `DataFlowImplCommon.qll` stage and thereby
|
||||
* collapsing the two stages.
|
||||
*/
|
||||
cached
|
||||
predicate forceCachingInSameStage() { any() }
|
||||
|
||||
cached
|
||||
predicate nodeEnclosingCallable(Node n, DataFlowCallable c) { c = n.getEnclosingCallable() }
|
||||
|
||||
cached
|
||||
predicate callEnclosingCallable(DataFlowCall call, DataFlowCallable c) {
|
||||
c = call.getEnclosingCallable()
|
||||
}
|
||||
|
||||
cached
|
||||
predicate nodeDataFlowType(Node n, DataFlowType t) { t = getNodeType(n) }
|
||||
|
||||
cached
|
||||
predicate jumpStepCached(Node node1, Node node2) { jumpStep(node1, node2) }
|
||||
|
||||
cached
|
||||
predicate clearsContentCached(Node n, Content c) { clearsContent(n, c) }
|
||||
|
||||
cached
|
||||
predicate isUnreachableInCallCached(Node n, DataFlowCall call) { isUnreachableInCall(n, call) }
|
||||
|
||||
cached
|
||||
predicate outNodeExt(Node n) {
|
||||
n instanceof OutNode
|
||||
or
|
||||
n.(PostUpdateNode).getPreUpdateNode() instanceof ArgNode
|
||||
}
|
||||
|
||||
cached
|
||||
predicate hiddenNode(Node n) { nodeIsHidden(n) }
|
||||
|
||||
cached
|
||||
OutNodeExt getAnOutNodeExt(DataFlowCall call, ReturnKindExt k) {
|
||||
result = getAnOutNode(call, k.(ValueReturnKind).getKind())
|
||||
or
|
||||
exists(ArgNode arg |
|
||||
result.(PostUpdateNode).getPreUpdateNode() = arg and
|
||||
arg.argumentOf(call, k.(ParamUpdateReturnKind).getPosition())
|
||||
)
|
||||
}
|
||||
|
||||
cached
|
||||
predicate returnNodeExt(Node n, ReturnKindExt k) {
|
||||
k = TValueReturn(n.(ReturnNode).getKind())
|
||||
or
|
||||
exists(ParamNode p, int pos |
|
||||
parameterValueFlowsToPreUpdate(p, n) and
|
||||
p.isParameterOf(_, pos) and
|
||||
k = TParamUpdate(pos)
|
||||
)
|
||||
}
|
||||
|
||||
cached
|
||||
predicate castNode(Node n) { n instanceof CastNode }
|
||||
|
||||
cached
|
||||
predicate castingNode(Node n) {
|
||||
castNode(n) or
|
||||
n instanceof ParamNode or
|
||||
n instanceof OutNodeExt or
|
||||
// For reads, `x.f`, we want to check that the tracked type after the read (which
|
||||
// is obtained by popping the head of the access path stack) is compatible with
|
||||
// the type of `x.f`.
|
||||
read(_, _, n)
|
||||
}
|
||||
|
||||
cached
|
||||
predicate parameterNode(Node n, DataFlowCallable c, int i) {
|
||||
n.(ParameterNode).isParameterOf(c, i)
|
||||
}
|
||||
|
||||
cached
|
||||
predicate argumentNode(Node n, DataFlowCall call, int pos) {
|
||||
n.(ArgumentNode).argumentOf(call, pos)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a viable target for the lambda call `call`.
|
||||
*
|
||||
@@ -261,7 +344,7 @@ private module Cached {
|
||||
* The instance parameter is considered to have index `-1`.
|
||||
*/
|
||||
pragma[nomagic]
|
||||
private predicate viableParam(DataFlowCall call, int i, ParameterNode p) {
|
||||
private predicate viableParam(DataFlowCall call, int i, ParamNode p) {
|
||||
p.isParameterOf(viableCallableExt(call), i)
|
||||
}
|
||||
|
||||
@@ -270,11 +353,11 @@ private module Cached {
|
||||
* dispatch into account.
|
||||
*/
|
||||
cached
|
||||
predicate viableParamArg(DataFlowCall call, ParameterNode p, ArgumentNode arg) {
|
||||
predicate viableParamArg(DataFlowCall call, ParamNode p, ArgNode arg) {
|
||||
exists(int i |
|
||||
viableParam(call, i, p) and
|
||||
arg.argumentOf(call, i) and
|
||||
compatibleTypes(getNodeType(arg), getNodeType(p))
|
||||
compatibleTypes(getNodeDataFlowType(arg), getNodeDataFlowType(p))
|
||||
)
|
||||
}
|
||||
|
||||
@@ -312,7 +395,7 @@ private module Cached {
|
||||
* `read` indicates whether it is contents of `p` that can flow to `node`.
|
||||
*/
|
||||
pragma[nomagic]
|
||||
private predicate parameterValueFlowCand(ParameterNode p, Node node, boolean read) {
|
||||
private predicate parameterValueFlowCand(ParamNode p, Node node, boolean read) {
|
||||
p = node and
|
||||
read = false
|
||||
or
|
||||
@@ -325,30 +408,30 @@ private module Cached {
|
||||
// read
|
||||
exists(Node mid |
|
||||
parameterValueFlowCand(p, mid, false) and
|
||||
readStep(mid, _, node) and
|
||||
read(mid, _, node) and
|
||||
read = true
|
||||
)
|
||||
or
|
||||
// flow through: no prior read
|
||||
exists(ArgumentNode arg |
|
||||
exists(ArgNode arg |
|
||||
parameterValueFlowArgCand(p, arg, false) and
|
||||
argumentValueFlowsThroughCand(arg, node, read)
|
||||
)
|
||||
or
|
||||
// flow through: no read inside method
|
||||
exists(ArgumentNode arg |
|
||||
exists(ArgNode arg |
|
||||
parameterValueFlowArgCand(p, arg, read) and
|
||||
argumentValueFlowsThroughCand(arg, node, false)
|
||||
)
|
||||
}
|
||||
|
||||
pragma[nomagic]
|
||||
private predicate parameterValueFlowArgCand(ParameterNode p, ArgumentNode arg, boolean read) {
|
||||
private predicate parameterValueFlowArgCand(ParamNode p, ArgNode arg, boolean read) {
|
||||
parameterValueFlowCand(p, arg, read)
|
||||
}
|
||||
|
||||
pragma[nomagic]
|
||||
predicate parameterValueFlowsToPreUpdateCand(ParameterNode p, PostUpdateNode n) {
|
||||
predicate parameterValueFlowsToPreUpdateCand(ParamNode p, PostUpdateNode n) {
|
||||
parameterValueFlowCand(p, n.getPreUpdateNode(), false)
|
||||
}
|
||||
|
||||
@@ -360,7 +443,7 @@ private module Cached {
|
||||
* `read` indicates whether it is contents of `p` that can flow to the return
|
||||
* node.
|
||||
*/
|
||||
predicate parameterValueFlowReturnCand(ParameterNode p, ReturnKind kind, boolean read) {
|
||||
predicate parameterValueFlowReturnCand(ParamNode p, ReturnKind kind, boolean read) {
|
||||
exists(ReturnNode ret |
|
||||
parameterValueFlowCand(p, ret, read) and
|
||||
kind = ret.getKind()
|
||||
@@ -369,9 +452,9 @@ private module Cached {
|
||||
|
||||
pragma[nomagic]
|
||||
private predicate argumentValueFlowsThroughCand0(
|
||||
DataFlowCall call, ArgumentNode arg, ReturnKind kind, boolean read
|
||||
DataFlowCall call, ArgNode arg, ReturnKind kind, boolean read
|
||||
) {
|
||||
exists(ParameterNode param | viableParamArg(call, param, arg) |
|
||||
exists(ParamNode param | viableParamArg(call, param, arg) |
|
||||
parameterValueFlowReturnCand(param, kind, read)
|
||||
)
|
||||
}
|
||||
@@ -382,14 +465,14 @@ private module Cached {
|
||||
*
|
||||
* `read` indicates whether it is contents of `arg` that can flow to `out`.
|
||||
*/
|
||||
predicate argumentValueFlowsThroughCand(ArgumentNode arg, Node out, boolean read) {
|
||||
predicate argumentValueFlowsThroughCand(ArgNode arg, Node out, boolean read) {
|
||||
exists(DataFlowCall call, ReturnKind kind |
|
||||
argumentValueFlowsThroughCand0(call, arg, kind, read) and
|
||||
out = getAnOutNode(call, kind)
|
||||
)
|
||||
}
|
||||
|
||||
predicate cand(ParameterNode p, Node n) {
|
||||
predicate cand(ParamNode p, Node n) {
|
||||
parameterValueFlowCand(p, n, _) and
|
||||
(
|
||||
parameterValueFlowReturnCand(p, _, _)
|
||||
@@ -416,21 +499,21 @@ private module Cached {
|
||||
* If a read step was taken, then `read` captures the `Content`, the
|
||||
* container type, and the content type.
|
||||
*/
|
||||
predicate parameterValueFlow(ParameterNode p, Node node, ReadStepTypesOption read) {
|
||||
predicate parameterValueFlow(ParamNode p, Node node, ReadStepTypesOption read) {
|
||||
parameterValueFlow0(p, node, read) and
|
||||
if node instanceof CastingNode
|
||||
then
|
||||
// normal flow through
|
||||
read = TReadStepTypesNone() and
|
||||
compatibleTypes(getNodeType(p), getNodeType(node))
|
||||
compatibleTypes(getNodeDataFlowType(p), getNodeDataFlowType(node))
|
||||
or
|
||||
// getter
|
||||
compatibleTypes(read.getContentType(), getNodeType(node))
|
||||
compatibleTypes(read.getContentType(), getNodeDataFlowType(node))
|
||||
else any()
|
||||
}
|
||||
|
||||
pragma[nomagic]
|
||||
private predicate parameterValueFlow0(ParameterNode p, Node node, ReadStepTypesOption read) {
|
||||
private predicate parameterValueFlow0(ParamNode p, Node node, ReadStepTypesOption read) {
|
||||
p = node and
|
||||
Cand::cand(p, _) and
|
||||
read = TReadStepTypesNone()
|
||||
@@ -447,7 +530,7 @@ private module Cached {
|
||||
readStepWithTypes(mid, read.getContainerType(), read.getContent(), node,
|
||||
read.getContentType()) and
|
||||
Cand::parameterValueFlowReturnCand(p, _, true) and
|
||||
compatibleTypes(getNodeType(p), read.getContainerType())
|
||||
compatibleTypes(getNodeDataFlowType(p), read.getContainerType())
|
||||
)
|
||||
or
|
||||
parameterValueFlow0_0(TReadStepTypesNone(), p, node, read)
|
||||
@@ -455,34 +538,32 @@ private module Cached {
|
||||
|
||||
pragma[nomagic]
|
||||
private predicate parameterValueFlow0_0(
|
||||
ReadStepTypesOption mustBeNone, ParameterNode p, Node node, ReadStepTypesOption read
|
||||
ReadStepTypesOption mustBeNone, ParamNode p, Node node, ReadStepTypesOption read
|
||||
) {
|
||||
// flow through: no prior read
|
||||
exists(ArgumentNode arg |
|
||||
exists(ArgNode arg |
|
||||
parameterValueFlowArg(p, arg, mustBeNone) and
|
||||
argumentValueFlowsThrough(arg, read, node)
|
||||
)
|
||||
or
|
||||
// flow through: no read inside method
|
||||
exists(ArgumentNode arg |
|
||||
exists(ArgNode arg |
|
||||
parameterValueFlowArg(p, arg, read) and
|
||||
argumentValueFlowsThrough(arg, mustBeNone, node)
|
||||
)
|
||||
}
|
||||
|
||||
pragma[nomagic]
|
||||
private predicate parameterValueFlowArg(
|
||||
ParameterNode p, ArgumentNode arg, ReadStepTypesOption read
|
||||
) {
|
||||
private predicate parameterValueFlowArg(ParamNode p, ArgNode arg, ReadStepTypesOption read) {
|
||||
parameterValueFlow(p, arg, read) and
|
||||
Cand::argumentValueFlowsThroughCand(arg, _, _)
|
||||
}
|
||||
|
||||
pragma[nomagic]
|
||||
private predicate argumentValueFlowsThrough0(
|
||||
DataFlowCall call, ArgumentNode arg, ReturnKind kind, ReadStepTypesOption read
|
||||
DataFlowCall call, ArgNode arg, ReturnKind kind, ReadStepTypesOption read
|
||||
) {
|
||||
exists(ParameterNode param | viableParamArg(call, param, arg) |
|
||||
exists(ParamNode param | viableParamArg(call, param, arg) |
|
||||
parameterValueFlowReturn(param, kind, read)
|
||||
)
|
||||
}
|
||||
@@ -496,18 +577,18 @@ private module Cached {
|
||||
* container type, and the content type.
|
||||
*/
|
||||
pragma[nomagic]
|
||||
predicate argumentValueFlowsThrough(ArgumentNode arg, ReadStepTypesOption read, Node out) {
|
||||
predicate argumentValueFlowsThrough(ArgNode arg, ReadStepTypesOption read, Node out) {
|
||||
exists(DataFlowCall call, ReturnKind kind |
|
||||
argumentValueFlowsThrough0(call, arg, kind, read) and
|
||||
out = getAnOutNode(call, kind)
|
||||
|
|
||||
// normal flow through
|
||||
read = TReadStepTypesNone() and
|
||||
compatibleTypes(getNodeType(arg), getNodeType(out))
|
||||
compatibleTypes(getNodeDataFlowType(arg), getNodeDataFlowType(out))
|
||||
or
|
||||
// getter
|
||||
compatibleTypes(getNodeType(arg), read.getContainerType()) and
|
||||
compatibleTypes(read.getContentType(), getNodeType(out))
|
||||
compatibleTypes(getNodeDataFlowType(arg), read.getContainerType()) and
|
||||
compatibleTypes(read.getContentType(), getNodeDataFlowType(out))
|
||||
)
|
||||
}
|
||||
|
||||
@@ -516,7 +597,7 @@ private module Cached {
|
||||
* value-preserving steps and a single read step, not taking call
|
||||
* contexts into account, thus representing a getter-step.
|
||||
*/
|
||||
predicate getterStep(ArgumentNode arg, Content c, Node out) {
|
||||
predicate getterStep(ArgNode arg, Content c, Node out) {
|
||||
argumentValueFlowsThrough(arg, TReadStepTypesSome(_, c, _), out)
|
||||
}
|
||||
|
||||
@@ -529,7 +610,7 @@ private module Cached {
|
||||
* container type, and the content type.
|
||||
*/
|
||||
private predicate parameterValueFlowReturn(
|
||||
ParameterNode p, ReturnKind kind, ReadStepTypesOption read
|
||||
ParamNode p, ReturnKind kind, ReadStepTypesOption read
|
||||
) {
|
||||
exists(ReturnNode ret |
|
||||
parameterValueFlow(p, ret, read) and
|
||||
@@ -553,7 +634,7 @@ private module Cached {
|
||||
private predicate mayBenefitFromCallContextExt(DataFlowCall call, DataFlowCallable callable) {
|
||||
mayBenefitFromCallContext(call, callable)
|
||||
or
|
||||
callable = call.getEnclosingCallable() and
|
||||
callEnclosingCallable(call, callable) and
|
||||
exists(viableCallableLambda(call, TDataFlowCallSome(_)))
|
||||
}
|
||||
|
||||
@@ -611,7 +692,7 @@ private module Cached {
|
||||
mayBenefitFromCallContextExt(call, _) and
|
||||
c = viableCallableExt(call) and
|
||||
ctxtgts = count(DataFlowCall ctx | c = viableImplInCallContextExt(call, ctx)) and
|
||||
tgts = strictcount(DataFlowCall ctx | viableCallableExt(ctx) = call.getEnclosingCallable()) and
|
||||
tgts = strictcount(DataFlowCall ctx | callEnclosingCallable(call, viableCallableExt(ctx))) and
|
||||
ctxtgts < tgts
|
||||
)
|
||||
}
|
||||
@@ -635,8 +716,7 @@ private module Cached {
|
||||
* Holds if `p` can flow to the pre-update node associated with post-update
|
||||
* node `n`, in the same callable, using only value-preserving steps.
|
||||
*/
|
||||
cached
|
||||
predicate parameterValueFlowsToPreUpdate(ParameterNode p, PostUpdateNode n) {
|
||||
private predicate parameterValueFlowsToPreUpdate(ParamNode p, PostUpdateNode n) {
|
||||
parameterValueFlow(p, n.getPreUpdateNode(), TReadStepTypesNone())
|
||||
}
|
||||
|
||||
@@ -644,9 +724,9 @@ private module Cached {
|
||||
Node node1, Content c, Node node2, DataFlowType contentType, DataFlowType containerType
|
||||
) {
|
||||
storeStep(node1, c, node2) and
|
||||
readStep(_, c, _) and
|
||||
contentType = getNodeType(node1) and
|
||||
containerType = getNodeType(node2)
|
||||
read(_, c, _) and
|
||||
contentType = getNodeDataFlowType(node1) and
|
||||
containerType = getNodeDataFlowType(node2)
|
||||
or
|
||||
exists(Node n1, Node n2 |
|
||||
n1 = node1.(PostUpdateNode).getPreUpdateNode() and
|
||||
@@ -654,12 +734,15 @@ private module Cached {
|
||||
|
|
||||
argumentValueFlowsThrough(n2, TReadStepTypesSome(containerType, c, contentType), n1)
|
||||
or
|
||||
readStep(n2, c, n1) and
|
||||
contentType = getNodeType(n1) and
|
||||
containerType = getNodeType(n2)
|
||||
read(n2, c, n1) and
|
||||
contentType = getNodeDataFlowType(n1) and
|
||||
containerType = getNodeDataFlowType(n2)
|
||||
)
|
||||
}
|
||||
|
||||
cached
|
||||
predicate read(Node node1, Content c, Node node2) { readStep(node1, c, node2) }
|
||||
|
||||
/**
|
||||
* Holds if data can flow from `node1` to `node2` via a direct assignment to
|
||||
* `f`.
|
||||
@@ -678,8 +761,9 @@ private module Cached {
|
||||
* are aliases. A typical example is a function returning `this`, implementing a fluent
|
||||
* interface.
|
||||
*/
|
||||
cached
|
||||
predicate reverseStepThroughInputOutputAlias(PostUpdateNode fromNode, PostUpdateNode toNode) {
|
||||
private predicate reverseStepThroughInputOutputAlias(
|
||||
PostUpdateNode fromNode, PostUpdateNode toNode
|
||||
) {
|
||||
exists(Node fromPre, Node toPre |
|
||||
fromPre = fromNode.getPreUpdateNode() and
|
||||
toPre = toNode.getPreUpdateNode()
|
||||
@@ -688,14 +772,20 @@ private module Cached {
|
||||
// Does the language-specific simpleLocalFlowStep already model flow
|
||||
// from function input to output?
|
||||
fromPre = getAnOutNode(c, _) and
|
||||
toPre.(ArgumentNode).argumentOf(c, _) and
|
||||
simpleLocalFlowStep(toPre.(ArgumentNode), fromPre)
|
||||
toPre.(ArgNode).argumentOf(c, _) and
|
||||
simpleLocalFlowStep(toPre.(ArgNode), fromPre)
|
||||
)
|
||||
or
|
||||
argumentValueFlowsThrough(toPre, TReadStepTypesNone(), fromPre)
|
||||
)
|
||||
}
|
||||
|
||||
cached
|
||||
predicate simpleLocalFlowStepExt(Node node1, Node node2) {
|
||||
simpleLocalFlowStep(node1, node2) or
|
||||
reverseStepThroughInputOutputAlias(node1, node2)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if the call context `call` either improves virtual dispatch in
|
||||
* `callable` or if it allows us to prune unreachable nodes in `callable`.
|
||||
@@ -704,7 +794,7 @@ private module Cached {
|
||||
predicate recordDataFlowCallSite(DataFlowCall call, DataFlowCallable callable) {
|
||||
reducedViableImplInCallContext(_, callable, call)
|
||||
or
|
||||
exists(Node n | getNodeEnclosingCallable(n) = callable | isUnreachableInCall(n, call))
|
||||
exists(Node n | getNodeEnclosingCallable(n) = callable | isUnreachableInCallCached(n, call))
|
||||
}
|
||||
|
||||
cached
|
||||
@@ -726,12 +816,12 @@ private module Cached {
|
||||
cached
|
||||
newtype TLocalFlowCallContext =
|
||||
TAnyLocalCall() or
|
||||
TSpecificLocalCall(DataFlowCall call) { isUnreachableInCall(_, call) }
|
||||
TSpecificLocalCall(DataFlowCall call) { isUnreachableInCallCached(_, call) }
|
||||
|
||||
cached
|
||||
newtype TReturnKindExt =
|
||||
TValueReturn(ReturnKind kind) or
|
||||
TParamUpdate(int pos) { exists(ParameterNode p | p.isParameterOf(_, pos)) }
|
||||
TParamUpdate(int pos) { exists(ParamNode p | p.isParameterOf(_, pos)) }
|
||||
|
||||
cached
|
||||
newtype TBooleanOption =
|
||||
@@ -761,23 +851,15 @@ private module Cached {
|
||||
* A `Node` at which a cast can occur such that the type should be checked.
|
||||
*/
|
||||
class CastingNode extends Node {
|
||||
CastingNode() {
|
||||
this instanceof ParameterNode or
|
||||
this instanceof CastNode or
|
||||
this instanceof OutNodeExt or
|
||||
// For reads, `x.f`, we want to check that the tracked type after the read (which
|
||||
// is obtained by popping the head of the access path stack) is compatible with
|
||||
// the type of `x.f`.
|
||||
readStep(_, _, this)
|
||||
}
|
||||
CastingNode() { castingNode(this) }
|
||||
}
|
||||
|
||||
private predicate readStepWithTypes(
|
||||
Node n1, DataFlowType container, Content c, Node n2, DataFlowType content
|
||||
) {
|
||||
readStep(n1, c, n2) and
|
||||
container = getNodeType(n1) and
|
||||
content = getNodeType(n2)
|
||||
read(n1, c, n2) and
|
||||
container = getNodeDataFlowType(n1) and
|
||||
content = getNodeDataFlowType(n2)
|
||||
}
|
||||
|
||||
private newtype TReadStepTypesOption =
|
||||
@@ -854,7 +936,7 @@ class CallContextSomeCall extends CallContextCall, TSomeCall {
|
||||
override string toString() { result = "CcSomeCall" }
|
||||
|
||||
override predicate relevantFor(DataFlowCallable callable) {
|
||||
exists(ParameterNode p | getNodeEnclosingCallable(p) = callable)
|
||||
exists(ParamNode p | getNodeEnclosingCallable(p) = callable)
|
||||
}
|
||||
|
||||
override predicate matchesCall(DataFlowCall call) { any() }
|
||||
@@ -866,7 +948,7 @@ class CallContextReturn extends CallContextNoCall, TReturn {
|
||||
}
|
||||
|
||||
override predicate relevantFor(DataFlowCallable callable) {
|
||||
exists(DataFlowCall call | this = TReturn(_, call) and call.getEnclosingCallable() = callable)
|
||||
exists(DataFlowCall call | this = TReturn(_, call) and callEnclosingCallable(call, callable))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -899,7 +981,7 @@ class LocalCallContextSpecificCall extends LocalCallContext, TSpecificLocalCall
|
||||
}
|
||||
|
||||
private predicate relevantLocalCCtx(DataFlowCall call, DataFlowCallable callable) {
|
||||
exists(Node n | getNodeEnclosingCallable(n) = callable and isUnreachableInCall(n, call))
|
||||
exists(Node n | getNodeEnclosingCallable(n) = callable and isUnreachableInCallCached(n, call))
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -913,26 +995,37 @@ LocalCallContext getLocalCallContext(CallContext ctx, DataFlowCallable callable)
|
||||
else result instanceof LocalCallContextAny
|
||||
}
|
||||
|
||||
/**
|
||||
* The value of a parameter at function entry, viewed as a node in a data
|
||||
* flow graph.
|
||||
*/
|
||||
class ParamNode extends Node {
|
||||
ParamNode() { parameterNode(this, _, _) }
|
||||
|
||||
/**
|
||||
* Holds if this node is the parameter of callable `c` at the specified
|
||||
* (zero-based) position.
|
||||
*/
|
||||
predicate isParameterOf(DataFlowCallable c, int i) { parameterNode(this, c, i) }
|
||||
}
|
||||
|
||||
/** A data-flow node that represents a call argument. */
|
||||
class ArgNode extends Node {
|
||||
ArgNode() { argumentNode(this, _, _) }
|
||||
|
||||
/** Holds if this argument occurs at the given position in the given call. */
|
||||
final predicate argumentOf(DataFlowCall call, int pos) { argumentNode(this, call, pos) }
|
||||
}
|
||||
|
||||
/**
|
||||
* A node from which flow can return to the caller. This is either a regular
|
||||
* `ReturnNode` or a `PostUpdateNode` corresponding to the value of a parameter.
|
||||
*/
|
||||
class ReturnNodeExt extends Node {
|
||||
ReturnNodeExt() {
|
||||
this instanceof ReturnNode or
|
||||
parameterValueFlowsToPreUpdate(_, this)
|
||||
}
|
||||
ReturnNodeExt() { returnNodeExt(this, _) }
|
||||
|
||||
/** Gets the kind of this returned value. */
|
||||
ReturnKindExt getKind() {
|
||||
result = TValueReturn(this.(ReturnNode).getKind())
|
||||
or
|
||||
exists(ParameterNode p, int pos |
|
||||
parameterValueFlowsToPreUpdate(p, this) and
|
||||
p.isParameterOf(_, pos) and
|
||||
result = TParamUpdate(pos)
|
||||
)
|
||||
}
|
||||
ReturnKindExt getKind() { returnNodeExt(this, result) }
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -940,11 +1033,7 @@ class ReturnNodeExt extends Node {
|
||||
* or a post-update node associated with a call argument.
|
||||
*/
|
||||
class OutNodeExt extends Node {
|
||||
OutNodeExt() {
|
||||
this instanceof OutNode
|
||||
or
|
||||
this.(PostUpdateNode).getPreUpdateNode() instanceof ArgumentNode
|
||||
}
|
||||
OutNodeExt() { outNodeExt(this) }
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -957,7 +1046,7 @@ abstract class ReturnKindExt extends TReturnKindExt {
|
||||
abstract string toString();
|
||||
|
||||
/** Gets a node corresponding to data flow out of `call`. */
|
||||
abstract OutNodeExt getAnOutNode(DataFlowCall call);
|
||||
final OutNodeExt getAnOutNode(DataFlowCall call) { result = getAnOutNodeExt(call, this) }
|
||||
}
|
||||
|
||||
class ValueReturnKind extends ReturnKindExt, TValueReturn {
|
||||
@@ -968,10 +1057,6 @@ class ValueReturnKind extends ReturnKindExt, TValueReturn {
|
||||
ReturnKind getKind() { result = kind }
|
||||
|
||||
override string toString() { result = kind.toString() }
|
||||
|
||||
override OutNodeExt getAnOutNode(DataFlowCall call) {
|
||||
result = getAnOutNode(call, this.getKind())
|
||||
}
|
||||
}
|
||||
|
||||
class ParamUpdateReturnKind extends ReturnKindExt, TParamUpdate {
|
||||
@@ -982,13 +1067,6 @@ class ParamUpdateReturnKind extends ReturnKindExt, TParamUpdate {
|
||||
int getPosition() { result = pos }
|
||||
|
||||
override string toString() { result = "param update " + pos }
|
||||
|
||||
override OutNodeExt getAnOutNode(DataFlowCall call) {
|
||||
exists(ArgumentNode arg |
|
||||
result.(PostUpdateNode).getPreUpdateNode() = arg and
|
||||
arg.argumentOf(call, this.getPosition())
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/** A callable tagged with a relevant return kind. */
|
||||
@@ -1015,10 +1093,13 @@ class ReturnPosition extends TReturnPosition0 {
|
||||
*/
|
||||
pragma[inline]
|
||||
DataFlowCallable getNodeEnclosingCallable(Node n) {
|
||||
exists(Node n0 |
|
||||
pragma[only_bind_into](n0) = n and
|
||||
pragma[only_bind_into](result) = n0.getEnclosingCallable()
|
||||
)
|
||||
nodeEnclosingCallable(pragma[only_bind_out](n), pragma[only_bind_into](result))
|
||||
}
|
||||
|
||||
/** Gets the type of `n` used for type pruning. */
|
||||
pragma[inline]
|
||||
DataFlowType getNodeDataFlowType(Node n) {
|
||||
nodeDataFlowType(pragma[only_bind_out](n), pragma[only_bind_into](result))
|
||||
}
|
||||
|
||||
pragma[noinline]
|
||||
@@ -1042,7 +1123,7 @@ predicate resolveReturn(CallContext cc, DataFlowCallable callable, DataFlowCall
|
||||
cc instanceof CallContextAny and callable = viableCallableExt(call)
|
||||
or
|
||||
exists(DataFlowCallable c0, DataFlowCall call0 |
|
||||
call0.getEnclosingCallable() = callable and
|
||||
callEnclosingCallable(call0, callable) and
|
||||
cc = TReturn(c0, call0) and
|
||||
c0 = prunedViableImplInCallContextReverse(call0, call)
|
||||
)
|
||||
@@ -1063,8 +1144,6 @@ DataFlowCallable resolveCall(DataFlowCall call, CallContext cc) {
|
||||
result = viableCallableExt(call) and cc instanceof CallContextReturn
|
||||
}
|
||||
|
||||
predicate read = readStep/3;
|
||||
|
||||
/** An optional Boolean value. */
|
||||
class BooleanOption extends TBooleanOption {
|
||||
string toString() {
|
||||
@@ -1116,7 +1195,7 @@ abstract class AccessPathFront extends TAccessPathFront {
|
||||
|
||||
TypedContent getHead() { this = TFrontHead(result) }
|
||||
|
||||
predicate isClearedAt(Node n) { clearsContent(n, getHead().getContent()) }
|
||||
predicate isClearedAt(Node n) { clearsContentCached(n, getHead().getContent()) }
|
||||
}
|
||||
|
||||
class AccessPathFrontNil extends AccessPathFront, TFrontNil {
|
||||
|
||||
@@ -168,7 +168,13 @@ module Consistency {
|
||||
msg = "ArgumentNode is missing PostUpdateNode."
|
||||
}
|
||||
|
||||
query predicate postWithInFlow(PostUpdateNode n, string msg) {
|
||||
// This predicate helps the compiler forget that in some languages
|
||||
// it is impossible for a `PostUpdateNode` to be the target of
|
||||
// `simpleLocalFlowStep`.
|
||||
private predicate isPostUpdateNode(Node n) { n instanceof PostUpdateNode or none() }
|
||||
|
||||
query predicate postWithInFlow(Node n, string msg) {
|
||||
isPostUpdateNode(n) and
|
||||
simpleLocalFlowStep(_, n) and
|
||||
msg = "PostUpdateNode should not be the target of local flow."
|
||||
}
|
||||
|
||||
@@ -228,7 +228,6 @@ module EssaFlow {
|
||||
* data flow. It is a strict subset of the `localFlowStep` predicate, as it
|
||||
* excludes SSA flow through instance fields.
|
||||
*/
|
||||
cached
|
||||
predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) {
|
||||
// If there is ESSA-flow out of a node `node`, we want flow
|
||||
// both out of `node` and any post-update node of `node`.
|
||||
@@ -1559,7 +1558,6 @@ predicate kwUnpackReadStep(CfgNode nodeFrom, DictionaryElementContent c, Node no
|
||||
* any value stored inside `f` is cleared at the pre-update node associated with `x`
|
||||
* in `x.f = newValue`.
|
||||
*/
|
||||
cached
|
||||
predicate clearsContent(Node n, Content c) {
|
||||
exists(CallNode call, CallableValue callable, string name |
|
||||
call_unpacks(call, _, callable, name, _) and
|
||||
@@ -1617,5 +1615,5 @@ predicate lambdaCreation(Node creation, LambdaCallKind kind, DataFlowCallable c)
|
||||
/** Holds if `call` is a lambda call of kind `kind` where `receiver` is the lambda expression. */
|
||||
predicate lambdaCall(DataFlowCall call, LambdaCallKind kind, Node receiver) { none() }
|
||||
|
||||
/** Extra data-flow steps needed for lamba flow analysis. */
|
||||
/** Extra data-flow steps needed for lambda flow analysis. */
|
||||
predicate additionalLambdaFlowStep(Node nodeFrom, Node nodeTo, boolean preservesValue) { none() }
|
||||
|
||||
@@ -119,22 +119,6 @@ class Node extends TNode {
|
||||
/** Gets the expression corresponding to this node, if any. */
|
||||
Expr asExpr() { none() }
|
||||
|
||||
/**
|
||||
* Gets a node that this node may flow to using one heap and/or interprocedural step.
|
||||
*
|
||||
* See `TypeTracker` for more details about how to use this.
|
||||
*/
|
||||
pragma[inline]
|
||||
Node track(TypeTracker t2, TypeTracker t) { t = t2.step(this, result) }
|
||||
|
||||
/**
|
||||
* Gets a node that may flow into this one using one heap and/or interprocedural step.
|
||||
*
|
||||
* See `TypeBackTracker` for more details about how to use this.
|
||||
*/
|
||||
pragma[inline]
|
||||
LocalSourceNode backtrack(TypeBackTracker t2, TypeBackTracker t) { t2 = t.step(result, this) }
|
||||
|
||||
/**
|
||||
* Gets a local source node from which data may flow to this node in zero or more local data-flow steps.
|
||||
*/
|
||||
|
||||
@@ -10,23 +10,37 @@ import python
|
||||
import DataFlowPublic
|
||||
private import DataFlowPrivate
|
||||
|
||||
private predicate comes_from_cfgnode(Node node) {
  exists(CfgNode first, Node second |
    simpleLocalFlowStep(first, second) and
    simpleLocalFlowStep*(second, node)
  )
}

/**
 * A data flow node that is a source of local flow. This includes things like
 * - Expressions
 * - Function parameters
 *
 *
 * Local source nodes and the `flowsTo` relation should be thought of in terms of the reference
 * semantics of the underlying object. For instance, in the following snippet of code
 *
 * ```python
 * x = []
 * x.append(1)
 * x.append(2)
 * ```
 *
 * the local source node corresponding to the occurrences of `x` is the empty list that is assigned to `x`
 * originally. Even though the two `append` calls modify the value of `x`, they do not change the fact that
 * `x` still points to the same object. If, however, we next do `x = x + [3]`, then the expression `x + [3]`
 * will be the new local source of what `x` now points to.
 */
class LocalSourceNode extends Node {
  cached
  LocalSourceNode() {
    not comes_from_cfgnode(this) and
    not this instanceof ModuleVariableNode
    not simpleLocalFlowStep(_, this) and
    // Currently, we create synthetic post-update nodes for
    // - arguments to calls that may modify said argument
    // - direct reads and writes of object attributes
    // Both of these preserve the identity of the underlying pointer, and hence we exclude these as
    // local source nodes.
    // We do, however, allow the post-update nodes that arise from object creation (which are non-synthetic).
    not this instanceof SyntheticPostUpdateNode
    or
    this = any(ModuleVariableNode mvn).getARead()
  }
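To illustrate how this class is typically consumed, here is a minimal sketch (not part of this change) that uses `getALocalSource()` to relate each `append` call back to the node that created the object being mutated, mirroring the list example in the QLDoc above:

```ql
import python
import semmle.python.dataflow.new.DataFlow

// For each call like `x.append(...)`, find the local source of `x`, i.e. the
// node that originally created the object being mutated.
from DataFlow::CallCfgNode call, DataFlow::AttrRead method, DataFlow::LocalSourceNode creation
where
  call.getFunction() = method and
  method.getAttributeName() = "append" and
  creation = method.getObject().getALocalSource()
select call, creation
```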
|
||||
@@ -63,6 +77,22 @@ class LocalSourceNode extends Node {
|
||||
* Gets a call to this node.
|
||||
*/
|
||||
CallCfgNode getACall() { Cached::call(this, result) }
|
||||
|
||||
/**
|
||||
* Gets a node that this node may flow to using one heap and/or interprocedural step.
|
||||
*
|
||||
* See `TypeTracker` for more details about how to use this.
|
||||
*/
|
||||
pragma[inline]
|
||||
LocalSourceNode track(TypeTracker t2, TypeTracker t) { t = t2.step(this, result) }
|
||||
|
||||
/**
|
||||
* Gets a node that may flow into this one using one heap and/or interprocedural step.
|
||||
*
|
||||
* See `TypeBackTracker` for more details about how to use this.
|
||||
*/
|
||||
pragma[inline]
|
||||
LocalSourceNode backtrack(TypeBackTracker t2, TypeBackTracker t) { t2 = t.step(result, this) }
|
||||
}
|
||||
|
||||
cached
|
||||
|
||||
@@ -9,36 +9,42 @@ private import semmle.python.dataflow.new.internal.TaintTrackingPublic
|
||||
*/
|
||||
predicate defaultTaintSanitizer(DataFlow::Node node) { none() }
|
||||
|
||||
/**
|
||||
* Holds if the additional step from `nodeFrom` to `nodeTo` should be included in all
|
||||
* global taint flow configurations.
|
||||
*/
|
||||
predicate defaultAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
|
||||
localAdditionalTaintStep(nodeFrom, nodeTo)
|
||||
or
|
||||
any(AdditionalTaintStep a).step(nodeFrom, nodeTo)
|
||||
private module Cached {
|
||||
/**
|
||||
* Holds if the additional step from `nodeFrom` to `nodeTo` should be included in all
|
||||
* global taint flow configurations.
|
||||
*/
|
||||
cached
|
||||
predicate defaultAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
|
||||
localAdditionalTaintStep(nodeFrom, nodeTo)
|
||||
or
|
||||
any(AdditionalTaintStep a).step(nodeFrom, nodeTo)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if taint can flow in one local step from `nodeFrom` to `nodeTo` excluding
|
||||
* local data flow steps. That is, `nodeFrom` and `nodeTo` are likely to represent
|
||||
* different objects.
|
||||
*/
|
||||
cached
|
||||
predicate localAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
|
||||
concatStep(nodeFrom, nodeTo)
|
||||
or
|
||||
subscriptStep(nodeFrom, nodeTo)
|
||||
or
|
||||
stringManipulation(nodeFrom, nodeTo)
|
||||
or
|
||||
containerStep(nodeFrom, nodeTo)
|
||||
or
|
||||
copyStep(nodeFrom, nodeTo)
|
||||
or
|
||||
forStep(nodeFrom, nodeTo)
|
||||
or
|
||||
unpackingAssignmentStep(nodeFrom, nodeTo)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if taint can flow in one local step from `nodeFrom` to `nodeTo` excluding
|
||||
* local data flow steps. That is, `nodeFrom` and `nodeTo` are likely to represent
|
||||
* different objects.
|
||||
*/
|
||||
predicate localAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
|
||||
concatStep(nodeFrom, nodeTo)
|
||||
or
|
||||
subscriptStep(nodeFrom, nodeTo)
|
||||
or
|
||||
stringManipulation(nodeFrom, nodeTo)
|
||||
or
|
||||
containerStep(nodeFrom, nodeTo)
|
||||
or
|
||||
copyStep(nodeFrom, nodeTo)
|
||||
or
|
||||
forStep(nodeFrom, nodeTo)
|
||||
or
|
||||
unpackingAssignmentStep(nodeFrom, nodeTo)
|
||||
}
|
||||
import Cached
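The `any(AdditionalTaintStep a).step(...)` disjunct above is the hook for user-defined steps. A minimal sketch of such an extension is shown below; the `mylib.parse` API is hypothetical and only serves to show the shape of the class.

```ql
import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import semmle.python.ApiGraphs

// Hypothetical extra taint step: the result of `mylib.parse(x)` is tainted
// whenever its first argument is tainted.
class MylibParseTaintStep extends TaintTracking::AdditionalTaintStep {
  override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
    exists(DataFlow::CallCfgNode call |
      call = API::moduleImport("mylib").getMember("parse").getACall() and
      nodeFrom = call.getArg(0) and
      nodeTo = call
    )
  }
}
```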
|
||||
|
||||
/**
|
||||
* Holds if taint can flow from `nodeFrom` to `nodeTo` with a step related to concatenation.
|
||||
|
||||
@@ -0,0 +1,470 @@
|
||||
/** Step Summaries and Type Tracking */
|
||||
|
||||
private import TypeTrackerSpecific
|
||||
|
||||
/**
|
||||
* Any string that may appear as the name of a piece of content. This will usually include things like:
|
||||
* - Attribute names (in Python)
|
||||
* - Property names (in JavaScript)
|
||||
*
|
||||
* In general, this can also be used to model things like stores to specific list indices. To ensure
|
||||
* correctness, it is important that
|
||||
*
|
||||
* - different types of content do not have overlapping names, and
|
||||
* - the empty string `""` is not a valid piece of content, as it is used to indicate the absence of
|
||||
* content instead.
|
||||
*/
|
||||
class ContentName extends string {
|
||||
ContentName() { this = getPossibleContentName() }
|
||||
}
|
||||
|
||||
/** Either a content name, or the empty string (representing no content). */
|
||||
class OptionalContentName extends string {
|
||||
OptionalContentName() { this instanceof ContentName or this = "" }
|
||||
}
|
||||
|
||||
cached
|
||||
private module Cached {
|
||||
/**
|
||||
* A description of a step on an inter-procedural data flow path.
|
||||
*/
|
||||
cached
|
||||
newtype TStepSummary =
|
||||
LevelStep() or
|
||||
CallStep() or
|
||||
ReturnStep() or
|
||||
StoreStep(ContentName content) or
|
||||
LoadStep(ContentName content)
|
||||
|
||||
/** Gets the summary resulting from appending `step` to type-tracking summary `tt`. */
|
||||
cached
|
||||
TypeTracker append(TypeTracker tt, StepSummary step) {
|
||||
exists(Boolean hasCall, OptionalContentName content | tt = MkTypeTracker(hasCall, content) |
|
||||
step = LevelStep() and result = tt
|
||||
or
|
||||
step = CallStep() and result = MkTypeTracker(true, content)
|
||||
or
|
||||
step = ReturnStep() and hasCall = false and result = tt
|
||||
or
|
||||
step = LoadStep(content) and result = MkTypeTracker(hasCall, "")
|
||||
or
|
||||
exists(string p | step = StoreStep(p) and content = "" and result = MkTypeTracker(hasCall, p))
|
||||
)
|
||||
}
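As a sanity check on this algebra, here is a sketch written against the definitions in this file (not part of the change): storing some content and then loading the same content should bring a fresh tracker back to its starting state.

```ql
// Holds if appending a store of `c` followed by a load of `c` to the initial
// tracker yields the initial tracker again.
predicate storeThenLoadRoundTrip(ContentName c, TypeTracker start) {
  start.start() and
  start.append(StoreStep(c)).append(LoadStep(c)) = start
}
```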
|
||||
|
||||
/**
|
||||
* Gets the summary that corresponds to having taken a forwards
|
||||
* heap and/or intra-procedural step from `nodeFrom` to `nodeTo`.
|
||||
*
|
||||
* Steps contained in this predicate should _not_ depend on the call graph.
|
||||
*/
|
||||
cached
|
||||
predicate stepNoCall(LocalSourceNode nodeFrom, LocalSourceNode nodeTo, StepSummary summary) {
|
||||
exists(Node mid | nodeFrom.flowsTo(mid) and smallstepNoCall(mid, nodeTo, summary))
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the summary that corresponds to having taken a forwards
|
||||
* inter-procedural step from `nodeFrom` to `nodeTo`.
|
||||
*/
|
||||
cached
|
||||
predicate stepCall(LocalSourceNode nodeFrom, LocalSourceNode nodeTo, StepSummary summary) {
|
||||
exists(Node mid | nodeFrom.flowsTo(mid) and smallstepCall(mid, nodeTo, summary))
|
||||
}
|
||||
}
|
||||
|
||||
private import Cached
|
||||
|
||||
/**
|
||||
* INTERNAL: Use `TypeTracker` or `TypeBackTracker` instead.
|
||||
*
|
||||
* A description of a step on an inter-procedural data flow path.
|
||||
*/
|
||||
class StepSummary extends TStepSummary {
|
||||
/** Gets a textual representation of this step summary. */
|
||||
string toString() {
|
||||
this instanceof LevelStep and result = "level"
|
||||
or
|
||||
this instanceof CallStep and result = "call"
|
||||
or
|
||||
this instanceof ReturnStep and result = "return"
|
||||
or
|
||||
exists(string content | this = StoreStep(content) | result = "store " + content)
|
||||
or
|
||||
exists(string content | this = LoadStep(content) | result = "load " + content)
|
||||
}
|
||||
}
|
||||
|
||||
pragma[noinline]
|
||||
private predicate smallstepNoCall(Node nodeFrom, LocalSourceNode nodeTo, StepSummary summary) {
|
||||
jumpStep(nodeFrom, nodeTo) and
|
||||
summary = LevelStep()
|
||||
or
|
||||
exists(string content |
|
||||
StepSummary::localSourceStoreStep(nodeFrom, nodeTo, content) and
|
||||
summary = StoreStep(content)
|
||||
or
|
||||
basicLoadStep(nodeFrom, nodeTo, content) and summary = LoadStep(content)
|
||||
)
|
||||
}
|
||||
|
||||
pragma[noinline]
|
||||
private predicate smallstepCall(Node nodeFrom, LocalSourceNode nodeTo, StepSummary summary) {
|
||||
callStep(nodeFrom, nodeTo) and summary = CallStep()
|
||||
or
|
||||
returnStep(nodeFrom, nodeTo) and
|
||||
summary = ReturnStep()
|
||||
}
|
||||
|
||||
/** Provides predicates for updating step summaries (`StepSummary`s). */
|
||||
module StepSummary {
|
||||
/**
|
||||
* Gets the summary that corresponds to having taken a forwards
|
||||
* heap and/or inter-procedural step from `nodeFrom` to `nodeTo`.
|
||||
*
|
||||
* This predicate is inlined, which enables better join-orders when
|
||||
* the call graph construction and type tracking are mutually recursive.
|
||||
* In such cases, non-linear recursion involving `step` will be limited
|
||||
* to non-linear recursion for the parts of `step` that involve the
|
||||
* call graph.
|
||||
*/
|
||||
pragma[inline]
|
||||
predicate step(LocalSourceNode nodeFrom, LocalSourceNode nodeTo, StepSummary summary) {
|
||||
stepNoCall(nodeFrom, nodeTo, summary)
|
||||
or
|
||||
stepCall(nodeFrom, nodeTo, summary)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the summary that corresponds to having taken a forwards
|
||||
* local, heap and/or inter-procedural step from `nodeFrom` to `nodeTo`.
|
||||
*
|
||||
* Unlike `StepSummary::step`, this predicate does not compress
|
||||
* type-preserving steps.
|
||||
*/
|
||||
pragma[inline]
|
||||
predicate smallstep(Node nodeFrom, LocalSourceNode nodeTo, StepSummary summary) {
|
||||
smallstepNoCall(nodeFrom, nodeTo, summary)
|
||||
or
|
||||
smallstepCall(nodeFrom, nodeTo, summary)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `nodeFrom` is being written to the `content` content of the object in `nodeTo`.
|
||||
*
|
||||
* Note that `nodeTo` will always be a local source node that flows to the place where the content
|
||||
* is written in `basicStoreStep`. This may lead to the flow of information going "back in time"
|
||||
* from the point of view of the execution of the program.
|
||||
*
|
||||
* For instance, if we interpret attribute writes in Python as writing to content with the same
|
||||
* name as the attribute and consider the following snippet
|
||||
*
|
||||
* ```python
|
||||
* def foo(y):
|
||||
* x = Foo()
|
||||
* bar(x)
|
||||
* x.attr = y
|
||||
* baz(x)
|
||||
*
|
||||
* def bar(x):
|
||||
* z = x.attr
|
||||
* ```
|
||||
* for the attribute write `x.attr = y`, we will have `content` being the literal string `"attr"`,
|
||||
* `nodeFrom` will be `y`, and `nodeTo` will be the object `Foo()` created on the first line of the
|
||||
* function. This means we will track the fact that `x.attr` can have the type of `y` into the
|
||||
* assignment to `z` inside `bar`, even though this attribute write happens _after_ `bar` is called.
|
||||
*/
|
||||
predicate localSourceStoreStep(Node nodeFrom, LocalSourceNode nodeTo, string content) {
|
||||
exists(Node obj | nodeTo.flowsTo(obj) and basicStoreStep(nodeFrom, obj, content))
|
||||
}
|
||||
}
|
||||
|
||||
private newtype TTypeTracker = MkTypeTracker(Boolean hasCall, OptionalContentName content)
|
||||
|
||||
/**
|
||||
* Summary of the steps needed to track a value to a given dataflow node.
|
||||
*
|
||||
* This can be used to track objects that implement a certain API in order to
|
||||
* recognize calls to that API. Note that type-tracking does not by itself provide a
|
||||
* source/sink relation, that is, it may determine that a node has a given type,
|
||||
* but it won't determine where that type came from.
|
||||
*
|
||||
* It is recommended that all uses of this type are written in the following form,
|
||||
* for tracking some type `myType`:
|
||||
* ```ql
|
||||
* DataFlow::LocalSourceNode myType(DataFlow::TypeTracker t) {
|
||||
* t.start() and
|
||||
* result = < source of myType >
|
||||
* or
|
||||
* exists (DataFlow::TypeTracker t2 |
|
||||
* result = myType(t2).track(t2, t)
|
||||
* )
|
||||
* }
|
||||
*
|
||||
* DataFlow::Node myType() { myType(DataFlow::TypeTracker::end()).flowsTo(result) }
|
||||
* ```
|
||||
*
|
||||
* Instead of `result = myType(t2).track(t2, t)`, you can also use the equivalent
|
||||
* `t = t2.step(myType(t2), result)`. If you additionally want to track individual
|
||||
* intra-procedural steps, use `t = t2.smallstep(myCallback(t2), result)`.
|
||||
*/
|
||||
class TypeTracker extends TTypeTracker {
|
||||
Boolean hasCall;
|
||||
OptionalContentName content;
|
||||
|
||||
TypeTracker() { this = MkTypeTracker(hasCall, content) }
|
||||
|
||||
/** Gets the summary resulting from appending `step` to this type-tracking summary. */
|
||||
TypeTracker append(StepSummary step) { result = append(this, step) }
|
||||
|
||||
/** Gets a textual representation of this summary. */
|
||||
string toString() {
|
||||
exists(string withCall, string withContent |
|
||||
(if hasCall = true then withCall = "with" else withCall = "without") and
|
||||
(if content != "" then withContent = " with content " + content else withContent = "") and
|
||||
result = "type tracker " + withCall + " call steps" + withContent
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if this is the starting point of type tracking.
|
||||
*/
|
||||
predicate start() { hasCall = false and content = "" }
|
||||
|
||||
/**
|
||||
* Holds if this is the starting point of type tracking, and the value starts in the content named `contentName`.
|
||||
* The type tracking only ends after the content has been loaded.
|
||||
*/
|
||||
predicate startInContent(ContentName contentName) { hasCall = false and content = contentName }
|
||||
|
||||
/**
|
||||
* Holds if this is the starting point of type tracking
|
||||
* when tracking a parameter into a call, but not out of it.
|
||||
*/
|
||||
predicate call() { hasCall = true and content = "" }
|
||||
|
||||
/**
|
||||
* Holds if this is the end point of type tracking.
|
||||
*/
|
||||
predicate end() { content = "" }
|
||||
|
||||
/**
|
||||
* INTERNAL. DO NOT USE.
|
||||
*
|
||||
* Holds if this type has been tracked into a call.
|
||||
*/
|
||||
boolean hasCall() { result = hasCall }
|
||||
|
||||
/**
|
||||
* INTERNAL. DO NOT USE.
|
||||
*
|
||||
* Gets the content associated with this type tracker.
|
||||
*/
|
||||
string getContent() { result = content }
|
||||
|
||||
/**
|
||||
* Gets a type tracker that starts where this one has left off to allow continued
|
||||
* tracking.
|
||||
*
|
||||
* This predicate is only defined if the type is not associated to a piece of content.
|
||||
*/
|
||||
TypeTracker continue() { content = "" and result = this }
|
||||
|
||||
/**
|
||||
* Gets the summary that corresponds to having taken a forwards
|
||||
* heap and/or inter-procedural step from `nodeFrom` to `nodeTo`.
|
||||
*/
|
||||
pragma[inline]
|
||||
TypeTracker step(LocalSourceNode nodeFrom, LocalSourceNode nodeTo) {
|
||||
exists(StepSummary summary |
|
||||
StepSummary::step(nodeFrom, pragma[only_bind_out](nodeTo), pragma[only_bind_into](summary)) and
|
||||
result = this.append(pragma[only_bind_into](summary))
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the summary that corresponds to having taken a forwards
|
||||
* local, heap and/or inter-procedural step from `nodeFrom` to `nodeTo`.
|
||||
*
|
||||
* Unlike `TypeTracker::step`, this predicate exposes all edges
|
||||
* in the flow graph, and not just the edges between `Node`s.
|
||||
* It may therefore be less performant.
|
||||
*
|
||||
* Type tracking predicates using small steps typically take the following form:
|
||||
* ```ql
|
||||
* DataFlow::Node myType(DataFlow::TypeTracker t) {
|
||||
* t.start() and
|
||||
* result = < source of myType >
|
||||
* or
|
||||
* exists (DataFlow::TypeTracker t2 |
|
||||
* t = t2.smallstep(myType(t2), result)
|
||||
* )
|
||||
* }
|
||||
*
|
||||
* DataFlow::Node myType() {
|
||||
* result = myType(DataFlow::TypeTracker::end())
|
||||
* }
|
||||
* ```
|
||||
*/
|
||||
pragma[inline]
|
||||
TypeTracker smallstep(Node nodeFrom, Node nodeTo) {
|
||||
exists(StepSummary summary |
|
||||
StepSummary::smallstep(nodeFrom, nodeTo, summary) and
|
||||
result = this.append(summary)
|
||||
)
|
||||
or
|
||||
simpleLocalFlowStep(nodeFrom, nodeTo) and
|
||||
result = this
|
||||
}
|
||||
}
|
||||
|
||||
/** Provides predicates for implementing custom `TypeTracker`s. */
|
||||
module TypeTracker {
|
||||
/**
|
||||
* Gets a valid end point of type tracking.
|
||||
*/
|
||||
TypeTracker end() { result.end() }
|
||||
}
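A concrete instantiation of the pattern described in the class documentation might look as follows. This is a sketch: `mypkg.Client` is a made-up API, and the `DataFlow::` qualifiers assume the usual re-exports from the Python data-flow library.

```ql
import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.ApiGraphs

/** Gets a node to which a `mypkg.Client` instance may flow, as tracked by `t`. */
DataFlow::LocalSourceNode clientInstance(DataFlow::TypeTracker t) {
  t.start() and
  result = API::moduleImport("mypkg").getMember("Client").getACall()
  or
  exists(DataFlow::TypeTracker t2 | result = clientInstance(t2).track(t2, t))
}

/** Gets any node that may hold a `mypkg.Client` instance. */
DataFlow::Node clientInstance() { clientInstance(DataFlow::TypeTracker::end()).flowsTo(result) }
```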
|
||||
|
||||
private newtype TTypeBackTracker = MkTypeBackTracker(Boolean hasReturn, OptionalContentName content)
|
||||
|
||||
/**
|
||||
* Summary of the steps needed to back-track a use of a value to a given dataflow node.
|
||||
*
|
||||
* This can for example be used to track callbacks that are passed to a certain API,
|
||||
* so we can model specific parameters of that callback as having a certain type.
|
||||
*
|
||||
* Note that type back-tracking does not provide a source/sink relation, that is,
|
||||
* it may determine that a node will be used in an API call somewhere, but it won't
|
||||
* determine exactly where that use was, or the path that led to the use.
|
||||
*
|
||||
* It is recommended that all uses of this type are written in the following form,
|
||||
* for back-tracking some callback type `myCallback`:
|
||||
*
|
||||
* ```ql
|
||||
* DataFlow::LocalSourceNode myCallback(DataFlow::TypeBackTracker t) {
|
||||
* t.start() and
|
||||
* result = (< some API call >).getArgument(< n >).getALocalSource()
|
||||
* or
|
||||
* exists (DataFlow::TypeBackTracker t2 |
|
||||
* result = myCallback(t2).backtrack(t2, t)
|
||||
* )
|
||||
* }
|
||||
*
|
||||
* DataFlow::LocalSourceNode myCallback() { result = myCallback(DataFlow::TypeBackTracker::end()) }
|
||||
* ```
|
||||
*
|
||||
* Instead of `result = myCallback(t2).backtrack(t2, t)`, you can also use the equivalent
|
||||
* `t2 = t.step(result, myCallback(t2))`. If you additionally want to track individual
|
||||
* intra-procedural steps, use `t2 = t.smallstep(result, myCallback(t2))`.
|
||||
*/
|
||||
class TypeBackTracker extends TTypeBackTracker {
|
||||
Boolean hasReturn;
|
||||
string content;
|
||||
|
||||
TypeBackTracker() { this = MkTypeBackTracker(hasReturn, content) }
|
||||
|
||||
/** Gets the summary resulting from prepending `step` to this type-tracking summary. */
|
||||
TypeBackTracker prepend(StepSummary step) {
|
||||
step = LevelStep() and result = this
|
||||
or
|
||||
step = CallStep() and hasReturn = false and result = this
|
||||
or
|
||||
step = ReturnStep() and result = MkTypeBackTracker(true, content)
|
||||
or
|
||||
exists(string p |
|
||||
step = LoadStep(p) and content = "" and result = MkTypeBackTracker(hasReturn, p)
|
||||
)
|
||||
or
|
||||
step = StoreStep(content) and result = MkTypeBackTracker(hasReturn, "")
|
||||
}
|
||||
|
||||
/** Gets a textual representation of this summary. */
|
||||
string toString() {
|
||||
exists(string withReturn, string withContent |
|
||||
(if hasReturn = true then withReturn = "with" else withReturn = "without") and
|
||||
(if content != "" then withContent = " with content " + content else withContent = "") and
|
||||
result = "type back-tracker " + withReturn + " return steps" + withContent
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if this is the starting point of type tracking.
|
||||
*/
|
||||
predicate start() { hasReturn = false and content = "" }
|
||||
|
||||
/**
|
||||
* Holds if this is the end point of type tracking.
|
||||
*/
|
||||
predicate end() { content = "" }
|
||||
|
||||
/**
|
||||
* INTERNAL. DO NOT USE.
|
||||
*
|
||||
* Holds if this type has been back-tracked into a call through a return edge.
|
||||
*/
|
||||
boolean hasReturn() { result = hasReturn }
|
||||
|
||||
/**
|
||||
* Gets a type tracker that starts where this one has left off to allow continued
|
||||
* tracking.
|
||||
*
|
||||
* This predicate is only defined if the type has not been tracked into a piece of content.
|
||||
*/
|
||||
TypeBackTracker continue() { content = "" and result = this }
|
||||
|
||||
/**
|
||||
* Gets the summary that corresponds to having taken a backwards
|
||||
* heap and/or inter-procedural step from `nodeTo` to `nodeFrom`.
|
||||
*/
|
||||
pragma[inline]
|
||||
TypeBackTracker step(LocalSourceNode nodeFrom, LocalSourceNode nodeTo) {
|
||||
exists(StepSummary summary |
|
||||
StepSummary::step(pragma[only_bind_out](nodeFrom), nodeTo, pragma[only_bind_into](summary)) and
|
||||
this = result.prepend(pragma[only_bind_into](summary))
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the summary that corresponds to having taken a backwards
|
||||
* local, heap and/or inter-procedural step from `nodeTo` to `nodeFrom`.
|
||||
*
|
||||
* Unlike `TypeBackTracker::step`, this predicate exposes all edges
|
||||
* in the flow graph, and not just the edges between
|
||||
* `LocalSourceNode`s. It may therefore be less performant.
|
||||
*
|
||||
* Type tracking predicates using small steps typically take the following form:
|
||||
* ```ql
|
||||
* DataFlow::Node myType(DataFlow::TypeBackTracker t) {
|
||||
* t.start() and
|
||||
* result = < some API call >.getArgument(< n >)
|
||||
* or
|
||||
* exists (DataFlow::TypeBackTracker t2 |
|
||||
* t = t2.smallstep(result, myType(t2))
|
||||
* )
|
||||
* }
|
||||
*
|
||||
* DataFlow::Node myType() {
|
||||
* result = myType(DataFlow::TypeBackTracker::end())
|
||||
* }
|
||||
* ```
|
||||
*/
|
||||
pragma[inline]
|
||||
TypeBackTracker smallstep(Node nodeFrom, Node nodeTo) {
|
||||
exists(StepSummary summary |
|
||||
StepSummary::smallstep(nodeFrom, nodeTo, summary) and
|
||||
this = result.prepend(summary)
|
||||
)
|
||||
or
|
||||
simpleLocalFlowStep(nodeFrom, nodeTo) and
|
||||
this = result
|
||||
}
|
||||
}
|
||||
|
||||
/** Provides predicates for implementing custom `TypeBackTracker`s. */
|
||||
module TypeBackTracker {
|
||||
/**
|
||||
* Gets a valid end point of type back-tracking.
|
||||
*/
|
||||
TypeBackTracker end() { result.end() }
|
||||
}
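The corresponding back-tracking pattern, sketched here for a made-up `mypkg.register(callback)` API, finds the function values that may end up being passed as the callback:

```ql
import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.ApiGraphs

/** Gets a node that may flow into the first argument of `mypkg.register`, as back-tracked by `t`. */
DataFlow::LocalSourceNode registeredCallback(DataFlow::TypeBackTracker t) {
  t.start() and
  result = API::moduleImport("mypkg").getMember("register").getACall().getArg(0).getALocalSource()
  or
  exists(DataFlow::TypeBackTracker t2 | result = registeredCallback(t2).backtrack(t2, t))
}

/** Gets a value that may be registered as a callback. */
DataFlow::LocalSourceNode registeredCallback() {
  result = registeredCallback(DataFlow::TypeBackTracker::end())
}
```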
|
||||
@@ -0,0 +1,82 @@
|
||||
/**
|
||||
* Provides Python-specific definitions for use in the type tracker library.
|
||||
*/
|
||||
|
||||
private import python
|
||||
private import semmle.python.dataflow.new.internal.DataFlowPublic as DataFlowPublic
|
||||
private import semmle.python.dataflow.new.internal.DataFlowPrivate as DataFlowPrivate
|
||||
|
||||
class Node = DataFlowPublic::Node;
|
||||
|
||||
class LocalSourceNode = DataFlowPublic::LocalSourceNode;
|
||||
|
||||
predicate simpleLocalFlowStep = DataFlowPrivate::simpleLocalFlowStep/2;
|
||||
|
||||
predicate jumpStep = DataFlowPrivate::jumpStep/2;
|
||||
|
||||
/**
|
||||
* Gets the name of a possible piece of content. For Python, this is currently only attribute names,
|
||||
* using the name of the attribute for the corresponding content.
|
||||
*/
|
||||
string getPossibleContentName() { result = any(DataFlowPublic::AttrRef a).getAttributeName() }
|
||||
|
||||
/**
|
||||
* Gets a callable for the call where `nodeFrom` is used as the `i`'th argument.
|
||||
*
|
||||
* Helper predicate to avoid bad join order experienced in `callStep`.
|
||||
* This happened when `isParameterOf` was joined _before_ `getCallable`.
|
||||
*/
|
||||
pragma[nomagic]
|
||||
private DataFlowPrivate::DataFlowCallable getCallableForArgument(
|
||||
DataFlowPublic::ArgumentNode nodeFrom, int i
|
||||
) {
|
||||
exists(DataFlowPrivate::DataFlowCall call |
|
||||
nodeFrom.argumentOf(call, i) and
|
||||
result = call.getCallable()
|
||||
)
|
||||
}
|
||||
|
||||
/** Holds if `nodeFrom` steps to `nodeTo` by being passed as a parameter in a call. */
|
||||
predicate callStep(DataFlowPublic::ArgumentNode nodeFrom, DataFlowPublic::ParameterNode nodeTo) {
|
||||
// TODO: Support special methods?
|
||||
exists(DataFlowPrivate::DataFlowCallable callable, int i |
|
||||
callable = getCallableForArgument(nodeFrom, i) and
|
||||
nodeTo.isParameterOf(callable, i)
|
||||
)
|
||||
}
|
||||
|
||||
/** Holds if `nodeFrom` steps to `nodeTo` by being returned from a call. */
|
||||
predicate returnStep(DataFlowPrivate::ReturnNode nodeFrom, Node nodeTo) {
|
||||
exists(DataFlowPrivate::DataFlowCall call |
|
||||
nodeFrom.getEnclosingCallable() = call.getCallable() and nodeTo.asCfgNode() = call.getNode()
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `nodeFrom` is being written to the `content` content of the object in `nodeTo`.
|
||||
*/
|
||||
predicate basicStoreStep(Node nodeFrom, Node nodeTo, string content) {
|
||||
exists(DataFlowPublic::AttrWrite a |
|
||||
a.mayHaveAttributeName(content) and
|
||||
nodeFrom = a.getValue() and
|
||||
nodeTo = a.getObject()
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `nodeTo` is the result of accessing the `content` content of `nodeFrom`.
|
||||
*/
|
||||
predicate basicLoadStep(Node nodeFrom, Node nodeTo, string content) {
|
||||
exists(DataFlowPublic::AttrRead a |
|
||||
a.mayHaveAttributeName(content) and
|
||||
nodeFrom = a.getObject() and
|
||||
nodeTo = a
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* A utility class that is equivalent to `boolean` but does not require type joining.
|
||||
*/
|
||||
class Boolean extends boolean {
|
||||
Boolean() { this = true or this = false }
|
||||
}
|
||||
52
python/ql/src/semmle/python/frameworks/Aioch.qll
Normal file
@@ -0,0 +1,52 @@
|
||||
/**
|
||||
* Provides classes modeling security-relevant aspects of the `aioch` PyPI package (an
|
||||
* async-io version of the `clickhouse-driver` PyPI package).
|
||||
*
|
||||
* See https://pypi.org/project/aioch/
|
||||
*/
|
||||
|
||||
private import python
|
||||
private import semmle.python.Concepts
|
||||
private import semmle.python.ApiGraphs
|
||||
private import semmle.python.frameworks.PEP249
|
||||
private import semmle.python.frameworks.ClickhouseDriver
|
||||
|
||||
/**
|
||||
* INTERNAL: Do not use.
|
||||
*
|
||||
* Provides models for `aioch` PyPI package (an async-io version of the
|
||||
* `clickhouse-driver` PyPI package).
|
||||
*
|
||||
* See https://pypi.org/project/aioch/
|
||||
*/
|
||||
module Aioch {
|
||||
/** Provides models for `aioch.Client` class and subclasses. */
|
||||
module Client {
|
||||
/** Gets a reference to the `aioch.Client` class or any subclass. */
|
||||
API::Node subclassRef() {
|
||||
result = API::moduleImport("aioch").getMember("Client").getASubclass*()
|
||||
}
|
||||
|
||||
/** Gets a reference to an instance of `aioch.Client` or any subclass. */
|
||||
API::Node instance() { result = subclassRef().getReturn() }
|
||||
}
|
||||
|
||||
/**
|
||||
* A call to any of the execute methods on an `aioch.Client`, which are just async
|
||||
* versions of the methods in the `clickhouse-driver` PyPI package.
|
||||
*
|
||||
* See
|
||||
* - https://clickhouse-driver.readthedocs.io/en/latest/api.html#clickhouse_driver.Client.execute
|
||||
* - https://clickhouse-driver.readthedocs.io/en/latest/api.html#clickhouse_driver.Client.execute_iter
|
||||
* - https://clickhouse-driver.readthedocs.io/en/latest/api.html#clickhouse_driver.Client.execute_with_progress
|
||||
*/
|
||||
class ClientExecuteCall extends SqlExecution::Range, DataFlow::CallCfgNode {
|
||||
ClientExecuteCall() {
|
||||
exists(string methodName | methodName = ClickhouseDriver::getExecuteMethodName() |
|
||||
this = Client::instance().getMember(methodName).getACall()
|
||||
)
|
||||
}
|
||||
|
||||
override DataFlow::Node getSql() { result in [this.getArg(0), this.getArgByName("query")] }
|
||||
}
|
||||
}
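Once this model is in place, the call sites it recognizes surface through the generic `SqlExecution` concept. A minimal query over that concept might look like this sketch (not part of the change):

```ql
import python
import semmle.python.Concepts

// List every modeled SQL execution (including the aioch `execute*` methods
// above) together with the expression holding the SQL text.
from SqlExecution exec
select exec, exec.getSql()
```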
|
||||
562
python/ql/src/semmle/python/frameworks/Aiohttp.qll
Normal file
@@ -0,0 +1,562 @@
|
||||
/**
|
||||
* Provides classes modeling security-relevant aspects of the `aiohttp` PyPI package.
|
||||
* See https://docs.aiohttp.org/en/stable/index.html
|
||||
*/
|
||||
|
||||
private import python
|
||||
private import semmle.python.dataflow.new.DataFlow
|
||||
private import semmle.python.dataflow.new.RemoteFlowSources
|
||||
private import semmle.python.dataflow.new.TaintTracking
|
||||
private import semmle.python.Concepts
|
||||
private import semmle.python.ApiGraphs
|
||||
private import semmle.python.frameworks.internal.PoorMansFunctionResolution
|
||||
private import semmle.python.frameworks.internal.SelfRefMixin
|
||||
private import semmle.python.frameworks.Multidict
|
||||
private import semmle.python.frameworks.Yarl
|
||||
|
||||
/**
|
||||
* INTERNAL: Do not use.
|
||||
*
|
||||
* Provides models for the web server part (`aiohttp.web`) of the `aiohttp` PyPI package.
|
||||
* See https://docs.aiohttp.org/en/stable/web.html
|
||||
*/
|
||||
module AiohttpWebModel {
|
||||
/**
|
||||
* Provides models for the `aiohttp.web.View` class and subclasses.
|
||||
*
|
||||
* See https://docs.aiohttp.org/en/stable/web_reference.html#view.
|
||||
*/
|
||||
module View {
|
||||
/** Gets a reference to the `aiohttp.web.View` class or any subclass. */
|
||||
API::Node subclassRef() {
|
||||
result = API::moduleImport("aiohttp").getMember("web").getMember("View").getASubclass*()
|
||||
}
|
||||
}
|
||||
|
||||
// -- route modeling --
|
||||
/** Gets a reference to an `aiohttp.web.Application` instance. */
|
||||
API::Node applicationInstance() {
|
||||
// Not sure whether you're allowed to add routes _after_ starting the app, for
|
||||
// example in the middle of handling a http request... but I'm guessing that for 99%
|
||||
// for all code, not modeling that `request.app` is a reference to an application
|
||||
// should be good enough for the route-setup part of the modeling :+1:
|
||||
result = API::moduleImport("aiohttp").getMember("web").getMember("Application").getReturn()
|
||||
}
|
||||
|
||||
/** Gets a reference to an `aiohttp.web.UrlDispatcher` instance. */
|
||||
API::Node urlDispathcerInstance() {
|
||||
result = API::moduleImport("aiohttp").getMember("web").getMember("UrlDispatcher").getReturn()
|
||||
or
|
||||
result = applicationInstance().getMember("router")
|
||||
}
|
||||
|
||||
/**
|
||||
* A route setup in `aiohttp.web`. Since all route-setups can technically use either
|
||||
* coroutines or view-classes as the handler argument (although that's not how you're
|
||||
* **supposed** to do things), we also need to handle this.
|
||||
*
|
||||
* Extend this class to refine existing API models. If you want to model new APIs,
|
||||
* extend `AiohttpRouteSetup::Range` instead.
|
||||
*/
|
||||
class AiohttpRouteSetup extends HTTP::Server::RouteSetup::Range {
|
||||
AiohttpRouteSetup::Range range;
|
||||
|
||||
AiohttpRouteSetup() { this = range }
|
||||
|
||||
override Parameter getARoutedParameter() { none() }
|
||||
|
||||
override string getFramework() { result = "aiohttp.web" }
|
||||
|
||||
/** Gets the argument specifying the handler (either a coroutine or a view-class). */
|
||||
DataFlow::Node getHandlerArg() { result = range.getHandlerArg() }
|
||||
|
||||
override DataFlow::Node getUrlPatternArg() { result = range.getUrlPatternArg() }
|
||||
|
||||
/** Gets the view-class that is referenced in the view-class handler argument, if any. */
|
||||
Class getViewClass() { result = range.getViewClass() }
|
||||
|
||||
override Function getARequestHandler() { result = range.getARequestHandler() }
|
||||
}
|
||||
|
||||
/** Provides a class for modeling new aiohttp.web route setups. */
|
||||
private module AiohttpRouteSetup {
|
||||
/**
|
||||
* A route setup in `aiohttp.web`. Since all route-setups can technically use either
|
||||
* coroutines or view-classes as the handler argument (although that's not how you're
|
||||
* **supposed** to do things), we also need to handle this.
|
||||
*
|
||||
* Extend this class to model new APIs. If you want to refine existing API models,
|
||||
* extend `AiohttpRouteSetup` instead.
|
||||
*/
|
||||
abstract class Range extends DataFlow::Node {
|
||||
/** Gets the argument used to set the URL pattern. */
|
||||
abstract DataFlow::Node getUrlPatternArg();
|
||||
|
||||
/** Gets the argument specifying the handler (either a coroutine or a view-class). */
|
||||
abstract DataFlow::Node getHandlerArg();
|
||||
|
||||
/** Gets the view-class that is referenced in the view-class handler argument, if any. */
|
||||
Class getViewClass() { result = getBackTrackedViewClass(this.getHandlerArg()) }
|
||||
|
||||
/**
|
||||
* Gets a function that will handle incoming requests for this route, if any.
|
||||
*
|
||||
* NOTE: This will be modified in the near future to have a `RequestHandler` result, instead of a `Function`.
|
||||
*/
|
||||
Function getARequestHandler() {
|
||||
this.getHandlerArg() = poorMansFunctionTracker(result)
|
||||
or
|
||||
result = this.getViewClass().(AiohttpViewClass).getARequestHandler()
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a reference to a class, that has been backtracked from the view-class handler
|
||||
* argument `origin` (to a route-setup for view-classes).
|
||||
*/
|
||||
private DataFlow::LocalSourceNode viewClassBackTracker(
|
||||
DataFlow::TypeBackTracker t, DataFlow::Node origin
|
||||
) {
|
||||
t.start() and
|
||||
origin = any(Range rs).getHandlerArg() and
|
||||
result = origin.getALocalSource()
|
||||
or
|
||||
exists(DataFlow::TypeBackTracker t2 |
|
||||
result = viewClassBackTracker(t2, origin).backtrack(t2, t)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a reference to a class, that has been backtracked from the view-class handler
|
||||
* argument `origin` (to a route-setup for view-classes).
|
||||
*/
|
||||
DataFlow::LocalSourceNode viewClassBackTracker(DataFlow::Node origin) {
|
||||
result = viewClassBackTracker(DataFlow::TypeBackTracker::end(), origin)
|
||||
}
|
||||
|
||||
Class getBackTrackedViewClass(DataFlow::Node origin) {
|
||||
result.getParent() = viewClassBackTracker(origin).asExpr()
|
||||
}
|
||||
}
|
||||
|
||||
/** An aiohttp route setup that uses coroutines (async function) as request handlers. */
|
||||
class AiohttpCoroutineRouteSetup extends AiohttpRouteSetup {
|
||||
AiohttpCoroutineRouteSetup() { this.getHandlerArg() = poorMansFunctionTracker(_) }
|
||||
}
|
||||
|
||||
/** An aiohttp route setup that uses view-classes as request handlers. */
|
||||
class AiohttpViewRouteSetup extends AiohttpRouteSetup {
|
||||
AiohttpViewRouteSetup() { exists(this.getViewClass()) }
|
||||
}
|
||||
|
||||
/**
|
||||
* A route-setup from
|
||||
* - `add_route`, `add_view`, `add_get`, `add_post`, etc. on an `aiohttp.web.UrlDispatcher`.
|
||||
* - `route`, `view`, `get`, `post`, etc. functions from `aiohttp.web`.
|
||||
*/
|
||||
class AiohttpAddRouteCall extends AiohttpRouteSetup::Range, DataFlow::CallCfgNode {
|
||||
/** At what index route arguments start, so we can handle the "route" version together with get/post/... */
|
||||
int routeArgsStart;
|
||||
|
||||
AiohttpAddRouteCall() {
|
||||
exists(string funcName |
|
||||
funcName = HTTP::httpVerbLower() and
|
||||
routeArgsStart = 0
|
||||
or
|
||||
funcName = "view" and
|
||||
routeArgsStart = 0
|
||||
or
|
||||
funcName = "route" and
|
||||
routeArgsStart = 1
|
||||
|
|
||||
this = urlDispathcerInstance().getMember("add_" + funcName).getACall()
|
||||
or
|
||||
this = API::moduleImport("aiohttp").getMember("web").getMember(funcName).getACall()
|
||||
)
|
||||
}
|
||||
|
||||
override DataFlow::Node getUrlPatternArg() {
|
||||
result in [this.getArg(routeArgsStart + 0), this.getArgByName("path")]
|
||||
}
|
||||
|
||||
override DataFlow::Node getHandlerArg() {
|
||||
result in [this.getArg(routeArgsStart + 1), this.getArgByName("handler")]
|
||||
}
|
||||
}
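Route setups registered this way are exposed through the generic `HTTP::Server::RouteSetup` concept. A small query consuming that concept might look like the following sketch (assuming the usual `Concepts` API):

```ql
import python
import semmle.python.Concepts

// Pair each aiohttp route setup with its URL-pattern argument and a function
// that handles requests for that route.
from HTTP::Server::RouteSetup setup, Function handler
where
  setup.getFramework() = "aiohttp.web" and
  handler = setup.getARequestHandler()
select setup, setup.getUrlPatternArg(), handler
```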
|
||||
|
||||
/** A route-setup using a decorator, such as `route`, `view`, `get`, `post`, etc. on an `aiohttp.web.RouteTableDef`. */
|
||||
class AiohttpDecoratorRouteSetup extends AiohttpRouteSetup::Range, DataFlow::CallCfgNode {
|
||||
/** At what index route arguments start, so we can handle the "route" version together with get/post/... */
|
||||
int routeArgsStart;
|
||||
|
||||
AiohttpDecoratorRouteSetup() {
|
||||
exists(string decoratorName |
|
||||
decoratorName = HTTP::httpVerbLower() and
|
||||
routeArgsStart = 0
|
||||
or
|
||||
decoratorName = "view" and
|
||||
routeArgsStart = 0
|
||||
or
|
||||
decoratorName = "route" and
|
||||
routeArgsStart = 1
|
||||
|
|
||||
this =
|
||||
API::moduleImport("aiohttp")
|
||||
.getMember("web")
|
||||
.getMember("RouteTableDef")
|
||||
.getReturn()
|
||||
.getMember(decoratorName)
|
||||
.getACall()
|
||||
)
|
||||
}
|
||||
|
||||
override DataFlow::Node getUrlPatternArg() {
|
||||
result in [this.getArg(routeArgsStart + 0), this.getArgByName("path")]
|
||||
}
|
||||
|
||||
override DataFlow::Node getHandlerArg() { none() }
|
||||
|
||||
override Class getViewClass() { result.getADecorator() = this.asExpr() }
|
||||
|
||||
override Function getARequestHandler() {
|
||||
// we're decorating a class
|
||||
exists(this.getViewClass()) and
|
||||
result = super.getARequestHandler()
|
||||
or
|
||||
// we're decorating a function
|
||||
not exists(this.getViewClass()) and
|
||||
result.getADecorator() = this.asExpr()
|
||||
}
|
||||
}
|
||||
|
||||
/** A class that we consider an aiohttp.web View class. */
|
||||
abstract class AiohttpViewClass extends Class, SelfRefMixin {
|
||||
/** Gets a function that could handle incoming requests, if any. */
|
||||
Function getARequestHandler() {
|
||||
// TODO: This doesn't handle attribute assignment. Should be OK, but analysis is not as complete as with
|
||||
// points-to and `.lookup`, which would handle `post = my_post_handler` inside class def
|
||||
result = this.getAMethod() and
|
||||
result.getName() = HTTP::httpVerbLower()
|
||||
}
|
||||
}
|
||||
|
||||
/** A class that has a super-type which is an aiohttp.web View class. */
|
||||
class AiohttpViewClassFromSuperClass extends AiohttpViewClass {
|
||||
AiohttpViewClassFromSuperClass() { this.getABase() = View::subclassRef().getAUse().asExpr() }
|
||||
}
|
||||
|
||||
/** A class that is used in a route-setup, therefore being considered an aiohttp.web View class. */
|
||||
class AiohttpViewClassFromRouteSetup extends AiohttpViewClass {
|
||||
AiohttpViewClassFromRouteSetup() { this = any(AiohttpRouteSetup rs).getViewClass() }
|
||||
}
|
||||
|
||||
/** A request handler defined in an `aiohttp.web` view class, that has no known route. */
|
||||
private class AiohttpViewClassRequestHandlerWithoutKnownRoute extends HTTP::Server::RequestHandler::Range {
|
||||
AiohttpViewClassRequestHandlerWithoutKnownRoute() {
|
||||
exists(AiohttpViewClass vc | vc.getARequestHandler() = this) and
|
||||
not exists(AiohttpRouteSetup setup | setup.getARequestHandler() = this)
|
||||
}
|
||||
|
||||
override Parameter getARoutedParameter() { none() }
|
||||
|
||||
override string getFramework() { result = "aiohttp.web" }
|
||||
}
|
||||
|
||||
  // ---------------------------------------------------------------------------
  // aiohttp.web.Request taint modeling
  // ---------------------------------------------------------------------------
  /**
   * Provides models for the `aiohttp.web.Request` class.
   *
   * See https://docs.aiohttp.org/en/stable/web_reference.html#request-and-base-request
   */
  module Request {
    /**
     * A source of instances of `aiohttp.web.Request`. Extend this class to model new instances.
     *
     * This can include instantiations of the class, return values from function
     * calls, or a special parameter that will be set when functions are called by an external
     * library.
     *
     * Use the `Request::instance()` predicate to get
     * references to instances of `aiohttp.web.Request`.
     */
    abstract class InstanceSource extends DataFlow::LocalSourceNode { }

    /** Gets a reference to an instance of `aiohttp.web.Request`. */
    private DataFlow::LocalSourceNode instance(DataFlow::TypeTracker t) {
      t.start() and
      result instanceof InstanceSource
      or
      exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t))
    }

    /** Gets a reference to an instance of `aiohttp.web.Request`. */
    DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }
  }
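
  // For illustration only: a hypothetical handler (all names invented) showing the kind of local
  // flow the type tracker above is intended to follow -- the `request` parameter is an
  // `InstanceSource`, and the alias `req` and the helper's `r` parameter are later uses that
  // `Request::instance()` is intended to capture as well:
  // ```py
  // from aiohttp import web
  //
  // def log_client(r):
  //     print(r.remote, r.headers.get("User-Agent"))
  //
  // async def handler(request):
  //     req = request          # tracked alias
  //     log_client(req)        # tracked into the call
  //     return web.Response(text="ok")
  // ```
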
  /**
   * Provides models for the `aiohttp.StreamReader` class.
   *
   * See https://docs.aiohttp.org/en/stable/streams.html#aiohttp.StreamReader
   */
  module StreamReader {
    /**
     * A source of instances of `aiohttp.StreamReader`. Extend this class to model new instances.
     *
     * This can include instantiations of the class, return values from function
     * calls, or a special parameter that will be set when functions are called by an external
     * library.
     *
     * Use the `StreamReader::instance()` predicate to get
     * references to instances of `aiohttp.StreamReader`.
     */
    abstract class InstanceSource extends DataFlow::LocalSourceNode { }

    /** Gets a reference to an instance of `aiohttp.StreamReader`. */
    private DataFlow::LocalSourceNode instance(DataFlow::TypeTracker t) {
      t.start() and
      result instanceof InstanceSource
      or
      exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t))
    }

    /** Gets a reference to an instance of `aiohttp.StreamReader`. */
    DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }

    /**
     * Taint propagation for `aiohttp.StreamReader`.
     */
    private class AiohttpStreamReaderAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
      override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
        // Methods
        //
        // TODO: When we have tools that make it easy, model these properly to handle
        // `meth = obj.meth; meth()`. Until then, we'll use this more syntactic approach
        // (since it allows us to at least capture the most common cases).
        nodeFrom = StreamReader::instance() and
        exists(DataFlow::AttrRead attr | attr.getObject() = nodeFrom |
          // normal methods
          attr.getAttributeName() in ["read_nowait"] and
          nodeTo.(DataFlow::CallCfgNode).getFunction() = attr
          or
          // async methods
          exists(Await await, DataFlow::CallCfgNode call |
            attr.getAttributeName() in [
                "read", "readany", "readexactly", "readline", "readchunk", "iter_chunked",
                "iter_any", "iter_chunks"
              ] and
            call.getFunction() = attr and
            await.getValue() = call.asExpr() and
            nodeTo.asExpr() = await
          )
        )
      }
    }
  }
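
  // For illustration only: a hypothetical handler (names invented) showing the taint steps the
  // class above is meant to add -- `request.content` is an `aiohttp.StreamReader`, so the awaited
  // result of its `read`-style methods is considered tainted as well:
  // ```py
  // from aiohttp import web
  //
  // async def upload(request):
  //     body = await request.content.read()        # tainted: awaited async method
  //     chunk = request.content.read_nowait()      # tainted: normal method call
  //     return web.Response(text=str(len(body) + len(chunk)))
  // ```
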
  /**
   * A parameter that will receive an `aiohttp.web.Request` instance when a request
   * handler is invoked.
   */
  class AiohttpRequestHandlerRequestParam extends Request::InstanceSource, RemoteFlowSource::Range,
    DataFlow::ParameterNode {
    AiohttpRequestHandlerRequestParam() {
      exists(Function requestHandler |
        requestHandler = any(AiohttpCoroutineRouteSetup setup).getARequestHandler() and
        // We select the _last_ parameter for the request since that is what they do in
        // `aiohttp-jinja2`.
        // https://github.com/aio-libs/aiohttp-jinja2/blob/7fb4daf2c3003921d34031d38c2311ee0e02c18b/aiohttp_jinja2/__init__.py#L235
        //
        // I assume that is just to handle cases such as the one below
        // ```py
        // class MyCustomHandlerClass:
        //     async def foo_handler(self, request):
        //         ...
        //
        // my_custom_handler = MyCustomHandlerClass()
        // app.router.add_get("/MyCustomHandlerClass/foo", my_custom_handler.foo_handler)
        // ```
        this.getParameter() =
          max(Parameter param, int i | param = requestHandler.getArg(i) | param order by i)
      )
    }

    override string getSourceType() { result = "aiohttp.web.Request" }
  }
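
  // For illustration only: in the common case (hypothetical names) the request parameter is simply
  // the last -- and usually only -- parameter of a routed coroutine, and it is treated as a remote
  // flow source:
  // ```py
  // from aiohttp import web
  //
  // async def greet(request):              # `request` is the selected parameter
  //     name = request.match_info["name"]  # user-controlled data
  //     return web.Response(text="hi " + name)
  //
  // app = web.Application()
  // app.router.add_get("/greet/{name}", greet)
  // ```
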
  /**
   * A read of the `request` attribute on an instance of an aiohttp.web View class,
   * which gives the request currently being processed.
   */
  class AiohttpViewClassRequestAttributeRead extends Request::InstanceSource,
    RemoteFlowSource::Range, DataFlow::Node {
    AiohttpViewClassRequestAttributeRead() {
      this.(DataFlow::AttrRead).getObject() = any(AiohttpViewClass vc).getASelfRef() and
      this.(DataFlow::AttrRead).getAttributeName() = "request"
    }

    override string getSourceType() {
      result = "aiohttp.web.Request from self.request in View class"
    }
  }
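
  // For illustration only: a hypothetical View class showing the `self.request` reads the class
  // above is meant to flag as remote flow sources:
  // ```py
  // from aiohttp import web
  //
  // class SearchView(web.View):
  //     async def get(self):
  //         query = self.request.query.get("q", "")  # `self.request` is the current request
  //         return web.Response(text=query)
  // ```
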
  /**
   * Taint propagation for `aiohttp.web.Request`.
   *
   * See https://docs.aiohttp.org/en/stable/web_reference.html#request-and-base-request
   */
  private class AiohttpRequestAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
    override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
      // Methods
      //
      // TODO: When we have tools that make it easy, model these properly to handle
      // `meth = obj.meth; meth()`. Until then, we'll use this more syntactic approach
      // (since it allows us to at least capture the most common cases).
      nodeFrom = Request::instance() and
      exists(DataFlow::AttrRead attr | attr.getObject() = nodeFrom |
        // normal methods
        attr.getAttributeName() in ["clone", "get_extra_info"] and
        nodeTo.(DataFlow::CallCfgNode).getFunction() = attr
        or
        // async methods
        exists(Await await, DataFlow::CallCfgNode call |
          attr.getAttributeName() in ["read", "text", "json", "multipart", "post"] and
          call.getFunction() = attr and
          await.getValue() = call.asExpr() and
          nodeTo.asExpr() = await
        )
      )
      or
      // Attributes
      nodeFrom = Request::instance() and
      nodeTo.(DataFlow::AttrRead).getObject() = nodeFrom and
      nodeTo.(DataFlow::AttrRead).getAttributeName() in [
          "url", "rel_url", "forwarded", "host", "remote", "path", "path_qs", "raw_path", "query",
          "headers", "transport", "cookies", "content", "_payload", "content_type", "charset",
          "http_range", "if_modified_since", "if_unmodified_since", "if_range", "match_info"
        ]
    }
  }
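
  // For illustration only (hypothetical handler): with the taint steps above, data read from the
  // request via attribute reads or awaited method calls is itself considered tainted:
  // ```py
  // from aiohttp import web
  //
  // async def submit(request):
  //     agent = request.headers.get("User-Agent", "")  # attribute read -> tainted
  //     body = await request.text()                    # awaited method -> tainted
  //     return web.Response(text=agent + body)
  // ```
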
  /** An attribute read on an `aiohttp.web.Request` that is a `MultiDictProxy` instance. */
  class AiohttpRequestMultiDictProxyInstances extends Multidict::MultiDictProxy::InstanceSource {
    AiohttpRequestMultiDictProxyInstances() {
      this.(DataFlow::AttrRead).getObject() = Request::instance() and
      this.(DataFlow::AttrRead).getAttributeName() in ["query", "headers"]
      or
      // Handle the common case of `x = await request.post()`
      // but don't try to handle anything else, since we don't have an easy way to do this yet.
      // TODO: more complete handling of `await request.post()`
      exists(Await await, DataFlow::CallCfgNode call, DataFlow::AttrRead read |
        this.asExpr() = await
      |
        read.(DataFlow::AttrRead).getObject() = Request::instance() and
        read.(DataFlow::AttrRead).getAttributeName() = "post" and
        call.getFunction() = read and
        await.getValue() = call.asExpr()
      )
    }
  }
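
  // For illustration only (hypothetical handler): the `MultiDictProxy` values the class above
  // models, namely `request.query`, `request.headers`, and the awaited result of `request.post()`:
  // ```py
  // from aiohttp import web
  //
  // async def create_user(request):
  //     form = await request.post()         # MultiDictProxy of form data
  //     name = form.get("name", "")
  //     page = request.query.get("page", "1")
  //     return web.Response(text=name + page)
  // ```
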
  /** An attribute read on an `aiohttp.web.Request` that is a `yarl.URL` instance. */
  class AiohttpRequestYarlUrlInstances extends Yarl::Url::InstanceSource {
    AiohttpRequestYarlUrlInstances() {
      this.(DataFlow::AttrRead).getObject() = Request::instance() and
      this.(DataFlow::AttrRead).getAttributeName() in ["url", "rel_url"]
    }
  }

  /** An attribute read on an `aiohttp.web.Request` that is an `aiohttp.StreamReader` instance. */
  class AiohttpRequestStreamReaderInstances extends StreamReader::InstanceSource {
    AiohttpRequestStreamReaderInstances() {
      this.(DataFlow::AttrRead).getObject() = Request::instance() and
      this.(DataFlow::AttrRead).getAttributeName() in ["content", "_payload"]
    }
  }
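
  // For illustration only (hypothetical handler): the attribute reads the two classes above model,
  // yielding a `yarl.URL` and an `aiohttp.StreamReader` respectively:
  // ```py
  // from aiohttp import web
  //
  // async def echo(request):
  //     path = request.rel_url.path          # yarl.URL instance
  //     raw = await request.content.read()   # aiohttp.StreamReader instance
  //     return web.Response(text=path + str(len(raw)))
  // ```
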
  // ---------------------------------------------------------------------------
  // aiohttp.web Response modeling
  // ---------------------------------------------------------------------------
  /**
   * An instantiation of `aiohttp.web.Response`.
   *
   * Note that `aiohttp.web.HTTPException` (and its subclasses) is a subclass of `aiohttp.web.Response`.
   *
   * See
   * - https://docs.aiohttp.org/en/stable/web_reference.html#aiohttp.web.Response
   * - https://docs.aiohttp.org/en/stable/web_quickstart.html#aiohttp-web-exceptions
   */
  class AiohttpWebResponseInstantiation extends HTTP::Server::HttpResponse::Range,
    DataFlow::CallCfgNode {
    AiohttpWebResponseInstantiation() {
      this = API::moduleImport("aiohttp").getMember("web").getMember("Response").getACall()
      or
      exists(string httpExceptionClassName |
        httpExceptionClassName in [
            "HTTPException", "HTTPSuccessful", "HTTPOk", "HTTPCreated", "HTTPAccepted",
            "HTTPNonAuthoritativeInformation", "HTTPNoContent", "HTTPResetContent",
            "HTTPPartialContent", "HTTPRedirection", "HTTPMultipleChoices", "HTTPMovedPermanently",
            "HTTPFound", "HTTPSeeOther", "HTTPNotModified", "HTTPUseProxy", "HTTPTemporaryRedirect",
            "HTTPPermanentRedirect", "HTTPError", "HTTPClientError", "HTTPBadRequest",
            "HTTPUnauthorized", "HTTPPaymentRequired", "HTTPForbidden", "HTTPNotFound",
            "HTTPMethodNotAllowed", "HTTPNotAcceptable", "HTTPProxyAuthenticationRequired",
            "HTTPRequestTimeout", "HTTPConflict", "HTTPGone", "HTTPLengthRequired",
            "HTTPPreconditionFailed", "HTTPRequestEntityTooLarge", "HTTPRequestURITooLong",
            "HTTPUnsupportedMediaType", "HTTPRequestRangeNotSatisfiable", "HTTPExpectationFailed",
            "HTTPMisdirectedRequest", "HTTPUnprocessableEntity", "HTTPFailedDependency",
            "HTTPUpgradeRequired", "HTTPPreconditionRequired", "HTTPTooManyRequests",
            "HTTPRequestHeaderFieldsTooLarge", "HTTPUnavailableForLegalReasons", "HTTPServerError",
            "HTTPInternalServerError", "HTTPNotImplemented", "HTTPBadGateway",
            "HTTPServiceUnavailable", "HTTPGatewayTimeout", "HTTPVersionNotSupported",
            "HTTPVariantAlsoNegotiates", "HTTPInsufficientStorage", "HTTPNotExtended",
            "HTTPNetworkAuthenticationRequired"
          ] and
        this =
          API::moduleImport("aiohttp").getMember("web").getMember(httpExceptionClassName).getACall()
      )
    }

    override DataFlow::Node getBody() {
      result in [this.getArgByName("text"), this.getArgByName("body")]
    }

    override DataFlow::Node getMimetypeOrContentTypeArg() {
      result = this.getArgByName("content_type")
    }

    override string getMimetypeDefault() {
      exists(this.getArgByName("text")) and
      result = "text/plain"
      or
      not exists(this.getArgByName("text")) and
      result = "application/octet-stream"
    }
  }
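
  // For illustration only (hypothetical handler): responses the class above models, with the body
  // taken from `text`/`body` and the content type from `content_type`:
  // ```py
  // from aiohttp import web
  //
  // async def profile(request):
  //     name = request.query.get("name", "")
  //     if not name:
  //         raise web.HTTPNotFound(text="no such user")           # HTTPException is a Response
  //     return web.Response(text=name, content_type="text/html")  # explicit content type
  // ```
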
  /**
   * An instantiation of an aiohttp.web HTTP redirect exception.
   *
   * See the part about redirects at https://docs.aiohttp.org/en/stable/web_quickstart.html#aiohttp-web-exceptions
   */
  class AiohttpRedirectExceptionInstantiation extends AiohttpWebResponseInstantiation,
    HTTP::Server::HttpRedirectResponse::Range {
    AiohttpRedirectExceptionInstantiation() {
      exists(string httpRedirectExceptionClassName |
        httpRedirectExceptionClassName in [
            "HTTPMultipleChoices", "HTTPMovedPermanently", "HTTPFound", "HTTPSeeOther",
            "HTTPNotModified", "HTTPUseProxy", "HTTPTemporaryRedirect", "HTTPPermanentRedirect"
          ] and
        this =
          API::moduleImport("aiohttp")
              .getMember("web")
              .getMember(httpRedirectExceptionClassName)
              .getACall()
      )
    }

    override DataFlow::Node getRedirectLocation() {
      result in [this.getArg(0), this.getArgByName("location")]
    }
  }
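
  // For illustration only (hypothetical handler): a redirect whose location argument -- positional
  // or passed as `location=` -- is what `getRedirectLocation()` refers to:
  // ```py
  // from aiohttp import web
  //
  // async def go(request):
  //     target = request.query.get("next", "/")
  //     raise web.HTTPFound(location=target)  # potentially an open redirect
  // ```
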
}