Merge branch 'main' of github.com:github/codeql into python-pep-249

2026-05-01 19:55:15 +02:00 · 2020-11-02 16:58:31 +01:00
parent ac85a77ac5 cb527cae73
commit 6d850b2e0c
67 changed files with 595 additions and 891 deletions
--- a/python/ql/src/experimental/Security-new-dataflow/CWE-022/ChainedConfigs12.qll
+++ b/python/ql/src/experimental/Security-new-dataflow/CWE-022/ChainedConfigs12.qll
--- a/python/ql/src/Security/CWE-022/PathInjection.ql
+++ b/python/ql/src/Security/CWE-022/PathInjection.ql
@@ -14,35 +14,103 @@
 *       external/cwe/cwe-036
 *       external/cwe/cwe-073
 *       external/cwe/cwe-099
+ *
+ * The query detects cases where a user-controlled path is used in an unsafe manner,
+ * meaning it is not both normalized and _afterwards_ checked.
+ *
+ * It does so by dividing the problematic situation into two cases:
+ *  1. The file path is never normalized.
+ *     This is easily detected by using normalization as a sanitizer.
+ *
+ *  2. The file path is normalized at least once, but never checked afterwards.
+ *     This is detected by finding the earliest normalization and then ensuring that
+ *     no checks happen later. Since we start from the earliest normalization,
+ *     we know that the absence of checks means that no normalization has a
+ *     check after it. (No checks after a second normalization would be ok if
+ *     there was a check between the first and the second.)
+ *
+ * Note that one could make the dual split on whether the file path is ever checked. This does
+ * not work as nicely, however, since checking is modelled as a `BarrierGuard` rather than
+ * as a `Sanitizer`. That means that only some dataflow paths out of a check will be removed,
+ * and so identifying the last check is not possible simply by finding a dataflow path from it
+ * to a sink.
 */

 import python
-import semmle.python.security.Paths
-/* Sources */
-import semmle.python.web.HttpRequest
-/* Sinks */
-import semmle.python.security.injection.Path
+import experimental.dataflow.DataFlow
+import experimental.dataflow.DataFlow2
+import experimental.dataflow.TaintTracking
+import experimental.dataflow.TaintTracking2
+import experimental.semmle.python.Concepts
+import experimental.dataflow.RemoteFlowSources
+import ChainedConfigs12

-class PathInjectionConfiguration extends TaintTracking::Configuration {
-  PathInjectionConfiguration() { this = "Path injection configuration" }
+// ---------------------------------------------------------------------------
+// Case 1. The path is never normalized.
+// ---------------------------------------------------------------------------
+/** Configuration to find paths from sources to sinks that contain no normalization. */
+class PathNotNormalizedConfiguration extends TaintTracking::Configuration {
+  PathNotNormalizedConfiguration() { this = "PathNotNormalizedConfiguration" }

-  override predicate isSource(TaintTracking::Source source) {
-    source instanceof HttpRequestTaintSource
+  override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
+
+  override predicate isSink(DataFlow::Node sink) {
+    sink = any(FileSystemAccess e).getAPathArgument()
  }

-  override predicate isSink(TaintTracking::Sink sink) { sink instanceof OpenNode }
+  override predicate isSanitizer(DataFlow::Node node) { node instanceof Path::PathNormalization }
+}

-  override predicate isSanitizer(Sanitizer sanitizer) {
-    sanitizer instanceof PathSanitizer or
-    sanitizer instanceof NormalizedPathSanitizer
+predicate pathNotNormalized(CustomPathNode source, CustomPathNode sink) {
+  any(PathNotNormalizedConfiguration config).hasFlowPath(source.asNode1(), sink.asNode1())
+}
+
+// ---------------------------------------------------------------------------
+// Case 2. The path is normalized at least once, but never checked afterwards.
+// ---------------------------------------------------------------------------
+/** Configuration to find paths from sources to normalizations that contain no prior normalizations. */
+class FirstNormalizationConfiguration extends TaintTracking::Configuration {
+  FirstNormalizationConfiguration() { this = "FirstNormalizationConfiguration" }
+
+  override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
+
+  override predicate isSink(DataFlow::Node sink) { sink instanceof Path::PathNormalization }
+
+  override predicate isSanitizerOut(DataFlow::Node node) { node instanceof Path::PathNormalization }
+}
+
+/** Configuration to find paths from normalizations to sinks that do not go through a check. */
+class NormalizedPathNotCheckedConfiguration extends TaintTracking2::Configuration {
+  NormalizedPathNotCheckedConfiguration() { this = "NormalizedPathNotCheckedConfiguration" }
+
+  override predicate isSource(DataFlow::Node source) { source instanceof Path::PathNormalization }
+
+  override predicate isSink(DataFlow::Node sink) {
+    sink = any(FileSystemAccess e).getAPathArgument()
  }

-  override predicate isExtension(TaintTracking::Extension extension) {
-    extension instanceof AbsPath
+  override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
+    guard instanceof Path::SafeAccessCheck
  }
 }

-from PathInjectionConfiguration config, TaintedPathSource src, TaintedPathSink sink
-where config.hasFlowPath(src, sink)
-select sink.getSink(), src, sink, "This path depends on $@.", src.getSource(),
-  "a user-provided value"
+predicate pathNotCheckedAfterNormalization(CustomPathNode source, CustomPathNode sink) {
+  exists(
+    FirstNormalizationConfiguration config, DataFlow::PathNode mid1, DataFlow2::PathNode mid2,
+    NormalizedPathNotCheckedConfiguration config2
+  |
+    config.hasFlowPath(source.asNode1(), mid1) and
+    config2.hasFlowPath(mid2, sink.asNode2()) and
+    mid1.getNode().asCfgNode() = mid2.getNode().asCfgNode()
+  )
+}
+
+// ---------------------------------------------------------------------------
+// Query: Either case 1 or case 2.
+// ---------------------------------------------------------------------------
+from CustomPathNode source, CustomPathNode sink
+where
+  pathNotNormalized(source, sink)
+  or
+  pathNotCheckedAfterNormalization(source, sink)
+select sink, source, sink, "This path depends on $@.", source, "a user-provided value"
--- a/python/ql/src/Security/CWE-078/CommandInjection.ql
+++ b/python/ql/src/Security/CWE-078/CommandInjection.ql
@@ -15,29 +15,51 @@
 */

 import python
-import semmle.python.security.Paths
-/* Sources */
-import semmle.python.web.HttpRequest
-/* Sinks */
-import semmle.python.security.injection.Command
+import experimental.dataflow.DataFlow
+import experimental.dataflow.TaintTracking
+import experimental.semmle.python.Concepts
+import experimental.dataflow.RemoteFlowSources
+import DataFlow::PathGraph

 class CommandInjectionConfiguration extends TaintTracking::Configuration {
-  CommandInjectionConfiguration() { this = "Command injection configuration" }
+  CommandInjectionConfiguration() { this = "CommandInjectionConfiguration" }

-  override predicate isSource(TaintTracking::Source source) {
-    source instanceof HttpRequestTaintSource
-  }
+  override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }

-  override predicate isSink(TaintTracking::Sink sink) { sink instanceof CommandSink }
-
-  override predicate isExtension(TaintTracking::Extension extension) {
-    extension instanceof FirstElementFlow
-    or
-    extension instanceof FabricExecuteExtension
+  override predicate isSink(DataFlow::Node sink) {
+    sink = any(SystemCommandExecution e).getCommand() and
+    // Since the implementation of standard library functions such as `os.popen` looks like
+    // ```py
+    // def popen(cmd, mode="r", buffering=-1):
+    //     ...
+    //     proc = subprocess.Popen(cmd, ...)
+    // ```
+    // any time we would report flow to the `os.popen` sink, we can ALSO report the flow
+    // from the `cmd` parameter to the `subprocess.Popen` sink -- obviously we don't
+    // want that.
+    //
+    // However, simply removing taint edges out of a sink is not a good enough solution,
+    // since we would only flag one of the `os.system` calls in the following example
+    // due to use-use flow
+    // ```py
+    // os.system(cmd)
+    // os.system(cmd)
+    // ```
+    //
+    // The best solution I could come up with is to exclude all sinks inside the modules of
+    // known sinks. This does have a downside: If we have overlooked a function in any
+    // of these, that internally runs a command, we no longer give an alert :| -- and we
+    // need to keep them updated (which is hard to remember)
+    //
+    // This does not only affect `os.popen`, but also the helper functions in
+    // `subprocess`. See:
+    // https://github.com/python/cpython/blob/fa7ce080175f65d678a7d5756c94f82887fc9803/Lib/os.py#L974
+    // https://github.com/python/cpython/blob/fa7ce080175f65d678a7d5756c94f82887fc9803/Lib/subprocess.py#L341
+    not sink.getScope().getEnclosingModule().getName() in ["os", "subprocess", "platform", "popen2"]
  }
 }

-from CommandInjectionConfiguration config, TaintedPathSource src, TaintedPathSink sink
-where config.hasFlowPath(src, sink)
-select sink.getSink(), src, sink, "This command depends on $@.", src.getSource(),
+from CommandInjectionConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink
+where config.hasFlowPath(source, sink)
+select sink.getNode(), source, sink, "This command depends on $@.", source.getNode(),
  "a user-provided value"
--- a/python/ql/src/Security/CWE-079/ReflectedXss.ql
+++ b/python/ql/src/Security/CWE-079/ReflectedXss.ql
@@ -13,30 +13,26 @@
 */

 import python
-import semmle.python.security.Paths
-/* Sources */
-import semmle.python.web.HttpRequest
-/* Sinks */
-import semmle.python.web.HttpResponse
-/* Flow */
-import semmle.python.security.strings.Untrusted
+import experimental.dataflow.DataFlow
+import experimental.dataflow.TaintTracking
+import experimental.semmle.python.Concepts
+import experimental.dataflow.RemoteFlowSources
+import DataFlow::PathGraph

 class ReflectedXssConfiguration extends TaintTracking::Configuration {
-  ReflectedXssConfiguration() { this = "Reflected XSS configuration" }
+  ReflectedXssConfiguration() { this = "ReflectedXssConfiguration" }

-  override predicate isSource(TaintTracking::Source source) {
-    source instanceof HttpRequestTaintSource
-  }
+  override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }

-  override predicate isSink(TaintTracking::Sink sink) {
-    sink instanceof HttpResponseTaintSink and
-    not sink instanceof DjangoResponseContent
-    or
-    sink instanceof DjangoResponseContentXSSVulnerable
+  override predicate isSink(DataFlow::Node sink) {
+    exists(HTTP::Server::HttpResponse response |
+      response.getMimetype().toLowerCase() = "text/html" and
+      sink = response.getBody()
+    )
  }
 }

-from ReflectedXssConfiguration config, TaintedPathSource src, TaintedPathSink sink
-where config.hasFlowPath(src, sink)
-select sink.getSink(), src, sink, "Cross-site scripting vulnerability due to $@.", src.getSource(),
-  "a user-provided value"
+from ReflectedXssConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink
+where config.hasFlowPath(source, sink)
+select sink.getNode(), source, sink, "Cross-site scripting vulnerability due to $@.",
+  source.getNode(), "a user-provided value"
--- a/python/ql/src/Security/CWE-089/SqlInjection.ql
+++ b/python/ql/src/Security/CWE-089/SqlInjection.ql
@@ -12,40 +12,21 @@
 */

 import python
-import semmle.python.security.Paths
-/* Sources */
-import semmle.python.web.HttpRequest
-/* Sinks */
-import semmle.python.security.injection.Sql
-import semmle.python.web.django.Db
-import semmle.python.web.django.Model
+import experimental.dataflow.DataFlow
+import experimental.dataflow.TaintTracking
+import experimental.semmle.python.Concepts
+import experimental.dataflow.RemoteFlowSources
+import DataFlow::PathGraph

 class SQLInjectionConfiguration extends TaintTracking::Configuration {
-  SQLInjectionConfiguration() { this = "SQL injection configuration" }
+  SQLInjectionConfiguration() { this = "SQLInjectionConfiguration" }

-  override predicate isSource(TaintTracking::Source source) {
-    source instanceof HttpRequestTaintSource
-  }
+  override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }

-  override predicate isSink(TaintTracking::Sink sink) { sink instanceof SqlInjectionSink }
+  override predicate isSink(DataFlow::Node sink) { sink = any(SqlExecution e).getSql() }
 }

-/*
- * Additional configuration to support tracking of DB objects. Connections, cursors, etc.
- * Without this configuration (or the LegacyConfiguration), the pattern of
- * `any(MyTaintKind k).taints(control_flow_node)` used in DbConnectionExecuteArgument would not work.
- */
-
-class DbConfiguration extends TaintTracking::Configuration {
-  DbConfiguration() { this = "DB configuration" }
-
-  override predicate isSource(TaintTracking::Source source) {
-    source instanceof DjangoModelObjects or
-    source instanceof DbConnectionSource
-  }
-}
-
-from SQLInjectionConfiguration config, TaintedPathSource src, TaintedPathSink sink
-where config.hasFlowPath(src, sink)
-select sink.getSink(), src, sink, "This SQL query depends on $@.", src.getSource(),
+from SQLInjectionConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink
+where config.hasFlowPath(source, sink)
+select sink.getNode(), source, sink, "This SQL query depends on $@.", source.getNode(),
  "a user-provided value"
--- a/python/ql/src/Security/CWE-094/CodeInjection.ql
+++ b/python/ql/src/Security/CWE-094/CodeInjection.ql
@@ -1,6 +1,6 @@
 /**
 * @name Code injection
- * @description Interpreting unsanitized user input as code allows a malicious user arbitrary
+ * @description Interpreting unsanitized user input as code allows a malicious user to perform arbitrary
 *              code execution.
 * @kind path-problem
 * @problem.severity error
@@ -15,23 +15,21 @@
 */

 import python
-import semmle.python.security.Paths
-/* Sources */
-import semmle.python.web.HttpRequest
-/* Sinks */
-import semmle.python.security.injection.Exec
+import experimental.dataflow.DataFlow
+import experimental.dataflow.TaintTracking
+import experimental.semmle.python.Concepts
+import experimental.dataflow.RemoteFlowSources
+import DataFlow::PathGraph

 class CodeInjectionConfiguration extends TaintTracking::Configuration {
-  CodeInjectionConfiguration() { this = "Code injection configuration" }
+  CodeInjectionConfiguration() { this = "CodeInjectionConfiguration" }

-  override predicate isSource(TaintTracking::Source source) {
-    source instanceof HttpRequestTaintSource
-  }
+  override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }

-  override predicate isSink(TaintTracking::Sink sink) { sink instanceof StringEvaluationNode }
+  override predicate isSink(DataFlow::Node sink) { sink = any(CodeExecution e).getCode() }
 }

-from CodeInjectionConfiguration config, TaintedPathSource src, TaintedPathSink sink
-where config.hasFlowPath(src, sink)
-select sink.getSink(), src, sink, "$@ flows to here and is interpreted as code.", src.getSource(),
-  "A user-provided value"
+from CodeInjectionConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink
+where config.hasFlowPath(source, sink)
+select sink.getNode(), source, sink, "$@ flows to here and is interpreted as code.",
+  source.getNode(), "A user-provided value"
--- a/python/ql/src/Security/CWE-502/UnsafeDeserialization.ql
+++ b/python/ql/src/Security/CWE-502/UnsafeDeserialization.ql
@@ -12,26 +12,25 @@
 */

 import python
-import semmle.python.security.Paths
-// Sources -- Any untrusted input
-import semmle.python.web.HttpRequest
-// Flow -- untrusted string
-import semmle.python.security.strings.Untrusted
-// Sink -- Unpickling and other deserialization formats.
-import semmle.python.security.injection.Pickle
-import semmle.python.security.injection.Marshal
-import semmle.python.security.injection.Yaml
+import experimental.dataflow.DataFlow
+import experimental.dataflow.TaintTracking
+import experimental.semmle.python.Concepts
+import experimental.dataflow.RemoteFlowSources
+import DataFlow::PathGraph

 class UnsafeDeserializationConfiguration extends TaintTracking::Configuration {
-  UnsafeDeserializationConfiguration() { this = "Unsafe deserialization configuration" }
+  UnsafeDeserializationConfiguration() { this = "UnsafeDeserializationConfiguration" }

-  override predicate isSource(TaintTracking::Source source) {
-    source instanceof HttpRequestTaintSource
+  override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
+
+  override predicate isSink(DataFlow::Node sink) {
+    exists(Decoding d |
+      d.mayExecuteInput() and
+      sink = d.getAnInput()
+    )
  }
-
-  override predicate isSink(TaintTracking::Sink sink) { sink instanceof DeserializationSink }
 }

-from UnsafeDeserializationConfiguration config, TaintedPathSource src, TaintedPathSink sink
-where config.hasFlowPath(src, sink)
-select sink.getSink(), src, sink, "Deserializing of $@.", src.getSource(), "untrusted input"
+from UnsafeDeserializationConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink
+where config.hasFlowPath(source, sink)
+select sink.getNode(), source, sink, "Deserializing of $@.", source.getNode(), "untrusted input"
--- a/python/ql/src/experimental/Security-new-dataflow/CWE-022/PathInjection.ql
+++ b/python/ql/src/experimental/Security-new-dataflow/CWE-022/PathInjection.ql
@@ -1,116 +0,0 @@
-/**
- * @name Uncontrolled data used in path expression
- * @description Accessing paths influenced by users can allow an attacker to access unexpected resources.
- * @kind path-problem
- * @problem.severity error
- * @sub-severity high
- * @precision high
- * @id py/path-injection
- * @tags correctness
- *       security
- *       external/owasp/owasp-a1
- *       external/cwe/cwe-022
- *       external/cwe/cwe-023
- *       external/cwe/cwe-036
- *       external/cwe/cwe-073
- *       external/cwe/cwe-099
- *
- * The query detects cases where a user-controlled path is used in an unsafe manner,
- * meaning it is not both normalized and _afterwards_ checked.
- *
- * It does so by dividing the problematic situation into two cases:
- *  1. The file path is never normalized.
- *     This is easily detected by using normalization as a sanitizer.
- *
- *  2. The file path is normalized at least once, but never checked afterwards.
- *     This is detected by finding the earliest normalization and then ensuring that
- *     no checks happen later. Since we start from the earliest normalization,
- *     we know that the absence of checks means that no normalization has a
- *     check after it. (No checks after a second normalization would be ok if
- *     there was a check between the first and the second.)
- *
- * Note that one could make the dual split on whether the file path is ever checked. This does
- * not work as nicely, however, since checking is modelled as a `BarrierGuard` rather than
- * as a `Sanitizer`. That means that only some dataflow paths out of a check will be removed,
- * and so identifying the last check is not possible simply by finding a dataflow path from it
- * to a sink.
- */
-
-import python
-import experimental.dataflow.DataFlow
-import experimental.dataflow.DataFlow2
-import experimental.dataflow.TaintTracking
-import experimental.dataflow.TaintTracking2
-import experimental.semmle.python.Concepts
-import experimental.dataflow.RemoteFlowSources
-import ChainedConfigs12
-
-// ---------------------------------------------------------------------------
-// Case 1. The path is never normalized.
-// ---------------------------------------------------------------------------
-/** Configuration to find paths from sources to sinks that contain no normalization. */
-class PathNotNormalizedConfiguration extends TaintTracking::Configuration {
-  PathNotNormalizedConfiguration() { this = "PathNotNormalizedConfiguration" }
-
-  override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
-
-  override predicate isSink(DataFlow::Node sink) {
-    sink = any(FileSystemAccess e).getAPathArgument()
-  }
-
-  override predicate isSanitizer(DataFlow::Node node) { node instanceof Path::PathNormalization }
-}
-
-predicate pathNotNormalized(CustomPathNode source, CustomPathNode sink) {
-  any(PathNotNormalizedConfiguration config).hasFlowPath(source.asNode1(), sink.asNode1())
-}
-
-// ---------------------------------------------------------------------------
-// Case 2. The path is normalized at least once, but never checked afterwards.
-// ---------------------------------------------------------------------------
-/** Configuration to find paths from sources to normalizations that contain no prior normalizations. */
-class FirstNormalizationConfiguration extends TaintTracking::Configuration {
-  FirstNormalizationConfiguration() { this = "FirstNormalizationConfiguration" }
-
-  override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
-
-  override predicate isSink(DataFlow::Node sink) { sink instanceof Path::PathNormalization }
-
-  override predicate isSanitizerOut(DataFlow::Node node) { node instanceof Path::PathNormalization }
-}
-
-/** Configuration to find paths from normalizations to sinks that do not go through a check. */
-class NormalizedPathNotCheckedConfiguration extends TaintTracking2::Configuration {
-  NormalizedPathNotCheckedConfiguration() { this = "NormalizedPathNotCheckedConfiguration" }
-
-  override predicate isSource(DataFlow::Node source) { source instanceof Path::PathNormalization }
-
-  override predicate isSink(DataFlow::Node sink) {
-    sink = any(FileSystemAccess e).getAPathArgument()
-  }
-
-  override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
-    guard instanceof Path::SafeAccessCheck
-  }
-}
-
-predicate pathNotCheckedAfterNormalization(CustomPathNode source, CustomPathNode sink) {
-  exists(
-    FirstNormalizationConfiguration config, DataFlow::PathNode mid1, DataFlow2::PathNode mid2,
-    NormalizedPathNotCheckedConfiguration config2
-  |
-    config.hasFlowPath(source.asNode1(), mid1) and
-    config2.hasFlowPath(mid2, sink.asNode2()) and
-    mid1.getNode().asCfgNode() = mid2.getNode().asCfgNode()
-  )
-}
-
-// ---------------------------------------------------------------------------
-// Query: Either case 1 or case 2.
-// ---------------------------------------------------------------------------
-from CustomPathNode source, CustomPathNode sink
-where
-  pathNotNormalized(source, sink)
-  or
-  pathNotCheckedAfterNormalization(source, sink)
-select sink, source, sink, "This path depends on $@.", source, "a user-provided value"
--- a/python/ql/src/experimental/Security-new-dataflow/CWE-078/CommandInjection.ql
+++ b/python/ql/src/experimental/Security-new-dataflow/CWE-078/CommandInjection.ql
@@ -1,65 +0,0 @@
-/**
- * @name Uncontrolled command line
- * @description Using externally controlled strings in a command line may allow a malicious
- *              user to change the meaning of the command.
- * @kind path-problem
- * @problem.severity error
- * @sub-severity high
- * @precision high
- * @id py/command-line-injection
- * @tags correctness
- *       security
- *       external/owasp/owasp-a1
- *       external/cwe/cwe-078
- *       external/cwe/cwe-088
- */
-
-import python
-import experimental.dataflow.DataFlow
-import experimental.dataflow.TaintTracking
-import experimental.semmle.python.Concepts
-import experimental.dataflow.RemoteFlowSources
-import DataFlow::PathGraph
-
-class CommandInjectionConfiguration extends TaintTracking::Configuration {
-  CommandInjectionConfiguration() { this = "CommandInjectionConfiguration" }
-
-  override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
-
-  override predicate isSink(DataFlow::Node sink) {
-    sink = any(SystemCommandExecution e).getCommand() and
-    // Since the implementation of standard library functions such `os.popen` looks like
-    // ```py
-    // def popen(cmd, mode="r", buffering=-1):
-    //     ...
-    //     proc = subprocess.Popen(cmd, ...)
-    // ```
-    // any time we would report flow to the `os.popen` sink, we can ALSO report the flow
-    // from the `cmd` parameter to the `subprocess.Popen` sink -- obviously we don't
-    // want that.
-    //
-    // However, simply removing taint edges out of a sink is not a good enough solution,
-    // since we would only flag one of the `os.system` calls in the following example
-    // due to use-use flow
-    // ```py
-    // os.system(cmd)
-    // os.system(cmd)
-    // ```
-    //
-    // Best solution I could come up with is to exclude all sinks inside the modules of
-    // known sinks. This does have a downside: If we have overlooked a function in any
-    // of these, that internally runs a command, we no longer give an alert :| -- and we
-    // need to keep them updated (which is hard to remember)
-    //
-    // This does not only affect `os.popen`, but also the helper functions in
-    // `subprocess`. See:
-    // https://github.com/python/cpython/blob/fa7ce080175f65d678a7d5756c94f82887fc9803/Lib/os.py#L974
-    // https://github.com/python/cpython/blob/fa7ce080175f65d678a7d5756c94f82887fc9803/Lib/subprocess.py#L341
-    not sink.getScope().getEnclosingModule().getName() in ["os", "subprocess", "platform", "popen2"]
-  }
-}
-
-from CommandInjectionConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink
-where config.hasFlowPath(source, sink)
-select sink.getNode(), source, sink, "This command depends on $@.", source.getNode(),
-  "a user-provided value"
--- a/python/ql/src/experimental/Security-new-dataflow/CWE-079/ReflectedXss.ql
+++ b/python/ql/src/experimental/Security-new-dataflow/CWE-079/ReflectedXss.ql
@@ -1,38 +0,0 @@
-/**
- * @name Reflected server-side cross-site scripting
- * @description Writing user input directly to a web page
- *              allows for a cross-site scripting vulnerability.
- * @kind path-problem
- * @problem.severity error
- * @sub-severity high
- * @precision high
- * @id py/reflective-xss
- * @tags security
- *       external/cwe/cwe-079
- *       external/cwe/cwe-116
- */
-
-import python
-import experimental.dataflow.DataFlow
-import experimental.dataflow.TaintTracking
-import experimental.semmle.python.Concepts
-import experimental.dataflow.RemoteFlowSources
-import DataFlow::PathGraph
-
-class ReflectedXssConfiguration extends TaintTracking::Configuration {
-  ReflectedXssConfiguration() { this = "ReflectedXssConfiguration" }
-
-  override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
-
-  override predicate isSink(DataFlow::Node sink) {
-    exists(HTTP::Server::HttpResponse response |
-      response.getMimetype().toLowerCase() = "text/html" and
-      sink = response.getBody()
-    )
-  }
-}
-
-from ReflectedXssConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink
-where config.hasFlowPath(source, sink)
-select sink.getNode(), source, sink, "Cross-site scripting vulnerability due to $@.",
-  source.getNode(), "a user-provided value"
--- a/python/ql/src/experimental/Security-new-dataflow/CWE-089/SqlInjection.ql
+++ b/python/ql/src/experimental/Security-new-dataflow/CWE-089/SqlInjection.ql
@@ -1,32 +0,0 @@
-/**
- * @name SQL query built from user-controlled sources
- * @description Building a SQL query from user-controlled sources is vulnerable to insertion of
- *              malicious SQL code by the user.
- * @kind path-problem
- * @problem.severity error
- * @precision high
- * @id py/sql-injection
- * @tags security
- *       external/cwe/cwe-089
- *       external/owasp/owasp-a1
- */
-
-import python
-import experimental.dataflow.DataFlow
-import experimental.dataflow.TaintTracking
-import experimental.semmle.python.Concepts
-import experimental.dataflow.RemoteFlowSources
-import DataFlow::PathGraph
-
-class SQLInjectionConfiguration extends TaintTracking::Configuration {
-  SQLInjectionConfiguration() { this = "SQLInjectionConfiguration" }
-
-  override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
-
-  override predicate isSink(DataFlow::Node sink) { sink = any(SqlExecution e).getSql() }
-}
-
-from SQLInjectionConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink
-where config.hasFlowPath(source, sink)
-select sink.getNode(), source, sink, "This SQL query depends on $@.", source.getNode(),
-  "a user-provided value"
--- a/python/ql/src/experimental/Security-new-dataflow/CWE-094/CodeInjection.ql
+++ b/python/ql/src/experimental/Security-new-dataflow/CWE-094/CodeInjection.ql
@@ -1,35 +0,0 @@
-/**
- * @name Code injection
- * @description Interpreting unsanitized user input as code allows a malicious user to perform arbitrary
- *              code execution.
- * @kind path-problem
- * @problem.severity error
- * @sub-severity high
- * @precision high
- * @id py/code-injection
- * @tags security
- *       external/owasp/owasp-a1
- *       external/cwe/cwe-094
- *       external/cwe/cwe-095
- *       external/cwe/cwe-116
- */
-
-import python
-import experimental.dataflow.DataFlow
-import experimental.dataflow.TaintTracking
-import experimental.semmle.python.Concepts
-import experimental.dataflow.RemoteFlowSources
-import DataFlow::PathGraph
-
-class CodeInjectionConfiguration extends TaintTracking::Configuration {
-  CodeInjectionConfiguration() { this = "CodeInjectionConfiguration" }
-
-  override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
-
-  override predicate isSink(DataFlow::Node sink) { sink = any(CodeExecution e).getCode() }
-}
-
-from CodeInjectionConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink
-where config.hasFlowPath(source, sink)
-select sink.getNode(), source, sink, "$@ flows to here and is interpreted as code.",
-  source.getNode(), "A user-provided value"
--- a/python/ql/src/experimental/Security-new-dataflow/CWE-502/UnsafeDeserialization.ql
+++ b/python/ql/src/experimental/Security-new-dataflow/CWE-502/UnsafeDeserialization.ql
@@ -1,36 +0,0 @@
-/**
- * @name Deserializing untrusted input
- * @description Deserializing user-controlled data may allow attackers to execute arbitrary code.
- * @kind path-problem
- * @id py/unsafe-deserialization
- * @problem.severity error
- * @sub-severity high
- * @precision high
- * @tags external/cwe/cwe-502
- *       security
- *       serialization
- */
-
-import python
-import experimental.dataflow.DataFlow
-import experimental.dataflow.TaintTracking
-import experimental.semmle.python.Concepts
-import experimental.dataflow.RemoteFlowSources
-import DataFlow::PathGraph
-
-class UnsafeDeserializationConfiguration extends TaintTracking::Configuration {
-  UnsafeDeserializationConfiguration() { this = "UnsafeDeserializationConfiguration" }
-
-  override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
-
-  override predicate isSink(DataFlow::Node sink) {
-    exists(Decoding d |
-      d.mayExecuteInput() and
-      sink = d.getAnInput()
-    )
-  }
-}
-
-from UnsafeDeserializationConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink
-where config.hasFlowPath(source, sink)
-select sink.getNode(), source, sink, "Deserializing of $@.", source.getNode(), "untrusted input"
--- a/python/ql/src/experimental/Security-old-dataflow/CWE-022/PathInjection.ql
+++ b/python/ql/src/experimental/Security-old-dataflow/CWE-022/PathInjection.ql
@@ -0,0 +1,48 @@
+/**
+ * @name Uncontrolled data used in path expression
+ * @description Accessing paths influenced by users can allow an attacker to access unexpected resources.
+ * @kind path-problem
+ * @problem.severity error
+ * @sub-severity high
+ * @precision high
+ * @id py/path-injection
+ * @tags correctness
+ *       security
+ *       external/owasp/owasp-a1
+ *       external/cwe/cwe-022
+ *       external/cwe/cwe-023
+ *       external/cwe/cwe-036
+ *       external/cwe/cwe-073
+ *       external/cwe/cwe-099
+ */
+
+import python
+import semmle.python.security.Paths
+/* Sources */
+import semmle.python.web.HttpRequest
+/* Sinks */
+import semmle.python.security.injection.Path
+
+class PathInjectionConfiguration extends TaintTracking::Configuration {
+  PathInjectionConfiguration() { this = "Path injection configuration" }
+
+  override predicate isSource(TaintTracking::Source source) {
+    source instanceof HttpRequestTaintSource
+  }
+
+  override predicate isSink(TaintTracking::Sink sink) { sink instanceof OpenNode }
+
+  override predicate isSanitizer(Sanitizer sanitizer) {
+    sanitizer instanceof PathSanitizer or
+    sanitizer instanceof NormalizedPathSanitizer
+  }
+
+  override predicate isExtension(TaintTracking::Extension extension) {
+    extension instanceof AbsPath
+  }
+}
+
+from PathInjectionConfiguration config, TaintedPathSource src, TaintedPathSink sink
+where config.hasFlowPath(src, sink)
+select sink.getSink(), src, sink, "This path depends on $@.", src.getSource(),
+  "a user-provided value"
--- a/python/ql/src/experimental/Security-old-dataflow/CWE-078/CommandInjection.ql
+++ b/python/ql/src/experimental/Security-old-dataflow/CWE-078/CommandInjection.ql
@@ -0,0 +1,43 @@
+/**
+ * @name Uncontrolled command line
+ * @description Using externally controlled strings in a command line may allow a malicious
+ *              user to change the meaning of the command.
+ * @kind path-problem
+ * @problem.severity error
+ * @sub-severity high
+ * @precision high
+ * @id py/command-line-injection
+ * @tags correctness
+ *       security
+ *       external/owasp/owasp-a1
+ *       external/cwe/cwe-078
+ *       external/cwe/cwe-088
+ */
+
+import python
+import semmle.python.security.Paths
+/* Sources */
+import semmle.python.web.HttpRequest
+/* Sinks */
+import semmle.python.security.injection.Command
+
+class CommandInjectionConfiguration extends TaintTracking::Configuration {
+  CommandInjectionConfiguration() { this = "Command injection configuration" }
+
+  override predicate isSource(TaintTracking::Source source) {
+    source instanceof HttpRequestTaintSource
+  }
+
+  override predicate isSink(TaintTracking::Sink sink) { sink instanceof CommandSink }
+
+  override predicate isExtension(TaintTracking::Extension extension) {
+    extension instanceof FirstElementFlow
+    or
+    extension instanceof FabricExecuteExtension
+  }
+}
+
+from CommandInjectionConfiguration config, TaintedPathSource src, TaintedPathSink sink
+where config.hasFlowPath(src, sink)
+select sink.getSink(), src, sink, "This command depends on $@.", src.getSource(),
+  "a user-provided value"
--- a/python/ql/src/experimental/Security-old-dataflow/CWE-079/ReflectedXss.ql
+++ b/python/ql/src/experimental/Security-old-dataflow/CWE-079/ReflectedXss.ql
@@ -0,0 +1,42 @@
+/**
+ * @name Reflected server-side cross-site scripting
+ * @description Writing user input directly to a web page
+ *              allows for a cross-site scripting vulnerability.
+ * @kind path-problem
+ * @problem.severity error
+ * @sub-severity high
+ * @precision high
+ * @id py/reflective-xss
+ * @tags security
+ *       external/cwe/cwe-079
+ *       external/cwe/cwe-116
+ */
+
+import python
+import semmle.python.security.Paths
+/* Sources */
+import semmle.python.web.HttpRequest
+/* Sinks */
+import semmle.python.web.HttpResponse
+/* Flow */
+import semmle.python.security.strings.Untrusted
+
+class ReflectedXssConfiguration extends TaintTracking::Configuration {
+  ReflectedXssConfiguration() { this = "Reflected XSS configuration" }
+
+  override predicate isSource(TaintTracking::Source source) {
+    source instanceof HttpRequestTaintSource
+  }
+
+  override predicate isSink(TaintTracking::Sink sink) {
+    sink instanceof HttpResponseTaintSink and
+    not sink instanceof DjangoResponseContent
+    or
+    sink instanceof DjangoResponseContentXSSVulnerable
+  }
+}
+
+from ReflectedXssConfiguration config, TaintedPathSource src, TaintedPathSink sink
+where config.hasFlowPath(src, sink)
+select sink.getSink(), src, sink, "Cross-site scripting vulnerability due to $@.", src.getSource(),
+  "a user-provided value"
--- a/python/ql/src/experimental/Security-old-dataflow/CWE-089/SqlInjection.ql
+++ b/python/ql/src/experimental/Security-old-dataflow/CWE-089/SqlInjection.ql
@@ -0,0 +1,51 @@
+/**
+ * @name SQL query built from user-controlled sources
+ * @description Building a SQL query from user-controlled sources is vulnerable to insertion of
+ *              malicious SQL code by the user.
+ * @kind path-problem
+ * @problem.severity error
+ * @precision high
+ * @id py/sql-injection
+ * @tags security
+ *       external/cwe/cwe-089
+ *       external/owasp/owasp-a1
+ */
+
+import python
+import semmle.python.security.Paths
+/* Sources */
+import semmle.python.web.HttpRequest
+/* Sinks */
+import semmle.python.security.injection.Sql
+import semmle.python.web.django.Db
+import semmle.python.web.django.Model
+
+class SQLInjectionConfiguration extends TaintTracking::Configuration {
+  SQLInjectionConfiguration() { this = "SQL injection configuration" }
+
+  override predicate isSource(TaintTracking::Source source) {
+    source instanceof HttpRequestTaintSource
+  }
+
+  override predicate isSink(TaintTracking::Sink sink) { sink instanceof SqlInjectionSink }
+}
+
+/*
+ * Additional configuration to support tracking of DB objects (connections, cursors, etc.).
+ * Without this configuration (or the LegacyConfiguration), the pattern of
+ * `any(MyTaintKind k).taints(control_flow_node)` used in DbConnectionExecuteArgument would not work.
+ */
+
+class DbConfiguration extends TaintTracking::Configuration {
+  DbConfiguration() { this = "DB configuration" }
+
+  override predicate isSource(TaintTracking::Source source) {
+    source instanceof DjangoModelObjects or
+    source instanceof DbConnectionSource
+  }
+}
+
+from SQLInjectionConfiguration config, TaintedPathSource src, TaintedPathSink sink
+where config.hasFlowPath(src, sink)
+select sink.getSink(), src, sink, "This SQL query depends on $@.", src.getSource(),
+  "a user-provided value"
--- a/python/ql/src/experimental/Security-old-dataflow/CWE-094/CodeInjection.ql
+++ b/python/ql/src/experimental/Security-old-dataflow/CWE-094/CodeInjection.ql
@@ -0,0 +1,37 @@
+/**
+ * @name Code injection
+ * @description Interpreting unsanitized user input as code allows a malicious user arbitrary
+ *              code execution.
+ * @kind path-problem
+ * @problem.severity error
+ * @sub-severity high
+ * @precision high
+ * @id py/code-injection
+ * @tags security
+ *       external/owasp/owasp-a1
+ *       external/cwe/cwe-094
+ *       external/cwe/cwe-095
+ *       external/cwe/cwe-116
+ */
+
+import python
+import semmle.python.security.Paths
+/* Sources */
+import semmle.python.web.HttpRequest
+/* Sinks */
+import semmle.python.security.injection.Exec
+
+class CodeInjectionConfiguration extends TaintTracking::Configuration {
+  CodeInjectionConfiguration() { this = "Code injection configuration" }
+
+  override predicate isSource(TaintTracking::Source source) {
+    source instanceof HttpRequestTaintSource
+  }
+
+  override predicate isSink(TaintTracking::Sink sink) { sink instanceof StringEvaluationNode }
+}
+
+from CodeInjectionConfiguration config, TaintedPathSource src, TaintedPathSink sink
+where config.hasFlowPath(src, sink)
+select sink.getSink(), src, sink, "$@ flows to here and is interpreted as code.", src.getSource(),
+  "A user-provided value"
--- a/python/ql/src/experimental/Security-old-dataflow/CWE-502/UnsafeDeserialization.ql
+++ b/python/ql/src/experimental/Security-old-dataflow/CWE-502/UnsafeDeserialization.ql
@@ -0,0 +1,37 @@
+/**
+ * @name Deserializing untrusted input
+ * @description Deserializing user-controlled data may allow attackers to execute arbitrary code.
+ * @kind path-problem
+ * @id py/unsafe-deserialization
+ * @problem.severity error
+ * @sub-severity high
+ * @precision high
+ * @tags external/cwe/cwe-502
+ *       security
+ *       serialization
+ */
+
+import python
+import semmle.python.security.Paths
+// Sources -- Any untrusted input
+import semmle.python.web.HttpRequest
+// Flow -- untrusted string
+import semmle.python.security.strings.Untrusted
+// Sink -- Unpickling and other deserialization formats.
+import semmle.python.security.injection.Pickle
+import semmle.python.security.injection.Marshal
+import semmle.python.security.injection.Yaml
+
+class UnsafeDeserializationConfiguration extends TaintTracking::Configuration {
+  UnsafeDeserializationConfiguration() { this = "Unsafe deserialization configuration" }
+
+  override predicate isSource(TaintTracking::Source source) {
+    source instanceof HttpRequestTaintSource
+  }
+
+  override predicate isSink(TaintTracking::Sink sink) { sink instanceof DeserializationSink }
+}
+
+from UnsafeDeserializationConfiguration config, TaintedPathSource src, TaintedPathSink sink
+where config.hasFlowPath(src, sink)
+select sink.getSink(), src, sink, "Deserializing of $@.", src.getSource(), "untrusted input"
--- a/python/ql/src/experimental/semmle/python/frameworks/Django.qll
+++ b/python/ql/src/experimental/semmle/python/frameworks/Django.qll
@@ -133,49 +133,20 @@ private module Django {
            t.startInAttr("Model") and
            result = models()
            or
+            // subclass: a class definition that has a reference to `Model` among its bases
+            result.asExpr().(ClassExpr).getABase() = classRef(t.continue()).asExpr()
+            or
            exists(DataFlow::TypeTracker t2 | result = classRef(t2).track(t2, t))
          }

          /** Gets a reference to the `django.db.models.Model` class. */
          DataFlow::Node classRef() { result = classRef(DataFlow::TypeTracker::end()) }
-
-          /** Gets a definition of a subclass the `django.db.models.Model` class. */
-          class SubclassDef extends ControlFlowNode {
-            string name;
-
-            SubclassDef() {
-              exists(ClassExpr ce |
-                this.getNode() = ce and
-                ce.getABase() = classRef().asExpr() and
-                ce.getName() = name
-              )
-            }
-
-            string getName() { result = name }
-          }
-
-          /**
-           * A reference to a class that is a subclass of the `django.db.models.Model` class.
-           * This is an approximation, since it simply matches identifiers.
-           */
-          private DataFlow::Node subclassRef(DataFlow::TypeTracker t) {
-            t.start() and
-            result.asCfgNode().(NameNode).getId() = any(SubclassDef cd).getName()
-            or
-            exists(DataFlow::TypeTracker t2 | result = subclassRef(t2).track(t2, t))
-          }
-
-          /**
-           * A reference to a class that is a subclass of the `django.db.models.Model` class.
-           * This is an approximation, since it simply matches identifiers.
-           */
-          DataFlow::Node subclassRef() { result = subclassRef(DataFlow::TypeTracker::end()) }
        }

        /** Gets a reference to the `objects` object of a django model. */
        private DataFlow::Node objects(DataFlow::TypeTracker t) {
          t.startInAttr("objects") and
-          result = Model::subclassRef()
+          result = Model::classRef()
          or
          exists(DataFlow::TypeTracker t2 | result = objects(t2).track(t2, t))
        }
--- a/python/ql/src/python.qll
+++ b/python/ql/src/python.qll
@@ -35,3 +35,7 @@ import semmle.python.pointsto.Context
 import semmle.python.pointsto.CallGraph
 import semmle.python.objects.ObjectAPI
 import site
+// Removing this import perturbs the compilation process enough that the points-to analysis gets
+// compiled -- and cached -- differently depending on whether the data flow library is imported. By
+// importing it privately here, we ensure that the points-to analysis is compiled the same way.
+private import experimental.dataflow.DataFlow