Merge branch 'main' into stdlib-optparse

2026-02-23 18:33:42 +01:00 · 2024-10-01 12:48:09 +02:00
parent 2eac11edd6 cb0b388345
commit 7816f34d75
735 changed files with 7366 additions and 4701 deletions
--- a/python/ql/lib/CHANGELOG.md
+++ b/python/ql/lib/CHANGELOG.md
@@ -1,3 +1,13 @@
+## 2.1.0
+
+### New Features
+
+* Added support for custom threat-models, which can be used in most of our taint-tracking queries. See our [documentation](https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/customizing-your-advanced-setup-for-code-scanning#extending-codeql-coverage-with-threat-models) for more details.
+
+### Minor Analysis Improvements
+
+* The common sanitizer guard `StringConstCompareBarrier` has been renamed to `ConstCompareBarrier` and expanded to cover comparisons with other constant values such as `None`. This may result in fewer false positive results for several queries. 
+
 ## 2.0.0

 ### Breaking Changes
--- a/python/ql/lib/change-notes/2024-09-20-const-compare-gaurd.md
+++ b/python/ql/lib/change-notes/2024-09-20-const-compare-gaurd.md
@@ -1,4 +0,0 @@
---
-category: minorAnalysis
---
-* The common sanitizer guard `StringConstCompareBarrier` has been renamed to `ConstCompareBarrier` and expanded to cover comparisons with other constant values such as `None`. This may result in fewer false positive results for several queries. 
--- a/python/ql/lib/change-notes/released/2.1.0.md
+++ b/python/ql/lib/change-notes/released/2.1.0.md
@@ -0,0 +1,9 @@
+## 2.1.0
+
+### New Features
+
+* Added support for custom threat-models, which can be used in most of our taint-tracking queries. See our [documentation](https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/customizing-your-advanced-setup-for-code-scanning#extending-codeql-coverage-with-threat-models) for more details.
+
+### Minor Analysis Improvements
+
+* The common sanitizer guard `StringConstCompareBarrier` has been renamed to `ConstCompareBarrier` and expanded to cover comparisons with other constant values such as `None`. This may result in fewer false positive results for several queries. 
--- a/python/ql/lib/codeql-pack.release.yml
+++ b/python/ql/lib/codeql-pack.release.yml
@@ -1,2 +1,2 @@
 ---
-lastReleaseVersion: 2.0.0
+lastReleaseVersion: 2.1.0
--- a/python/ql/lib/ext/default-threat-models-fixup.model.yml
+++ b/python/ql/lib/ext/default-threat-models-fixup.model.yml
@@ -0,0 +1,8 @@
+extensions:
+  - addsTo:
+      pack: codeql/threat-models
+      extensible: threatModelConfiguration
+    data:
+    # Since the `response` threat model is enabled by default in the shared threat-models
+    # configuration, we need to disable it here to keep the existing behavior for the Python analysis.
+      - ["response", false, -2147483647]
--- a/python/ql/lib/qlpack.yml
+++ b/python/ql/lib/qlpack.yml
@@ -1,5 +1,5 @@
 name: codeql/python-all
-version: 2.0.1-dev
+version: 2.1.1-dev
 groups: python
 dbscheme: semmlecode.python.dbscheme
 extractor: python
@@ -9,10 +9,12 @@ dependencies:
  codeql/dataflow: ${workspace}
  codeql/mad: ${workspace}
  codeql/regex: ${workspace}
+  codeql/threat-models: ${workspace}
  codeql/tutorial: ${workspace}
  codeql/util: ${workspace}
  codeql/xml: ${workspace}
  codeql/yaml: ${workspace}
 dataExtensions:
  - semmle/python/frameworks/**/*.model.yml
+  - ext/*.model.yml
 warnOnImplicitThis: true
--- a/python/ql/lib/semmle/python/Concepts.qll
+++ b/python/ql/lib/semmle/python/Concepts.qll
@@ -10,6 +10,62 @@ private import semmle.python.dataflow.new.RemoteFlowSources
 private import semmle.python.dataflow.new.TaintTracking
 private import semmle.python.Frameworks
 private import semmle.python.security.internal.EncryptionKeySizes
+private import codeql.threatmodels.ThreatModels
+
+/**
+ * A data flow source, for a specific threat-model.
+ *
+ * Extend this class to refine existing API models. If you want to model new APIs,
+ * extend `ThreatModelSource::Range` instead.
+ */
+class ThreatModelSource extends DataFlow::Node instanceof ThreatModelSource::Range {
+  /**
+   * Gets a string that represents the source kind with respect to threat modeling.
+   *
+   * See
+   * - https://github.com/github/codeql/blob/main/docs/codeql/reusables/threat-model-description.rst
+   * - https://github.com/github/codeql/blob/main/shared/threat-models/ext/threat-model-grouping.model.yml
+   */
+  string getThreatModel() { result = super.getThreatModel() }
+
+  /** Gets a string that describes the type of this threat-model source. */
+  string getSourceType() { result = super.getSourceType() }
+}
+
+/** Provides a class for modeling new sources for specific threat-models. */
+module ThreatModelSource {
+  /**
+   * A data flow source, for a specific threat-model.
+   *
+   * Extend this class to model new APIs. If you want to refine existing API models,
+   * extend `ThreatModelSource` instead.
+   */
+  abstract class Range extends DataFlow::Node {
+    /**
+     * Gets a string that represents the source kind with respect to threat modeling.
+     *
+     * See
+     * - https://github.com/github/codeql/blob/main/docs/codeql/reusables/threat-model-description.rst
+     * - https://github.com/github/codeql/blob/main/shared/threat-models/ext/threat-model-grouping.model.yml
+     */
+    abstract string getThreatModel();
+
+    /** Gets a string that describes the type of this threat-model source. */
+    abstract string getSourceType();
+  }
+}
+
+/**
+ * A data flow source that is enabled in the current threat model configuration.
+ */
+class ActiveThreatModelSource extends ThreatModelSource {
+  ActiveThreatModelSource() {
+    exists(string kind |
+      currentThreatModel(kind) and
+      this.getThreatModel() = kind
+    )
+  }
+}

 /**
 * A data-flow node that executes an operating system command,
--- a/python/ql/lib/semmle/python/dataflow/new/RemoteFlowSources.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/RemoteFlowSources.qll
@@ -15,10 +15,7 @@ private import semmle.python.Concepts
 * Extend this class to refine existing API models. If you want to model new APIs,
 * extend `RemoteFlowSource::Range` instead.
 */
-class RemoteFlowSource extends DataFlow::Node instanceof RemoteFlowSource::Range {
-  /** Gets a string that describes the type of this remote flow source. */
-  string getSourceType() { result = super.getSourceType() }
-}
+class RemoteFlowSource extends ThreatModelSource instanceof RemoteFlowSource::Range { }

 /** Provides a class for modeling new sources of remote user input. */
 module RemoteFlowSource {
@@ -28,8 +25,7 @@ module RemoteFlowSource {
   * Extend this class to model new APIs. If you want to refine existing API models,
   * extend `RemoteFlowSource` instead.
   */
-  abstract class Range extends DataFlow::Node {
-    /** Gets a string that describes the type of this remote flow source. */
-    abstract string getSourceType();
+  abstract class Range extends ThreatModelSource::Range {
+    override string getThreatModel() { result = "remote" }
  }
 }
--- a/python/ql/lib/semmle/python/frameworks/PEP249.qll
+++ b/python/ql/lib/semmle/python/frameworks/PEP249.qll
@@ -81,6 +81,24 @@ module PEP249 {
    }
  }

+  /** A call to a method that fetches rows from a previous execution. */
+  private class FetchMethodCall extends ThreatModelSource::Range, API::CallNode {
+    FetchMethodCall() {
+      exists(API::Node start |
+        start instanceof DatabaseCursor or start instanceof DatabaseConnection
+      |
+        // Note: since we can't currently provide access paths for sources, these are all
+        // lumped together, although `fetchmany`/`fetchall` clearly return a
+        // list/iterable of rows.
+        this = start.getMember(["fetchone", "fetchmany", "fetchall"]).getACall()
+      )
+    }
+
+    override string getThreatModel() { result = "database" }
+
+    override string getSourceType() { result = "cursor.fetch*()" }
+  }
+
  // ---------------------------------------------------------------------------
  // asyncio implementations
  // ---------------------------------------------------------------------------
--- a/python/ql/lib/semmle/python/frameworks/Stdlib.model.yml
+++ b/python/ql/lib/semmle/python/frameworks/Stdlib.model.yml
@@ -0,0 +1,29 @@
+extensions:
+  - addsTo:
+      pack: codeql/python-all
+      extensible: sourceModel
+    data:
+      - ['os', 'Member[getenv].ReturnValue', 'environment']
+      - ['os', 'Member[getenvb].ReturnValue', 'environment']
+      - ['os', 'Member[environ]', 'environment']
+      - ['os', 'Member[environb]', 'environment']
+      - ['posix', 'Member[environ]', 'environment']
+
+      - ['sys', 'Member[argv]', 'commandargs']
+      - ['sys', 'Member[orig_argv]', 'commandargs']
+
+      - ['sys', 'Member[stdin]', 'stdin']
+      - ['builtins', 'Member[input].ReturnValue', 'stdin']
+      - ['builtins', 'Member[raw_input].ReturnValue', 'stdin'] # python 2 only
+
+
+      # When called without arguments, parsing defaults to `sys.argv[1:]`, hence `WithArity[0]`.
+      - ['argparse.ArgumentParser', 'Member[parse_args,parse_known_args].WithArity[0].ReturnValue', 'commandargs']
+
+      - ['os', 'Member[read].ReturnValue', 'file']
+  - addsTo:
+      pack: codeql/python-all
+      extensible: summaryModel
+    data:
+      - ['argparse.ArgumentParser', 'Member[parse_args,parse_known_args]', 'Argument[0,args:]', 'ReturnValue', 'taint']
+      # Note: taint of attribute lookups is handled in QL (`ArgumentParserAnyAttributeStep` in Stdlib.qll).
--- a/python/ql/lib/semmle/python/frameworks/Stdlib.qll
+++ b/python/ql/lib/semmle/python/frameworks/Stdlib.qll
@@ -349,7 +349,7 @@ module StdlibPrivate {
   * Modeling of path related functions in the `os` module.
   * Wrapped in QL module to make it easy to fold/unfold.
   */
-  private module OsFileSystemAccessModeling {
+  module OsFileSystemAccessModeling {
    /**
     * A call to the `os.fsencode` function.
     *
@@ -406,7 +406,7 @@ module StdlibPrivate {
     *
     * See https://docs.python.org/3/library/os.html#os.open
     */
-    private class OsOpenCall extends FileSystemAccess::Range, DataFlow::CallCfgNode {
+    class OsOpenCall extends FileSystemAccess::Range, DataFlow::CallCfgNode {
      OsOpenCall() { this = os().getMember("open").getACall() }

      override DataFlow::Node getAPathArgument() {
@@ -1513,13 +1513,22 @@ module StdlibPrivate {
   * See https://docs.python.org/3/library/functions.html#open
   */
  private class OpenCall extends FileSystemAccess::Range, Stdlib::FileLikeObject::InstanceSource,
-    DataFlow::CallCfgNode
+    ThreatModelSource::Range, DataFlow::CallCfgNode
  {
-    OpenCall() { this = getOpenFunctionRef().getACall() }
+    OpenCall() {
+      this = getOpenFunctionRef().getACall() and
+      // When analyzing the stdlib code for `os.py`, we wrongly assume that `os.open` is an
+      // alias of the builtin `open` function, so such calls are excluded here.
+      not this instanceof OsFileSystemAccessModeling::OsOpenCall
+    }

    override DataFlow::Node getAPathArgument() {
      result in [this.getArg(0), this.getArgByName("file")]
    }
+
+    override string getThreatModel() { result = "file" }
+
+    override string getSourceType() { result = "open()" }
  }

  /**
@@ -5008,6 +5017,39 @@ module StdlibPrivate {

    override string getKind() { result = Escaping::getHtmlKind() }
  }
+
+  // ---------------------------------------------------------------------------
+  // argparse
+  // ---------------------------------------------------------------------------
+  /**
+   * If the result of `parse_args` is tainted (because it uses command-line arguments),
+   * then the parsed values accessed through any attribute lookup are also tainted.
+   */
+  private class ArgumentParserAnyAttributeStep extends TaintTracking::AdditionalTaintStep {
+    override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
+      nodeFrom =
+        API::moduleImport("argparse")
+            .getMember("ArgumentParser")
+            .getReturn()
+            .getMember("parse_args")
+            .getReturn()
+            .getAValueReachableFromSource() and
+      nodeTo.(DataFlow::AttrRead).getObject() = nodeFrom
+    }
+  }
+
+  // ---------------------------------------------------------------------------
+  // sys
+  // ---------------------------------------------------------------------------
+  /**
+   * An access of `sys.stdin`/`sys.stdout`/`sys.stderr`, to get additional FileLike
+   * modeling.
+   */
+  private class SysStandardStreams extends Stdlib::FileLikeObject::InstanceSource, DataFlow::Node {
+    SysStandardStreams() {
+      this = API::moduleImport("sys").getMember(["stdin", "stdout", "stderr"]).asSource()
+    }
+  }
 }

 // ---------------------------------------------------------------------------
--- a/python/ql/lib/semmle/python/frameworks/data/ModelsAsData.qll
+++ b/python/ql/lib/semmle/python/frameworks/data/ModelsAsData.qll
@@ -18,14 +18,19 @@ private import semmle.python.dataflow.new.RemoteFlowSources
 private import semmle.python.dataflow.new.DataFlow
 private import semmle.python.ApiGraphs
 private import semmle.python.dataflow.new.FlowSummary
+private import semmle.python.Concepts

 /**
- * A remote flow source originating from a CSV source row.
+ * A threat-model flow source originating from a data extension.
 */
-private class RemoteFlowSourceFromCsv extends RemoteFlowSource::Range {
-  RemoteFlowSourceFromCsv() { this = ModelOutput::getASourceNode("remote").asSource() }
+private class ThreatModelSourceFromDataExtension extends ThreatModelSource::Range {
+  ThreatModelSourceFromDataExtension() { this = ModelOutput::getASourceNode(_).asSource() }

-  override string getSourceType() { result = "Remote flow (from model)" }
+  override string getThreatModel() { this = ModelOutput::getASourceNode(result).asSource() }
+
+  override string getSourceType() {
+    result = "Source node (" + this.getThreatModel() + ") [from data-extension]"
+  }
 }

 private class SummarizedCallableFromModel extends SummarizedCallable {
--- a/python/ql/lib/semmle/python/security/dataflow/CodeInjectionCustomizations.qll
+++ b/python/ql/lib/semmle/python/security/dataflow/CodeInjectionCustomizations.qll
@@ -33,9 +33,14 @@ module CodeInjection {
  abstract class Sanitizer extends DataFlow::Node { }

  /**
-   * A source of remote user input, considered as a flow source.
+   * DEPRECATED: Use `ActiveThreatModelSource` from Concepts instead!
   */
-  class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { }
+  deprecated class RemoteFlowSourceAsSource = ActiveThreatModelSourceAsSource;
+
+  /**
+   * An active threat-model source, considered as a flow source.
+   */
+  private class ActiveThreatModelSourceAsSource extends Source, ActiveThreatModelSource { }

  /**
   * A code execution, considered as a flow sink.
--- a/python/ql/lib/semmle/python/security/dataflow/CommandInjectionCustomizations.qll
+++ b/python/ql/lib/semmle/python/security/dataflow/CommandInjectionCustomizations.qll
@@ -33,9 +33,14 @@ module CommandInjection {
  abstract class Sanitizer extends DataFlow::Node { }

  /**
-   * A source of remote user input, considered as a flow source.
+   * DEPRECATED: Use `ActiveThreatModelSource` from Concepts instead!
   */
-  class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { }
+  deprecated class RemoteFlowSourceAsSource = ActiveThreatModelSourceAsSource;
+
+  /**
+   * An active threat-model source, considered as a flow source.
+   */
+  private class ActiveThreatModelSourceAsSource extends Source, ActiveThreatModelSource { }

  /**
   * A command execution, considered as a flow sink.
--- a/python/ql/lib/semmle/python/security/dataflow/CookieInjectionCustomizations.qll
+++ b/python/ql/lib/semmle/python/security/dataflow/CookieInjectionCustomizations.qll
@@ -31,9 +31,14 @@ module CookieInjection {
  abstract class Sanitizer extends DataFlow::Node { }

  /**
-   * A source of remote user input, considered as a flow source.
+   * DEPRECATED: Use `ActiveThreatModelSource` from Concepts instead!
   */
-  class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { }
+  deprecated class RemoteFlowSourceAsSource = ActiveThreatModelSourceAsSource;
+
+  /**
+   * An active threat-model source, considered as a flow source.
+   */
+  private class ActiveThreatModelSourceAsSource extends Source, ActiveThreatModelSource { }

  /**
   * A write to a cookie, considered as a sink.
--- a/python/ql/lib/semmle/python/security/dataflow/HttpHeaderInjectionCustomizations.qll
+++ b/python/ql/lib/semmle/python/security/dataflow/HttpHeaderInjectionCustomizations.qll
@@ -32,9 +32,14 @@ module HttpHeaderInjection {
  abstract class Sanitizer extends DataFlow::Node { }

  /**
-   * A source of remote user input, considered as a flow source.
+   * DEPRECATED: Use `ActiveThreatModelSource` from Concepts instead!
   */
-  class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { }
+  deprecated class RemoteFlowSourceAsSource = ActiveThreatModelSourceAsSource;
+
+  /**
+   * An active threat-model source, considered as a flow source.
+   */
+  private class ActiveThreatModelSourceAsSource extends Source, ActiveThreatModelSource { }

  /**
   * A HTTP header write, considered as a flow sink.
--- a/python/ql/lib/semmle/python/security/dataflow/LdapInjectionCustomizations.qll
+++ b/python/ql/lib/semmle/python/security/dataflow/LdapInjectionCustomizations.qll
@@ -42,9 +42,14 @@ module LdapInjection {
  abstract class FilterSanitizer extends DataFlow::Node { }

  /**
-   * A source of remote user input, considered as a flow source.
+   * DEPRECATED: Use `ActiveThreatModelSource` from Concepts instead!
   */
-  class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { }
+  deprecated class RemoteFlowSourceAsSource = ActiveThreatModelSourceAsSource;
+
+  /**
+   * An active threat-model source, considered as a flow source.
+   */
+  private class ActiveThreatModelSourceAsSource extends Source, ActiveThreatModelSource { }

  /**
   * A logging operation, considered as a flow sink.
--- a/python/ql/lib/semmle/python/security/dataflow/LogInjectionCustomizations.qll
+++ b/python/ql/lib/semmle/python/security/dataflow/LogInjectionCustomizations.qll
@@ -33,9 +33,14 @@ module LogInjection {
  abstract class Sanitizer extends DataFlow::Node { }

  /**
-   * A source of remote user input, considered as a flow source.
+   * DEPRECATED: Use `ActiveThreatModelSource` from Concepts instead!
   */
-  class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { }
+  deprecated class RemoteFlowSourceAsSource = ActiveThreatModelSourceAsSource;
+
+  /**
+   * An active threat-model source, considered as a flow source.
+   */
+  private class ActiveThreatModelSourceAsSource extends Source, ActiveThreatModelSource { }

  /**
   * A logging operation, considered as a flow sink.
--- a/python/ql/lib/semmle/python/security/dataflow/PamAuthorizationCustomizations.qll
+++ b/python/ql/lib/semmle/python/security/dataflow/PamAuthorizationCustomizations.qll
@@ -7,6 +7,7 @@ import python
 import semmle.python.ApiGraphs
 import semmle.python.dataflow.new.TaintTracking
 import semmle.python.dataflow.new.RemoteFlowSources
+import semmle.python.Concepts

 /**
 * Provides default sources, sinks and sanitizers for detecting
@@ -39,9 +40,14 @@ module PamAuthorizationCustomizations {
  abstract class Sink extends DataFlow::Node { }

  /**
-   * A source of remote user input, considered as a flow source.
+   * DEPRECATED: Use `ActiveThreatModelSource` from Concepts instead!
   */
-  class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { }
+  deprecated class RemoteFlowSourceAsSource = ActiveThreatModelSourceAsSource;
+
+  /**
+   * An active threat-model source, considered as a flow source.
+   */
+  private class ActiveThreatModelSourceAsSource extends Source, ActiveThreatModelSource { }

  /**
   * A vulnerable `pam_authenticate` call considered as a flow sink.
--- a/python/ql/lib/semmle/python/security/dataflow/PathInjectionCustomizations.qll
+++ b/python/ql/lib/semmle/python/security/dataflow/PathInjectionCustomizations.qll
@@ -43,9 +43,14 @@ module PathInjection {
  abstract class Sanitizer extends DataFlow::Node { }

  /**
-   * A source of remote user input, considered as a flow source.
+   * DEPRECATED: Use `ActiveThreatModelSource` from Concepts instead!
   */
-  class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { }
+  deprecated class RemoteFlowSourceAsSource = ActiveThreatModelSourceAsSource;
+
+  /**
+   * An active threat-model source, considered as a flow source.
+   */
+  private class ActiveThreatModelSourceAsSource extends Source, ActiveThreatModelSource { }

  /**
   * A file system access, considered as a flow sink.
--- a/python/ql/lib/semmle/python/security/dataflow/PolynomialReDoSCustomizations.qll
+++ b/python/ql/lib/semmle/python/security/dataflow/PolynomialReDoSCustomizations.qll
@@ -47,9 +47,14 @@ module PolynomialReDoS {
  abstract class Sanitizer extends DataFlow::Node { }

  /**
-   * A source of remote user input, considered as a flow source.
+   * DEPRECATED: Use `ActiveThreatModelSource` from Concepts instead!
   */
-  class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { }
+  deprecated class RemoteFlowSourceAsSource = ActiveThreatModelSourceAsSource;
+
+  /**
+   * An active threat-model source, considered as a flow source.
+   */
+  private class ActiveThreatModelSourceAsSource extends Source, ActiveThreatModelSource { }

  /**
   * A regex execution, considered as a flow sink.
--- a/python/ql/lib/semmle/python/security/dataflow/ReflectedXSSCustomizations.qll
+++ b/python/ql/lib/semmle/python/security/dataflow/ReflectedXSSCustomizations.qll
@@ -33,9 +33,14 @@ module ReflectedXss {
  abstract class Sanitizer extends DataFlow::Node { }

  /**
-   * A source of remote user input, considered as a flow source.
+   * DEPRECATED: Use `ActiveThreatModelSource` from Concepts instead!
   */
-  class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { }
+  deprecated class RemoteFlowSourceAsSource = ActiveThreatModelSourceAsSource;
+
+  /**
+   * An active threat-model source, considered as a flow source.
+   */
+  private class ActiveThreatModelSourceAsSource extends Source, ActiveThreatModelSource { }

  /**
   * A data flow sink for "reflected cross-site scripting" vulnerabilities.
--- a/python/ql/lib/semmle/python/security/dataflow/RegexInjectionCustomizations.qll
+++ b/python/ql/lib/semmle/python/security/dataflow/RegexInjectionCustomizations.qll
@@ -40,9 +40,14 @@ module RegexInjection {
  abstract class Sanitizer extends DataFlow::Node { }

  /**
-   * A source of remote user input, considered as a flow source.
+   * DEPRECATED: Use `ActiveThreatModelSource` from Concepts instead!
   */
-  class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { }
+  deprecated class RemoteFlowSourceAsSource = ActiveThreatModelSourceAsSource;
+
+  /**
+   * An active threat-model source, considered as a flow source.
+   */
+  private class ActiveThreatModelSourceAsSource extends Source, ActiveThreatModelSource { }

  /**
   * A regex escaping, considered as a sanitizer.
--- a/python/ql/lib/semmle/python/security/dataflow/ServerSideRequestForgeryCustomizations.qll
+++ b/python/ql/lib/semmle/python/security/dataflow/ServerSideRequestForgeryCustomizations.qll
@@ -45,9 +45,14 @@ module ServerSideRequestForgery {
  abstract class FullUrlControlSanitizer extends DataFlow::Node { }

  /**
-   * A source of remote user input, considered as a flow source.
+   * DEPRECATED: Use `ActiveThreatModelSource` from Concepts instead!
   */
-  class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { }
+  deprecated class RemoteFlowSourceAsSource = ActiveThreatModelSourceAsSource;
+
+  /**
+   * An active threat-model source, considered as a flow source.
+   */
+  private class ActiveThreatModelSourceAsSource extends Source, ActiveThreatModelSource { }

  /** The URL of an HTTP request, considered as a sink. */
  class HttpRequestUrlAsSink extends Sink {
--- a/python/ql/lib/semmle/python/security/dataflow/SqlInjectionCustomizations.qll
+++ b/python/ql/lib/semmle/python/security/dataflow/SqlInjectionCustomizations.qll
@@ -32,9 +32,14 @@ module SqlInjection {
  abstract class Sanitizer extends DataFlow::Node { }

  /**
-   * A source of remote user input, considered as a flow source.
+   * DEPRECATED: Use `ActiveThreatModelSource` from Concepts instead!
   */
-  class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { }
+  deprecated class RemoteFlowSourceAsSource = ActiveThreatModelSourceAsSource;
+
+  /**
+   * An active threat-model source, considered as a flow source.
+   */
+  private class ActiveThreatModelSourceAsSource extends Source, ActiveThreatModelSource { }

  /**
   * A SQL statement of a SQL construction, considered as a flow sink.
--- a/python/ql/lib/semmle/python/security/dataflow/UnsafeDeserializationCustomizations.qll
+++ b/python/ql/lib/semmle/python/security/dataflow/UnsafeDeserializationCustomizations.qll
@@ -33,9 +33,14 @@ module UnsafeDeserialization {
  abstract class Sanitizer extends DataFlow::Node { }

  /**
-   * A source of remote user input, considered as a flow source.
+   * DEPRECATED: Use `ActiveThreatModelSource` from Concepts instead!
   */
-  class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { }
+  deprecated class RemoteFlowSourceAsSource = ActiveThreatModelSourceAsSource;
+
+  /**
+   * An active threat-model source, considered as a flow source.
+   */
+  private class ActiveThreatModelSourceAsSource extends Source, ActiveThreatModelSource { }

  /**
   * An insecure decoding, considered as a flow sink.
--- a/python/ql/lib/semmle/python/security/dataflow/UrlRedirectCustomizations.qll
+++ b/python/ql/lib/semmle/python/security/dataflow/UrlRedirectCustomizations.qll
@@ -77,9 +77,14 @@ module UrlRedirect {
  }

  /**
-   * A source of remote user input, considered as a flow source.
+   * DEPRECATED: Use `ActiveThreatModelSource` from Concepts instead!
   */
-  class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { }
+  deprecated class RemoteFlowSourceAsSource = ActiveThreatModelSourceAsSource;
+
+  /**
+   * An active threat-model source, considered as a flow source.
+   */
+  private class ActiveThreatModelSourceAsSource extends Source, ActiveThreatModelSource { }

  /**
   * A HTTP redirect response, considered as a flow sink.
--- a/python/ql/lib/semmle/python/security/dataflow/XpathInjectionCustomizations.qll
+++ b/python/ql/lib/semmle/python/security/dataflow/XpathInjectionCustomizations.qll
@@ -30,9 +30,14 @@ module XpathInjection {
  abstract class Sanitizer extends DataFlow::Node { }

  /**
-   * A source of remote user input, considered as a flow source.
+   * DEPRECATED: Use `ActiveThreatModelSource` from Concepts instead!
   */
-  class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { }
+  deprecated class RemoteFlowSourceAsSource = ActiveThreatModelSourceAsSource;
+
+  /**
+   * An active threat-model source, considered as a flow source.
+   */
+  private class ActiveThreatModelSourceAsSource extends Source, ActiveThreatModelSource { }

  /**
   * A construction of an XPath expression, considered as a sink.
--- a/python/ql/src/CHANGELOG.md
+++ b/python/ql/src/CHANGELOG.md
@@ -1,3 +1,9 @@
+## 1.3.0
+
+### New Queries
+
+* The `py/cors-misconfiguration-with-credentials` query, which finds insecure CORS middleware configurations.
+
 ## 1.2.2

 ### Minor Analysis Improvements
--- a/python/ql/src/change-notes/2024-08-26-Cors-misconfiguration-middleware.md
+++ b/python/ql/src/change-notes/2024-08-26-Cors-misconfiguration-middleware.md
@@ -1,4 +0,0 @@
---
-category: newQuery
---
-* The `py/cors-misconfiguration-with-credentials` query, which finds insecure CORS middleware configurations.
--- a/python/ql/src/change-notes/released/1.3.0.md
+++ b/python/ql/src/change-notes/released/1.3.0.md
@@ -0,0 +1,5 @@
+## 1.3.0
+
+### New Queries
+
+* The `py/cors-misconfiguration-with-credentials` query, which finds insecure CORS middleware configurations.
--- a/python/ql/src/codeql-pack.release.yml
+++ b/python/ql/src/codeql-pack.release.yml
@@ -1,2 +1,2 @@
 ---
-lastReleaseVersion: 1.2.2
+lastReleaseVersion: 1.3.0
--- a/python/ql/src/experimental/Security/CWE-074/TemplateInjectionCustomizations.qll
+++ b/python/ql/src/experimental/Security/CWE-074/TemplateInjectionCustomizations.qll
@@ -33,9 +33,14 @@ module TemplateInjection {
  abstract class Sanitizer extends DataFlow::Node { }

  /**
-   * A source of remote user input, considered as a flow source.
+   * DEPRECATED: Use `ActiveThreatModelSource` from Concepts instead!
   */
-  class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { }
+  deprecated class RemoteFlowSourceAsSource = ActiveThreatModelSourceAsSource;
+
+  /**
+   * An active threat-model source, considered as a flow source.
+   */
+  private class ActiveThreatModelSourceAsSource extends Source, ActiveThreatModelSource { }

  /**
   * A SQL statement of a SQL construction, considered as a flow sink.
--- a/python/ql/src/experimental/Security/CWE-091/XsltInjectionCustomizations.qll
+++ b/python/ql/src/experimental/Security/CWE-091/XsltInjectionCustomizations.qll
@@ -33,9 +33,14 @@ module XsltInjection {
  abstract class Sanitizer extends DataFlow::Node { }

  /**
-   * A source of remote user input, considered as a flow source.
+   * DEPRECATED: Use `ActiveThreatModelSource` from Concepts instead!
   */
-  class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { }
+  deprecated class RemoteFlowSourceAsSource = ActiveThreatModelSourceAsSource;
+
+  /**
+   * An active threat-model source, considered as a flow source.
+   */
+  private class ActiveThreatModelSourceAsSource extends Source, ActiveThreatModelSource { }

  /**
   * An XSLT construction, considered as a flow sink.
--- a/python/ql/src/experimental/Security/CWE-094/Js2Py.ql
+++ b/python/ql/src/experimental/Security/CWE-094/Js2Py.ql
@@ -18,7 +18,7 @@ import semmle.python.dataflow.new.RemoteFlowSources
 import semmle.python.Concepts

 module Js2PyFlowConfig implements DataFlow::ConfigSig {
-  predicate isSource(DataFlow::Node node) { node instanceof RemoteFlowSource }
+  predicate isSource(DataFlow::Node node) { node instanceof ActiveThreatModelSource }

  predicate isSink(DataFlow::Node node) {
    API::moduleImport("js2py").getMember(["eval_js", "eval_js6", "EvalJs"]).getACall().getArg(_) =
--- a/python/ql/src/qlpack.yml
+++ b/python/ql/src/qlpack.yml
@@ -1,5 +1,5 @@
 name: codeql/python-queries
-version: 1.2.3-dev
+version: 1.3.1-dev
 groups:
  - python
  - queries
--- a/python/ql/test/experimental/meta/ConceptsTest.qll
+++ b/python/ql/test/experimental/meta/ConceptsTest.qll
@@ -3,6 +3,7 @@ import semmle.python.dataflow.new.DataFlow
 import semmle.python.Concepts
 import TestUtilities.InlineExpectationsTest
 private import semmle.python.dataflow.new.internal.PrintNode
+private import codeql.threatmodels.ThreatModels

 module SystemCommandExecutionTest implements TestSig {
  string getARelevantTag() { result = "getCommand" }
@@ -632,6 +633,22 @@ module XmlParsingTest implements TestSig {
  }
 }

+module ThreatModelSourceTest implements TestSig {
+  string getARelevantTag() {
+    exists(string kind | knownThreatModel(kind) | result = "threatModelSource" + "[" + kind + "]")
+  }
+
+  predicate hasActualResult(Location location, string element, string tag, string value) {
+    exists(location.getFile().getRelativePath()) and
+    exists(ThreatModelSource src | not src.getThreatModel() = "remote" |
+      location = src.getLocation() and
+      element = src.toString() and
+      value = prettyNodeForInlineTest(src) and
+      tag = "threatModelSource[" + src.getThreatModel() + "]"
+    )
+  }
+}
+
 module CorsMiddlewareTest implements TestSig {
  string getARelevantTag() { result = "CorsMiddleware" }

@@ -656,4 +673,4 @@ import MakeTest<MergeTests5<MergeTests5<SystemCommandExecutionTest, DecodingTest
  MergeTests5<FileSystemAccessTest, FileSystemWriteAccessTest, PathNormalizationTest,
    SafeAccessCheckTest, PublicKeyGenerationTest>,
  MergeTests5<CryptographicOperationTest, HttpClientRequestTest, CsrfProtectionSettingTest,
-    CsrfLocalProtectionSettingTest, XmlParsingTest>>>
+    CsrfLocalProtectionSettingTest, MergeTests<XmlParsingTest, ThreatModelSourceTest>>>>
--- a/python/ql/test/experimental/meta/InlineTaintTest.qll
+++ b/python/ql/test/experimental/meta/InlineTaintTest.qll
@@ -15,6 +15,7 @@ import semmle.python.dataflow.new.TaintTracking
 import semmle.python.dataflow.new.RemoteFlowSources
 import TestUtilities.InlineExpectationsTest
 private import semmle.python.dataflow.new.internal.PrintNode
+private import semmle.python.Concepts

 DataFlow::Node shouldBeTainted() {
  exists(DataFlow::CallCfgNode call |
@@ -45,7 +46,7 @@ module Conf {
        source.(DataFlow::CfgNode).getNode() = call.getAnArg()
      )
      or
-      source instanceof RemoteFlowSource
+      source instanceof ThreatModelSource
    }

    predicate isSink(DataFlow::Node sink) {
--- a/python/ql/test/library-tests/frameworks/django-v2-v3/manage.py
+++ b/python/ql/test/library-tests/frameworks/django-v2-v3/manage.py
@@ -6,7 +6,7 @@ import sys

 def main():
    """Run administrative tasks."""
-    os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'testproj.settings')
+    os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'testproj.settings') # $ threatModelSource[environment]=os.environ
    try:
        from django.core.management import execute_from_command_line
    except ImportError as exc:
@@ -15,7 +15,7 @@ def main():
            "available on your PYTHONPATH environment variable? Did you "
            "forget to activate a virtual environment?"
        ) from exc
-    execute_from_command_line(sys.argv)
+    execute_from_command_line(sys.argv) # $ threatModelSource[commandargs]=sys.argv


 if __name__ == '__main__':
--- a/python/ql/test/library-tests/frameworks/django-v2-v3/testproj/asgi.py
+++ b/python/ql/test/library-tests/frameworks/django-v2-v3/testproj/asgi.py
@@ -11,6 +11,6 @@ import os

 from django.core.asgi import get_asgi_application

-os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'testproj.settings')
+os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'testproj.settings') # $ threatModelSource[environment]=os.environ

 application = get_asgi_application()
--- a/python/ql/test/library-tests/frameworks/django-v2-v3/testproj/wsgi.py
+++ b/python/ql/test/library-tests/frameworks/django-v2-v3/testproj/wsgi.py
@@ -11,6 +11,6 @@ import os

 from django.core.wsgi import get_wsgi_application

-os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'testproj.settings')
+os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'testproj.settings') # $ threatModelSource[environment]=os.environ

 application = get_wsgi_application()
--- a/python/ql/test/library-tests/frameworks/psycopg/pep249.py
+++ b/python/ql/test/library-tests/frameworks/psycopg/pep249.py
@@ -12,3 +12,24 @@ with psycopg.connect(...) as conn:
    with conn.cursor() as cursor:
        cursor.execute("some sql", (42,))  # $ getSql="some sql"
        cursor.executemany("some sql", [(42,)])  # $ getSql="some sql"
+
+
+        ### test of threat-model sources
+        row = cursor.fetchone() # $ threatModelSource[database]=cursor.fetchone()
+        rows_many = cursor.fetchmany(10) # $ threatModelSource[database]=cursor.fetchmany(..)
+        rows_all = cursor.fetchall() # $ threatModelSource[database]=cursor.fetchall()
+
+        ensure_tainted(
+            row[0],  # $ tainted
+            rows_many[0][0],  # $ tainted
+            rows_all[0][0],  # $ tainted
+
+            # pretending we created cursor to return dictionary results
+            row["column"],  # $ tainted
+            rows_many[0]["column"],  # $ tainted
+            rows_all[0]["column"],  # $ tainted
+        )
+        for row in rows_many:
+            ensure_tainted(row[0], row["column"]) # $ tainted
+        for row in rows_all:
+            ensure_tainted(row[0], row["column"]) # $ tainted
--- a/python/ql/test/library-tests/frameworks/rest_framework/manage.py
+++ b/python/ql/test/library-tests/frameworks/rest_framework/manage.py
@@ -6,7 +6,7 @@ import sys

 def main():
    """Run administrative tasks."""
-    os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'testproj.settings')
+    os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'testproj.settings') # $ threatModelSource[environment]=os.environ
    try:
        from django.core.management import execute_from_command_line
    except ImportError as exc:
@@ -15,7 +15,7 @@ def main():
            "available on your PYTHONPATH environment variable? Did you "
            "forget to activate a virtual environment?"
        ) from exc
-    execute_from_command_line(sys.argv)
+    execute_from_command_line(sys.argv) # $ threatModelSource[commandargs]=sys.argv


 if __name__ == '__main__':
--- a/python/ql/test/library-tests/frameworks/stdlib-py3/FileSystemAccess.py
+++ b/python/ql/test/library-tests/frameworks/stdlib-py3/FileSystemAccess.py
@@ -17,7 +17,7 @@ p.open("wt").write("hello")  # $ getAPathArgument=p fileWriteData="hello"

 name = windows.parent.name
 o = open
-o(name)  # $ getAPathArgument=name
+o(name)  # $ getAPathArgument=name threatModelSource[file]=o(..)

 wb = p.write_bytes
 wb(b"hello")  # $ getAPathArgument=p fileWriteData=b"hello"
--- a/python/ql/test/library-tests/frameworks/stdlib/FileSystemAccess.py
+++ b/python/ql/test/library-tests/frameworks/stdlib/FileSystemAccess.py
@@ -5,25 +5,25 @@ import stat
 import tempfile
 import shutil

-open("file")  # $ getAPathArgument="file"
-open(file="file")  # $ getAPathArgument="file"
+open("file")  # $ getAPathArgument="file" threatModelSource[file]=open(..)
+open(file="file")  # $ getAPathArgument="file" threatModelSource[file]=open(..)

 o = open

-o("file")  # $ getAPathArgument="file"
-o(file="file")  # $ getAPathArgument="file"
+o("file")  # $ getAPathArgument="file" threatModelSource[file]=o(..)
+o(file="file")  # $ getAPathArgument="file" threatModelSource[file]=o(..)


-builtins.open("file")  # $ getAPathArgument="file"
-builtins.open(file="file")  # $ getAPathArgument="file"
+builtins.open("file")  # $ getAPathArgument="file" threatModelSource[file]=builtins.open(..)
+builtins.open(file="file")  # $ getAPathArgument="file" threatModelSource[file]=builtins.open(..)


-io.open("file")  # $ getAPathArgument="file"
-io.open(file="file")  # $ getAPathArgument="file"
+io.open("file")  # $ getAPathArgument="file" threatModelSource[file]=io.open(..)
+io.open(file="file")  # $ getAPathArgument="file" threatModelSource[file]=io.open(..)
 io.open_code("file")  # $ getAPathArgument="file"
 io.FileIO("file")  # $ getAPathArgument="file"

-f = open("path") # $ getAPathArgument="path"
+f = open("path") # $ getAPathArgument="path" threatModelSource[file]=open(..)
 f.write("foo") # $ getAPathArgument="path" fileWriteData="foo"
 lines = ["foo"]
 f.writelines(lines) # $ getAPathArgument="path" fileWriteData=lines
--- a/python/ql/test/library-tests/frameworks/stdlib/threat_models.py
+++ b/python/ql/test/library-tests/frameworks/stdlib/threat_models.py
@@ -0,0 +1,71 @@
+import os
+import sys
+import posix
+
+ensure_tainted(
+    os.getenv("foo"), # $ tainted threatModelSource[environment]=os.getenv(..)
+    os.getenvb("bar"), # $ tainted threatModelSource[environment]=os.getenvb(..)
+
+    os.environ["foo"], # $ tainted threatModelSource[environment]=os.environ
+    os.environ.get("foo"), # $ tainted threatModelSource[environment]=os.environ
+
+    os.environb["bar"], # $ tainted threatModelSource[environment]=os.environb
+    posix.environ[b"foo"], # $ tainted threatModelSource[environment]=posix.environ
+
+
+    sys.argv[1], # $ tainted threatModelSource[commandargs]=sys.argv
+    sys.orig_argv[1], # $ tainted threatModelSource[commandargs]=sys.orig_argv
+)
+
+for k,v in os.environ.items(): # $ threatModelSource[environment]=os.environ
+    ensure_tainted(k) # $ tainted
+    ensure_tainted(v) # $ tainted
+
+
+########################################
+# argparse
+########################################
+
+import argparse
+parser = argparse.ArgumentParser()
+parser.add_argument("foo")
+
+args = parser.parse_args() # $ threatModelSource[commandargs]=parser.parse_args()
+ensure_tainted(args.foo) # $ tainted
+
+explicit_argv_parsing = parser.parse_args(sys.argv) # $ threatModelSource[commandargs]=sys.argv
+ensure_tainted(explicit_argv_parsing.foo) # $ tainted
+
+fake_args = parser.parse_args(["<foo>"])
+ensure_not_tainted(fake_args.foo) # $ SPURIOUS: tainted
+
+########################################
+# reading input from stdin
+########################################
+
+ensure_tainted(
+    sys.stdin.readline(), # $ tainted threatModelSource[stdin]=sys.stdin
+    input(), # $ tainted threatModelSource[stdin]=input()
+)
+
+########################################
+# reading data from files
+########################################
+
+ensure_tainted(
+    open("foo"), # $ tainted threatModelSource[file]=open(..) getAPathArgument="foo"
+    open("foo").read(), # $ tainted threatModelSource[file]=open(..) getAPathArgument="foo"
+    open("foo").readline(), # $ tainted threatModelSource[file]=open(..) getAPathArgument="foo"
+    open("foo").readlines(), # $ tainted threatModelSource[file]=open(..) getAPathArgument="foo"
+
+    os.read(os.open("foo"), 1024), # $ tainted threatModelSource[file]=os.read(..) getAPathArgument="foo"
+)
+
+########################################
+# socket
+########################################
+
+import socket
+s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+s.connect(("example.com", 1234))
+ensure_tainted(s.recv(1024)) # $ MISSING: tainted threatModelSource[socket]
--- a/python/ql/test/library-tests/frameworks/stdlib/wsgiref_simple_server_test.py
+++ b/python/ql/test/library-tests/frameworks/stdlib/wsgiref_simple_server_test.py
@@ -45,7 +45,7 @@ def func2(environ, start_response): # $ requestHandler
    start_response(status, headers) # $ headerWriteBulk=headers headerWriteBulkUnsanitized=name,value
    return [b"Hello"] # $ HttpResponse responseBody=List

-case = sys.argv[1]
+case = sys.argv[1] # $ threatModelSource[commandargs]=sys.argv
 if case == "1":
    server = wsgiref.simple_server.WSGIServer(ADDRESS, wsgiref.simple_server.WSGIRequestHandler)
    server.set_app(func)
--- a/python/ql/test/library-tests/threat-models/default/ActiveKinds.expected
+++ b/python/ql/test/library-tests/threat-models/default/ActiveKinds.expected
@@ -0,0 +1,3 @@
+| default |
+| remote |
+| request |
--- a/python/ql/test/library-tests/threat-models/default/ActiveKinds.ql
+++ b/python/ql/test/library-tests/threat-models/default/ActiveKinds.ql
@@ -0,0 +1,7 @@
+private import codeql.threatmodels.ThreatModels
+
+from string kind
+where
+  knownThreatModel(kind) and
+  currentThreatModel(kind)
+select kind
--- a/python/ql/test/query-tests/Security/CWE-089-SqlInjection-local-threat-model/SqlInjection.expected
+++ b/python/ql/test/query-tests/Security/CWE-089-SqlInjection-local-threat-model/SqlInjection.expected
@@ -0,0 +1,8 @@
+edges
+| test.py:6:14:6:21 | ControlFlowNode for Attribute | test.py:6:14:6:24 | ControlFlowNode for Subscript | provenance | Src:MaD:17  |
+nodes
+| test.py:6:14:6:21 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
+| test.py:6:14:6:24 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
+subpaths
+#select
+| test.py:6:14:6:24 | ControlFlowNode for Subscript | test.py:6:14:6:21 | ControlFlowNode for Attribute | test.py:6:14:6:24 | ControlFlowNode for Subscript | This SQL query depends on a $@. | test.py:6:14:6:21 | ControlFlowNode for Attribute | user-provided value |
--- a/python/ql/test/query-tests/Security/CWE-089-SqlInjection-local-threat-model/SqlInjection.ext.yml
+++ b/python/ql/test/query-tests/Security/CWE-089-SqlInjection-local-threat-model/SqlInjection.ext.yml
@@ -0,0 +1,6 @@
+extensions:
+  - addsTo:
+      pack: codeql/threat-models
+      extensible: threatModelConfiguration
+    data:
+      - ["local", true, 0]
--- a/python/ql/test/query-tests/Security/CWE-089-SqlInjection-local-threat-model/SqlInjection.qlref
+++ b/python/ql/test/query-tests/Security/CWE-089-SqlInjection-local-threat-model/SqlInjection.qlref
@@ -0,0 +1 @@
+Security/CWE-089/SqlInjection.ql
--- a/python/ql/test/query-tests/Security/CWE-089-SqlInjection-local-threat-model/test.py
+++ b/python/ql/test/query-tests/Security/CWE-089-SqlInjection-local-threat-model/test.py
@@ -0,0 +1,6 @@
+# test that enabling local threat-model works end-to-end
+import sys
+import psycopg
+
+conn = psycopg.connect(...)
+conn.execute(sys.argv[1])