Merge branch 'main' of github.com:github/codeql into RasmusWL-python-port-reflected-xss

2026-04-29 10:45:15 +02:00 · 2020-10-30 17:56:36 +01:00
parent ef9999a4a1 146787bb55
commit 80360450de
65 changed files with 14172 additions and 5507 deletions
--- a/python/ql/src/experimental/Security-new-dataflow/CWE-022/ChainedConfigs12.qll
+++ b/python/ql/src/experimental/Security-new-dataflow/CWE-022/ChainedConfigs12.qll
@@ -0,0 +1,75 @@
+/**
+ * This defines a `PathGraph` where sinks from `TaintTracking::Configuration`s are identified with
+ * sources from `TaintTracking2::Configuration`s if they represent the same `ControlFlowNode`.
+ *
+ * Paths are then connected appropriately.
+ */
+
+import python
+import experimental.dataflow.DataFlow
+import experimental.dataflow.DataFlow2
+import experimental.dataflow.TaintTracking
+import experimental.dataflow.TaintTracking2
+
+/**
+ * Holds if `n` is a sink of some `TaintTracking::Configuration` (Config1) and also
+ * a source of some `TaintTracking2::Configuration` (Config2).
+ */
+private predicate crossoverNode(DataFlow::Node n) {
+  exists(TaintTracking::Configuration sinkConfig | sinkConfig.isSink(n)) and
+  exists(TaintTracking2::Configuration sourceConfig | sourceConfig.isSource(n))
+}
+
+/**
+ * A new type which represents the union of the two sets of nodes.
+ *
+ * Path nodes whose underlying `DataFlow::Node` satisfies `crossoverNode` are excluded
+ * from the `Config1Node` and `Config2Node` branches; they are represented by the
+ * `CrossoverNode` branch instead, which is keyed on the `DataFlow::Node` itself.
+ */
+private newtype TCustomPathNode =
+  Config1Node(DataFlow::PathNode node1) { not crossoverNode(node1.getNode()) } or
+  Config2Node(DataFlow2::PathNode node2) { not crossoverNode(node2.getNode()) } or
+  CrossoverNode(DataFlow::Node node) { crossoverNode(node) }
+
+/**
+ * A path node of the combined graph: a path node from either configuration, or a
+ * crossover node that stands for path nodes in both.
+ */
+class CustomPathNode extends TCustomPathNode {
+  /** Gets a Config1 `PathNode` that this node represents, if any. */
+  DataFlow::PathNode asNode1() {
+    this = CrossoverNode(result.getNode())
+    or
+    this = Config1Node(result)
+  }
+
+  /** Gets a Config2 `PathNode` that this node represents, if any. */
+  DataFlow2::PathNode asNode2() {
+    this = CrossoverNode(result.getNode())
+    or
+    this = Config2Node(result)
+  }
+
+  /**
+   * Holds if a path node represented by this node (in either configuration)
+   * has the given location information.
+   */
+  predicate hasLocationInfo(
+    string filepath, int startline, int startcolumn, int endline, int endcolumn
+  ) {
+    this.asNode2().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
+    or
+    this.asNode1().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
+  }
+
+  /** Gets the textual representation of a path node represented by this node. */
+  string toString() {
+    result = this.asNode2().toString()
+    or
+    result = this.asNode1().toString()
+  }
+}
+
+/** Holds if `(a,b)` is an edge in the combined graph of data flow path explanations. */
+query predicate edges(CustomPathNode a, CustomPathNode b) {
+  // The edge comes from the Config1 path graph
+  exists(DataFlow::PathNode pred1, DataFlow::PathNode succ1 |
+    pred1 = a.asNode1() and
+    succ1 = b.asNode1() and
+    DataFlow::PathGraph::edges(pred1, succ1)
+  )
+  or
+  // The edge comes from the Config2 path graph
+  exists(DataFlow2::PathNode pred2, DataFlow2::PathNode succ2 |
+    pred2 = a.asNode2() and
+    succ2 = b.asNode2() and
+    DataFlow2::PathGraph::edges(pred2, succ2)
+  )
+}
+
+/**
+ * Holds if `n` is a node in the combined graph of data flow path explanations,
+ * with `key` and `val` as reported by the underlying path graph.
+ */
+query predicate nodes(CustomPathNode n, string key, string val) {
+  // The node comes from the Config1 path graph
+  exists(DataFlow::PathNode inner1 |
+    inner1 = n.asNode1() and
+    DataFlow::PathGraph::nodes(inner1, key, val)
+  )
+  or
+  // The node comes from the Config2 path graph
+  exists(DataFlow2::PathNode inner2 |
+    inner2 = n.asNode2() and
+    DataFlow2::PathGraph::nodes(inner2, key, val)
+  )
+}
--- a/python/ql/src/experimental/Security-new-dataflow/CWE-022/PathInjection.ql
+++ b/python/ql/src/experimental/Security-new-dataflow/CWE-022/PathInjection.ql
@@ -0,0 +1,116 @@
+/**
+ * @name Uncontrolled data used in path expression
+ * @description Accessing paths influenced by users can allow an attacker to access unexpected resources.
+ * @kind path-problem
+ * @problem.severity error
+ * @sub-severity high
+ * @precision high
+ * @id py/path-injection
+ * @tags correctness
+ *       security
+ *       external/owasp/owasp-a1
+ *       external/cwe/cwe-022
+ *       external/cwe/cwe-023
+ *       external/cwe/cwe-036
+ *       external/cwe/cwe-073
+ *       external/cwe/cwe-099
+ *
+ * The query detects cases where a user-controlled path is used in an unsafe manner,
+ * meaning it is not both normalized and _afterwards_ checked.
+ *
+ * It does so by dividing the problematic situation into two cases:
+ *  1. The file path is never normalized.
+ *     This is easily detected by using normalization as a sanitizer.
+ *
+ *  2. The file path is normalized at least once, but never checked afterwards.
+ *     This is detected by finding the earliest normalization and then ensuring that
+ *     no checks happen later. Since we start from the earliest normalization,
+ *     we know that the absence of checks means that no normalization has a
+ *     check after it. (No checks after a second normalization would be ok if
+ *     there was a check between the first and the second.)
+ *
+ * Note that one could make the dual split on whether the file path is ever checked. This does
+ * not work as nicely, however, since checking is modelled as a `BarrierGuard` rather than
+ * as a `Sanitizer`. That means that only some dataflow paths out of a check will be removed,
+ * and so identifying the last check is not possible simply by finding a dataflow path from it
+ * to a sink.
+ */
+
+import python
+import experimental.dataflow.DataFlow
+import experimental.dataflow.DataFlow2
+import experimental.dataflow.TaintTracking
+import experimental.dataflow.TaintTracking2
+import experimental.semmle.python.Concepts
+import experimental.dataflow.RemoteFlowSources
+import ChainedConfigs12
+
+// ---------------------------------------------------------------------------
+// Case 1. The path is never normalized.
+// ---------------------------------------------------------------------------
+/**
+ * Configuration to find paths from remote flow sources to path arguments of file
+ * system accesses, where the path contains no normalization.
+ */
+class PathNotNormalizedConfiguration extends TaintTracking::Configuration {
+  PathNotNormalizedConfiguration() { this = "PathNotNormalizedConfiguration" }
+
+  /** Holds if `source` is a remote flow source. */
+  override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
+
+  /** Holds if `sink` is a path argument of some file system access. */
+  override predicate isSink(DataFlow::Node sink) {
+    exists(FileSystemAccess access | sink = access.getAPathArgument())
+  }
+
+  /** Holds if `node` is a path normalization; such nodes are treated as sanitizers. */
+  override predicate isSanitizer(DataFlow::Node node) { node instanceof Path::PathNormalization }
+}
+
+/**
+ * Holds if `PathNotNormalizedConfiguration` has a flow path from `source` to `sink`
+ * (case 1), both viewed as Config1 path nodes.
+ */
+predicate pathNotNormalized(CustomPathNode source, CustomPathNode sink) {
+  exists(PathNotNormalizedConfiguration config |
+    config.hasFlowPath(source.asNode1(), sink.asNode1())
+  )
+}
+
+// ---------------------------------------------------------------------------
+// Case 2. The path is normalized at least once, but never checked afterwards.
+// ---------------------------------------------------------------------------
+/** Configuration to find paths from sources to normalizations that contain no prior normalizations. */
+class FirstNormalizationConfiguration extends TaintTracking::Configuration {
+  FirstNormalizationConfiguration() { this = "FirstNormalizationConfiguration" }
+
+  override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
+
+  override predicate isSink(DataFlow::Node sink) { sink instanceof Path::PathNormalization }
+
+  // Normalizations are the sinks of this configuration, so they are blocked only on the
+  // way out (`isSanitizerOut`) rather than declared as plain sanitizers: a path may end
+  // at a normalization, but may not continue through one.
+  override predicate isSanitizerOut(DataFlow::Node node) { node instanceof Path::PathNormalization }
+}
+
+/**
+ * Configuration to find paths from normalizations to path arguments of file system
+ * accesses, where the path does not go through a safe-access check.
+ */
+class NormalizedPathNotCheckedConfiguration extends TaintTracking2::Configuration {
+  NormalizedPathNotCheckedConfiguration() { this = "NormalizedPathNotCheckedConfiguration" }
+
+  /** Holds if `source` is a path normalization. */
+  override predicate isSource(DataFlow::Node source) { source instanceof Path::PathNormalization }
+
+  /** Holds if `sink` is a path argument of some file system access. */
+  override predicate isSink(DataFlow::Node sink) {
+    exists(FileSystemAccess access | sink = access.getAPathArgument())
+  }
+
+  /** Holds if `guard` is a safe-access check; such guards act as sanitizer guards. */
+  override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
+    guard instanceof Path::SafeAccessCheck
+  }
+}
+
+/**
+ * Holds if (case 2) `FirstNormalizationConfiguration` has a flow path from `source` to
+ * some normalization, and `NormalizedPathNotCheckedConfiguration` has a flow path from
+ * that same control-flow node to `sink`.
+ */
+predicate pathNotCheckedAfterNormalization(CustomPathNode source, CustomPathNode sink) {
+  exists(DataFlow::PathNode mid1, DataFlow2::PathNode mid2 |
+    // The two halves of the path meet at the same control-flow node.
+    mid1.getNode().asCfgNode() = mid2.getNode().asCfgNode() and
+    any(FirstNormalizationConfiguration firstHalf).hasFlowPath(source.asNode1(), mid1) and
+    any(NormalizedPathNotCheckedConfiguration secondHalf).hasFlowPath(mid2, sink.asNode2())
+  )
+}
+
+// ---------------------------------------------------------------------------
+// Query: Either case 1 or case 2.
+// ---------------------------------------------------------------------------
+// `source` and `sink` are `CustomPathNode`s (from `ChainedConfigs12`) rather than plain
+// `DataFlow::PathNode`s, because a case 2 result starts in one configuration and ends
+// in the other.
+from CustomPathNode source, CustomPathNode sink
+where
+  pathNotNormalized(source, sink)
+  or
+  pathNotCheckedAfterNormalization(source, sink)
+select sink, source, sink, "This path depends on $@.", source, "a user-provided value"
--- a/python/ql/src/experimental/dataflow/DataFlow3.qll
+++ b/python/ql/src/experimental/dataflow/DataFlow3.qll
@@ -0,0 +1,26 @@
+/**
+ * Provides a library for local (intra-procedural) and global (inter-procedural)
+ * data flow analysis: deciding whether data can flow from a _source_ to a
+ * _sink_.
+ *
+ * Unless configured otherwise, _flow_ means that the exact value of
+ * the source may reach the sink. We do not track flow across pointer
+ * dereferences or array indexing. To track these types of flow, where the
+ * exact value may not be preserved, import
+ * `experimental.dataflow.TaintTracking`.
+ *
+ * To use global (interprocedural) data flow, extend the class
+ * `DataFlow::Configuration` as documented on that class. To use local
+ * (intraprocedural) data flow, call `DataFlow::localFlow` or
+ * `DataFlow::localFlowStep` with arguments of type `DataFlow::Node`.
+ */
+
+private import python
+
+/**
+ * Provides classes for performing local (intra-procedural) and
+ * global (inter-procedural) data flow analyses.
+ *
+ * The members of this module come from `experimental.dataflow.internal.DataFlowImpl3`.
+ */
+module DataFlow3 {
+  import experimental.dataflow.internal.DataFlowImpl3
+}
--- a/python/ql/src/experimental/dataflow/DataFlow4.qll
+++ b/python/ql/src/experimental/dataflow/DataFlow4.qll
@@ -0,0 +1,26 @@
+/**
+ * Provides a library for local (intra-procedural) and global (inter-procedural)
+ * data flow analysis: deciding whether data can flow from a _source_ to a
+ * _sink_.
+ *
+ * Unless configured otherwise, _flow_ means that the exact value of
+ * the source may reach the sink. We do not track flow across pointer
+ * dereferences or array indexing. To track these types of flow, where the
+ * exact value may not be preserved, import
+ * `experimental.dataflow.TaintTracking`.
+ *
+ * To use global (interprocedural) data flow, extend the class
+ * `DataFlow::Configuration` as documented on that class. To use local
+ * (intraprocedural) data flow, call `DataFlow::localFlow` or
+ * `DataFlow::localFlowStep` with arguments of type `DataFlow::Node`.
+ */
+
+private import python
+
+/**
+ * Provides classes for performing local (intra-procedural) and
+ * global (inter-procedural) data flow analyses.
+ *
+ * The members of this module come from `experimental.dataflow.internal.DataFlowImpl4`.
+ */
+module DataFlow4 {
+  import experimental.dataflow.internal.DataFlowImpl4
+}
--- a/python/ql/src/experimental/dataflow/TaintTracking2.qll
+++ b/python/ql/src/experimental/dataflow/TaintTracking2.qll
@@ -0,0 +1,19 @@
+/**
+ * Provides classes for performing local (intra-procedural) and
+ * global (inter-procedural) taint-tracking analyses.
+ *
+ * To use global (interprocedural) taint tracking, extend the class
+ * `TaintTracking::Configuration` as documented on that class. To use local
+ * (intraprocedural) taint tracking, call `TaintTracking::localTaint` or
+ * `TaintTracking::localTaintStep` with arguments of type `DataFlow::Node`.
+ */
+
+private import python
+
+/**
+ * Provides classes for performing local (intra-procedural) and
+ * global (inter-procedural) taint-tracking analyses.
+ *
+ * The members of this module come from
+ * `experimental.dataflow.internal.tainttracking2.TaintTrackingImpl`.
+ */
+module TaintTracking2 {
+  import experimental.dataflow.internal.tainttracking2.TaintTrackingImpl
+}
--- a/python/ql/src/experimental/dataflow/TaintTracking3.qll
+++ b/python/ql/src/experimental/dataflow/TaintTracking3.qll
@@ -0,0 +1,19 @@
+/**
+ * Provides classes for performing local (intra-procedural) and
+ * global (inter-procedural) taint-tracking analyses.
+ *
+ * To use global (interprocedural) taint tracking, extend the class
+ * `TaintTracking::Configuration` as documented on that class. To use local
+ * (intraprocedural) taint tracking, call `TaintTracking::localTaint` or
+ * `TaintTracking::localTaintStep` with arguments of type `DataFlow::Node`.
+ */
+
+private import python
+
+/**
+ * Provides classes for performing local (intra-procedural) and
+ * global (inter-procedural) taint-tracking analyses.
+ *
+ * The members of this module come from
+ * `experimental.dataflow.internal.tainttracking3.TaintTrackingImpl`.
+ */
+module TaintTracking3 {
+  import experimental.dataflow.internal.tainttracking3.TaintTrackingImpl
+}
--- a/python/ql/src/experimental/dataflow/TaintTracking4.qll
+++ b/python/ql/src/experimental/dataflow/TaintTracking4.qll
@@ -0,0 +1,19 @@
+/**
+ * Provides classes for performing local (intra-procedural) and
+ * global (inter-procedural) taint-tracking analyses.
+ *
+ * To use global (interprocedural) taint tracking, extend the class
+ * `TaintTracking::Configuration` as documented on that class. To use local
+ * (intraprocedural) taint tracking, call `TaintTracking::localTaint` or
+ * `TaintTracking::localTaintStep` with arguments of type `DataFlow::Node`.
+ */
+
+private import python
+
+/**
+ * Provides classes for performing local (intra-procedural) and
+ * global (inter-procedural) taint-tracking analyses.
+ *
+ * The members of this module come from
+ * `experimental.dataflow.internal.tainttracking4.TaintTrackingImpl`.
+ */
+module TaintTracking4 {
+  import experimental.dataflow.internal.tainttracking4.TaintTrackingImpl
+}
--- a/python/ql/src/experimental/dataflow/internal/DataFlowImpl3.qll
+++ b/python/ql/src/experimental/dataflow/internal/DataFlowImpl3.qll
--- a/python/ql/src/experimental/dataflow/internal/DataFlowImpl4.qll
+++ b/python/ql/src/experimental/dataflow/internal/DataFlowImpl4.qll
--- a/python/ql/src/experimental/dataflow/internal/tainttracking2/TaintTrackingImpl.qll
+++ b/python/ql/src/experimental/dataflow/internal/tainttracking2/TaintTrackingImpl.qll
@@ -0,0 +1,115 @@
+/**
+ * Provides an implementation of global (interprocedural) taint tracking.
+ * This file re-exports the local (intraprocedural) taint-tracking analysis
+ * from `TaintTrackingParameter::Public` and adds a global analysis, mainly
+ * exposed through the `Configuration` class. For some languages, this file
+ * exists in several identical copies, allowing queries to use multiple
+ * `Configuration` classes that depend on each other without introducing
+ * mutual recursion among those configurations.
+ */
+
+import TaintTrackingParameter::Public
+private import TaintTrackingParameter::Private
+
+/**
+ * A configuration of interprocedural taint tracking analysis. This defines
+ * sources, sinks, and any other configurable aspect of the analysis. Each
+ * use of the taint tracking library must define its own unique extension of
+ * this abstract class.
+ *
+ * A taint-tracking configuration is a special data flow configuration
+ * (`DataFlow::Configuration`) that allows for flow through nodes that do not
+ * necessarily preserve values but are still relevant from a taint tracking
+ * perspective. (For example, string concatenation, where one of the operands
+ * is tainted.)
+ *
+ * To create a configuration, extend this class with a subclass whose
+ * characteristic predicate is a unique singleton string. For example, write
+ *
+ * ```ql
+ * class MyAnalysisConfiguration extends TaintTracking::Configuration {
+ *   MyAnalysisConfiguration() { this = "MyAnalysisConfiguration" }
+ *   // Override `isSource` and `isSink`.
+ *   // Optionally override `isSanitizer`.
+ *   // Optionally override `isSanitizerIn`.
+ *   // Optionally override `isSanitizerOut`.
+ *   // Optionally override `isSanitizerGuard`.
+ *   // Optionally override `isAdditionalTaintStep`.
+ * }
+ * ```
+ *
+ * Then, to query whether there is flow between some `source` and `sink`,
+ * write
+ *
+ * ```ql
+ * exists(MyAnalysisConfiguration cfg | cfg.hasFlow(source, sink))
+ * ```
+ *
+ * Multiple configurations can coexist, but it is unsupported to depend on
+ * another `TaintTracking::Configuration` or a `DataFlow::Configuration` in the
+ * overridden predicates that define sources, sinks, or additional steps.
+ * Instead, the dependency should go to a `TaintTracking2::Configuration` or a
+ * `DataFlow2::Configuration`, `DataFlow3::Configuration`, etc.
+ */
+abstract class Configuration extends DataFlow::Configuration {
+  // The characteristic predicate places no constraint on `this`; each concrete
+  // subclass fixes `this` to its own string, as in the example above.
+  bindingset[this]
+  Configuration() { any() }
+
+  /**
+   * Holds if `source` is a relevant taint source.
+   *
+   * The smaller this predicate is, the faster `hasFlow()` will converge.
+   */
+  // overridden to provide taint-tracking specific qldoc
+  abstract override predicate isSource(DataFlow::Node source);
+
+  /**
+   * Holds if `sink` is a relevant taint sink.
+   *
+   * The smaller this predicate is, the faster `hasFlow()` will converge.
+   */
+  // overridden to provide taint-tracking specific qldoc
+  abstract override predicate isSink(DataFlow::Node sink);
+
+  /** Holds if the node `node` is a taint sanitizer. */
+  predicate isSanitizer(DataFlow::Node node) { none() }
+
+  // Maps the taint-tracking `isSanitizer` hook, together with
+  // `defaultTaintSanitizer`, onto the data-flow `isBarrier` predicate.
+  final override predicate isBarrier(DataFlow::Node node) {
+    isSanitizer(node) or
+    defaultTaintSanitizer(node)
+  }
+
+  /** Holds if taint propagation into `node` is prohibited. */
+  predicate isSanitizerIn(DataFlow::Node node) { none() }
+
+  // Maps `isSanitizerIn` onto the data-flow `isBarrierIn` predicate.
+  final override predicate isBarrierIn(DataFlow::Node node) { isSanitizerIn(node) }
+
+  /** Holds if taint propagation out of `node` is prohibited. */
+  predicate isSanitizerOut(DataFlow::Node node) { none() }
+
+  // Maps `isSanitizerOut` onto the data-flow `isBarrierOut` predicate.
+  final override predicate isBarrierOut(DataFlow::Node node) { isSanitizerOut(node) }
+
+  /** Holds if taint propagation through nodes guarded by `guard` is prohibited. */
+  predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { none() }
+
+  // Maps `isSanitizerGuard` onto the data-flow `isBarrierGuard` predicate.
+  final override predicate isBarrierGuard(DataFlow::BarrierGuard guard) { isSanitizerGuard(guard) }
+
+  /**
+   * Holds if the additional taint propagation step from `node1` to `node2`
+   * must be taken into account in the analysis.
+   */
+  predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) { none() }
+
+  // Maps the taint-tracking `isAdditionalTaintStep` hook, together with
+  // `defaultAdditionalTaintStep`, onto the data-flow `isAdditionalFlowStep` predicate.
+  final override predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
+    isAdditionalTaintStep(node1, node2) or
+    defaultAdditionalTaintStep(node1, node2)
+  }
+
+  /**
+   * Holds if taint may flow from `source` to `sink` for this configuration.
+   */
+  // overridden to provide taint-tracking specific qldoc
+  override predicate hasFlow(DataFlow::Node source, DataFlow::Node sink) {
+    super.hasFlow(source, sink)
+  }
+}
--- a/python/ql/src/experimental/dataflow/internal/tainttracking2/TaintTrackingParameter.qll
+++ b/python/ql/src/experimental/dataflow/internal/tainttracking2/TaintTrackingParameter.qll
@@ -0,0 +1,6 @@
+import experimental.dataflow.internal.TaintTrackingPublic as Public
+
+/**
+ * Private dependencies of this taint-tracking implementation: the name `DataFlow`
+ * is bound to the `DataFlow2` module here.
+ */
+module Private {
+  import experimental.dataflow.DataFlow2::DataFlow2 as DataFlow
+  import experimental.dataflow.internal.TaintTrackingPrivate
+}
--- a/python/ql/src/experimental/dataflow/internal/tainttracking3/TaintTrackingImpl.qll
+++ b/python/ql/src/experimental/dataflow/internal/tainttracking3/TaintTrackingImpl.qll
@@ -0,0 +1,115 @@
+/**
+ * Provides an implementation of global (interprocedural) taint tracking.
+ * This file re-exports the local (intraprocedural) taint-tracking analysis
+ * from `TaintTrackingParameter::Public` and adds a global analysis, mainly
+ * exposed through the `Configuration` class. For some languages, this file
+ * exists in several identical copies, allowing queries to use multiple
+ * `Configuration` classes that depend on each other without introducing
+ * mutual recursion among those configurations.
+ */
+
+import TaintTrackingParameter::Public
+private import TaintTrackingParameter::Private
+
+/**
+ * A configuration of interprocedural taint tracking analysis. This defines
+ * sources, sinks, and any other configurable aspect of the analysis. Each
+ * use of the taint tracking library must define its own unique extension of
+ * this abstract class.
+ *
+ * A taint-tracking configuration is a special data flow configuration
+ * (`DataFlow::Configuration`) that allows for flow through nodes that do not
+ * necessarily preserve values but are still relevant from a taint tracking
+ * perspective. (For example, string concatenation, where one of the operands
+ * is tainted.)
+ *
+ * To create a configuration, extend this class with a subclass whose
+ * characteristic predicate is a unique singleton string. For example, write
+ *
+ * ```ql
+ * class MyAnalysisConfiguration extends TaintTracking::Configuration {
+ *   MyAnalysisConfiguration() { this = "MyAnalysisConfiguration" }
+ *   // Override `isSource` and `isSink`.
+ *   // Optionally override `isSanitizer`.
+ *   // Optionally override `isSanitizerIn`.
+ *   // Optionally override `isSanitizerOut`.
+ *   // Optionally override `isSanitizerGuard`.
+ *   // Optionally override `isAdditionalTaintStep`.
+ * }
+ * ```
+ *
+ * Then, to query whether there is flow between some `source` and `sink`,
+ * write
+ *
+ * ```ql
+ * exists(MyAnalysisConfiguration cfg | cfg.hasFlow(source, sink))
+ * ```
+ *
+ * Multiple configurations can coexist, but it is unsupported to depend on
+ * another `TaintTracking::Configuration` or a `DataFlow::Configuration` in the
+ * overridden predicates that define sources, sinks, or additional steps.
+ * Instead, the dependency should go to a `TaintTracking2::Configuration` or a
+ * `DataFlow2::Configuration`, `DataFlow3::Configuration`, etc.
+ */
+abstract class Configuration extends DataFlow::Configuration {
+  // The characteristic predicate places no constraint on `this`; each concrete
+  // subclass fixes `this` to its own string, as in the example above.
+  bindingset[this]
+  Configuration() { any() }
+
+  /**
+   * Holds if `source` is a relevant taint source.
+   *
+   * The smaller this predicate is, the faster `hasFlow()` will converge.
+   */
+  // overridden to provide taint-tracking specific qldoc
+  abstract override predicate isSource(DataFlow::Node source);
+
+  /**
+   * Holds if `sink` is a relevant taint sink.
+   *
+   * The smaller this predicate is, the faster `hasFlow()` will converge.
+   */
+  // overridden to provide taint-tracking specific qldoc
+  abstract override predicate isSink(DataFlow::Node sink);
+
+  /** Holds if the node `node` is a taint sanitizer. */
+  predicate isSanitizer(DataFlow::Node node) { none() }
+
+  // Maps the taint-tracking `isSanitizer` hook, together with
+  // `defaultTaintSanitizer`, onto the data-flow `isBarrier` predicate.
+  final override predicate isBarrier(DataFlow::Node node) {
+    isSanitizer(node) or
+    defaultTaintSanitizer(node)
+  }
+
+  /** Holds if taint propagation into `node` is prohibited. */
+  predicate isSanitizerIn(DataFlow::Node node) { none() }
+
+  // Maps `isSanitizerIn` onto the data-flow `isBarrierIn` predicate.
+  final override predicate isBarrierIn(DataFlow::Node node) { isSanitizerIn(node) }
+
+  /** Holds if taint propagation out of `node` is prohibited. */
+  predicate isSanitizerOut(DataFlow::Node node) { none() }
+
+  // Maps `isSanitizerOut` onto the data-flow `isBarrierOut` predicate.
+  final override predicate isBarrierOut(DataFlow::Node node) { isSanitizerOut(node) }
+
+  /** Holds if taint propagation through nodes guarded by `guard` is prohibited. */
+  predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { none() }
+
+  // Maps `isSanitizerGuard` onto the data-flow `isBarrierGuard` predicate.
+  final override predicate isBarrierGuard(DataFlow::BarrierGuard guard) { isSanitizerGuard(guard) }
+
+  /**
+   * Holds if the additional taint propagation step from `node1` to `node2`
+   * must be taken into account in the analysis.
+   */
+  predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) { none() }
+
+  // Maps the taint-tracking `isAdditionalTaintStep` hook, together with
+  // `defaultAdditionalTaintStep`, onto the data-flow `isAdditionalFlowStep` predicate.
+  final override predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
+    isAdditionalTaintStep(node1, node2) or
+    defaultAdditionalTaintStep(node1, node2)
+  }
+
+  /**
+   * Holds if taint may flow from `source` to `sink` for this configuration.
+   */
+  // overridden to provide taint-tracking specific qldoc
+  override predicate hasFlow(DataFlow::Node source, DataFlow::Node sink) {
+    super.hasFlow(source, sink)
+  }
+}
--- a/python/ql/src/experimental/dataflow/internal/tainttracking3/TaintTrackingParameter.qll
+++ b/python/ql/src/experimental/dataflow/internal/tainttracking3/TaintTrackingParameter.qll
@@ -0,0 +1,6 @@
+import experimental.dataflow.internal.TaintTrackingPublic as Public
+
+/**
+ * Private dependencies of this taint-tracking implementation: the name `DataFlow`
+ * is bound to the `DataFlow3` module here.
+ */
+module Private {
+  import experimental.dataflow.DataFlow3::DataFlow3 as DataFlow
+  import experimental.dataflow.internal.TaintTrackingPrivate
+}
--- a/python/ql/src/experimental/dataflow/internal/tainttracking4/TaintTrackingImpl.qll
+++ b/python/ql/src/experimental/dataflow/internal/tainttracking4/TaintTrackingImpl.qll
@@ -0,0 +1,115 @@
+/**
+ * Provides an implementation of global (interprocedural) taint tracking.
+ * This file re-exports the local (intraprocedural) taint-tracking analysis
+ * from `TaintTrackingParameter::Public` and adds a global analysis, mainly
+ * exposed through the `Configuration` class. For some languages, this file
+ * exists in several identical copies, allowing queries to use multiple
+ * `Configuration` classes that depend on each other without introducing
+ * mutual recursion among those configurations.
+ */
+
+import TaintTrackingParameter::Public
+private import TaintTrackingParameter::Private
+
+/**
+ * A configuration of interprocedural taint tracking analysis. This defines
+ * sources, sinks, and any other configurable aspect of the analysis. Each
+ * use of the taint tracking library must define its own unique extension of
+ * this abstract class.
+ *
+ * A taint-tracking configuration is a special data flow configuration
+ * (`DataFlow::Configuration`) that allows for flow through nodes that do not
+ * necessarily preserve values but are still relevant from a taint tracking
+ * perspective. (For example, string concatenation, where one of the operands
+ * is tainted.)
+ *
+ * To create a configuration, extend this class with a subclass whose
+ * characteristic predicate is a unique singleton string. For example, write
+ *
+ * ```ql
+ * class MyAnalysisConfiguration extends TaintTracking::Configuration {
+ *   MyAnalysisConfiguration() { this = "MyAnalysisConfiguration" }
+ *   // Override `isSource` and `isSink`.
+ *   // Optionally override `isSanitizer`.
+ *   // Optionally override `isSanitizerIn`.
+ *   // Optionally override `isSanitizerOut`.
+ *   // Optionally override `isSanitizerGuard`.
+ *   // Optionally override `isAdditionalTaintStep`.
+ * }
+ * ```
+ *
+ * Then, to query whether there is flow between some `source` and `sink`,
+ * write
+ *
+ * ```ql
+ * exists(MyAnalysisConfiguration cfg | cfg.hasFlow(source, sink))
+ * ```
+ *
+ * Multiple configurations can coexist, but it is unsupported to depend on
+ * another `TaintTracking::Configuration` or a `DataFlow::Configuration` in the
+ * overridden predicates that define sources, sinks, or additional steps.
+ * Instead, the dependency should go to a `TaintTracking2::Configuration` or a
+ * `DataFlow2::Configuration`, `DataFlow3::Configuration`, etc.
+ */
+abstract class Configuration extends DataFlow::Configuration {
+  // The characteristic predicate places no constraint on `this`; each concrete
+  // subclass fixes `this` to its own string, as in the example above.
+  bindingset[this]
+  Configuration() { any() }
+
+  /**
+   * Holds if `source` is a relevant taint source.
+   *
+   * The smaller this predicate is, the faster `hasFlow()` will converge.
+   */
+  // overridden to provide taint-tracking specific qldoc
+  abstract override predicate isSource(DataFlow::Node source);
+
+  /**
+   * Holds if `sink` is a relevant taint sink.
+   *
+   * The smaller this predicate is, the faster `hasFlow()` will converge.
+   */
+  // overridden to provide taint-tracking specific qldoc
+  abstract override predicate isSink(DataFlow::Node sink);
+
+  /** Holds if the node `node` is a taint sanitizer. */
+  predicate isSanitizer(DataFlow::Node node) { none() }
+
+  // Maps the taint-tracking `isSanitizer` hook, together with
+  // `defaultTaintSanitizer`, onto the data-flow `isBarrier` predicate.
+  final override predicate isBarrier(DataFlow::Node node) {
+    isSanitizer(node) or
+    defaultTaintSanitizer(node)
+  }
+
+  /** Holds if taint propagation into `node` is prohibited. */
+  predicate isSanitizerIn(DataFlow::Node node) { none() }
+
+  // Maps `isSanitizerIn` onto the data-flow `isBarrierIn` predicate.
+  final override predicate isBarrierIn(DataFlow::Node node) { isSanitizerIn(node) }
+
+  /** Holds if taint propagation out of `node` is prohibited. */
+  predicate isSanitizerOut(DataFlow::Node node) { none() }
+
+  // Maps `isSanitizerOut` onto the data-flow `isBarrierOut` predicate.
+  final override predicate isBarrierOut(DataFlow::Node node) { isSanitizerOut(node) }
+
+  /** Holds if taint propagation through nodes guarded by `guard` is prohibited. */
+  predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { none() }
+
+  // Maps `isSanitizerGuard` onto the data-flow `isBarrierGuard` predicate.
+  final override predicate isBarrierGuard(DataFlow::BarrierGuard guard) { isSanitizerGuard(guard) }
+
+  /**
+   * Holds if the additional taint propagation step from `node1` to `node2`
+   * must be taken into account in the analysis.
+   */
+  predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) { none() }
+
+  // Maps the taint-tracking `isAdditionalTaintStep` hook, together with
+  // `defaultAdditionalTaintStep`, onto the data-flow `isAdditionalFlowStep` predicate.
+  final override predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
+    isAdditionalTaintStep(node1, node2) or
+    defaultAdditionalTaintStep(node1, node2)
+  }
+
+  /**
+   * Holds if taint may flow from `source` to `sink` for this configuration.
+   */
+  // overridden to provide taint-tracking specific qldoc
+  override predicate hasFlow(DataFlow::Node source, DataFlow::Node sink) {
+    super.hasFlow(source, sink)
+  }
+}
--- a/python/ql/src/experimental/dataflow/internal/tainttracking4/TaintTrackingParameter.qll
+++ b/python/ql/src/experimental/dataflow/internal/tainttracking4/TaintTrackingParameter.qll
@@ -0,0 +1,6 @@
+import experimental.dataflow.internal.TaintTrackingPublic as Public
+
+/**
+ * Private dependencies of this taint-tracking implementation: the name `DataFlow`
+ * is bound to the `DataFlow4` module here.
+ */
+module Private {
+  import experimental.dataflow.DataFlow4::DataFlow4 as DataFlow
+  import experimental.dataflow.internal.TaintTrackingPrivate
+}
--- a/python/ql/src/experimental/semmle/python/Concepts.qll
+++ b/python/ql/src/experimental/semmle/python/Concepts.qll
@@ -40,6 +40,74 @@ module SystemCommandExecution {
  }
 }

+/**
+ * A data-flow node that performs a file system access, including reading and writing data,
+ * creating and deleting files and folders, checking and updating permissions, and so on.
+ *
+ * Extend this class to refine existing API models. If you want to model new APIs,
+ * extend `FileSystemAccess::Range` instead.
+ */
+class FileSystemAccess extends DataFlow::Node {
+  // The `FileSystemAccess::Range` instance that this node coincides with.
+  FileSystemAccess::Range range;
+
+  FileSystemAccess() { this = range }
+
+  /** Gets an argument to this file system access that is interpreted as a path. */
+  // Delegates to the modeled range.
+  DataFlow::Node getAPathArgument() { result = range.getAPathArgument() }
+}
+
+/**
+ * Provides a class for modeling new file system access APIs.
+ *
+ * Every instance of `Range` is also a `FileSystemAccess`, whose `getAPathArgument`
+ * delegates to the one defined on the `Range`.
+ */
+module FileSystemAccess {
+  /**
+   * A data-flow node that performs a file system access, including reading and writing data,
+   * creating and deleting files and folders, checking and updating permissions, and so on.
+   *
+   * Extend this class to model new APIs. If you want to refine existing API models,
+   * extend `FileSystemAccess` instead.
+   */
+  abstract class Range extends DataFlow::Node {
+    /** Gets an argument to this file system access that is interpreted as a path. */
+    abstract DataFlow::Node getAPathArgument();
+  }
+}
+
+/** Provides classes for modeling path-related APIs. */
+module Path {
+  /**
+   * A data-flow node that performs path normalization. This is often needed in order
+   * to safely access paths.
+   *
+   * Extend this class to refine existing API models. If you want to model new APIs,
+   * extend `PathNormalization::Range` instead.
+   */
+  class PathNormalization extends DataFlow::Node {
+    // The `PathNormalization::Range` instance that this node coincides with.
+    PathNormalization::Range range;
+
+    PathNormalization() { this = range }
+  }
+
+  /** Provides a class for modeling new path normalization APIs. */
+  module PathNormalization {
+    /**
+     * A data-flow node that performs path normalization. This is often needed in order
+     * to safely access paths.
+     *
+     * Extend this class to model new APIs. If you want to refine existing API models,
+     * extend `PathNormalization` instead.
+     */
+    abstract class Range extends DataFlow::Node { }
+  }
+
+  /**
+   * A data-flow node that checks that a path is safe to access.
+   *
+   * Extend this class to refine existing API models. If you want to model new APIs,
+   * extend `SafeAccessCheck::Range` instead.
+   */
+  class SafeAccessCheck extends DataFlow::BarrierGuard {
+    // The `SafeAccessCheck::Range` instance that this guard coincides with.
+    SafeAccessCheck::Range range;
+
+    SafeAccessCheck() { this = range }
+
+    // Delegates to the modeled range.
+    override predicate checks(ControlFlowNode node, boolean branch) { range.checks(node, branch) }
+  }
+
+  /** Provides a class for modeling new path safety checks. */
+  module SafeAccessCheck {
+    /**
+     * A data-flow node that checks that a path is safe to access.
+     *
+     * Extend this class to model new APIs. If you want to refine existing API models,
+     * extend `SafeAccessCheck` instead.
+     */
+    abstract class Range extends DataFlow::BarrierGuard { }
+  }
+}
+
 /**
 * A data-flow node that decodes data from a binary or textual format. This
 * is intended to include deserialization, unmarshalling, decoding, unpickling,
--- a/python/ql/src/experimental/semmle/python/frameworks/Stdlib.qll
+++ b/python/ql/src/experimental/semmle/python/frameworks/Stdlib.qll
@@ -82,19 +82,76 @@ private module Stdlib {

    /** Provides models for the `os.path` module */
    module path {
-      /** Gets a reference to the `os.path.join` function. */
-      private DataFlow::Node join(DataFlow::TypeTracker t) {
-        t.start() and
-        result = DataFlow::importNode("os.path.join")
+      /**
+       * Gets a reference to the attribute `attr_name` of the `os.path` module.
+       * WARNING: Only holds for the predefined attributes `join` and `normpath`.
+       *
+       * For example, using `attr_name = "join"` will get all uses of `os.path.join`.
+       */
+      private DataFlow::Node path_attr(DataFlow::TypeTracker t, string attr_name) {
+        attr_name in ["join", "normpath"] and
+        (
+          t.start() and
+          result = DataFlow::importNode("os.path." + attr_name)
+          or
+          t.startInAttr(attr_name) and
+          result = os::path()
+        )
        or
-        t.startInAttr("join") and
-        result = os::path()
-        or
-        exists(DataFlow::TypeTracker t2 | result = join(t2).track(t2, t))
+        // The usual `path_attr(t2, attr_name).track(t2, t)` setup performed badly, so that code
+        // is inlined here and the join is forced through `path_attr_first_join`.
+        exists(DataFlow::TypeTracker t2 |
+          exists(DataFlow::StepSummary summary |
+            path_attr_first_join(t2, attr_name, result, summary) and
+            t = t2.append(summary)
+          )
+        )
+      }
+
+      pragma[nomagic]
+      private predicate path_attr_first_join(
+        DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res,
+        DataFlow::StepSummary summary
+      ) {
+        DataFlow::StepSummary::step(path_attr(t2, attr_name), res, summary)
+      }
+
+      /**
+       * Gets a reference to the attribute `attr_name` of the `os.path` module.
+       * WARNING: Only holds for the predefined attributes `join` and `normpath`.
+       *
+       * For example, using `attr_name = "join"` will get all uses of `os.path.join`.
+       */
+      DataFlow::Node path_attr(string attr_name) {
+        result = path_attr(DataFlow::TypeTracker::end(), attr_name)
      }

      /** Gets a reference to the `os.path.join` function. */
-      DataFlow::Node join() { result = join(DataFlow::TypeTracker::end()) }
+      DataFlow::Node join() { result = path_attr("join") }
+    }
+  }
+
+  /**
+   * A call to `os.path.normpath`; `getPathArg` gets its first positional or `path` argument.
+   * See https://docs.python.org/3/library/os.path.html#os.path.normpath
+   */
+  private class OsPathNormpathCall extends Path::PathNormalization::Range, DataFlow::CfgNode {
+    override CallNode node;
+
+    OsPathNormpathCall() { node.getFunction() = os::path::path_attr("normpath").asCfgNode() }
+
+    DataFlow::Node getPathArg() {
+      result.asCfgNode() in [node.getArg(0), node.getArgByName("path")]
+    }
+  }
+
+  /** An additional taint step from the path argument of `os.path.normpath` to the call. */
+  private class OsPathNormpathCallAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
+    override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
+      exists(OsPathNormpathCall call |
+        nodeTo = call and
+        nodeFrom = call.getPathArg()
+      )
    }
  }

@@ -681,3 +738,30 @@ private class ExecStatement extends CodeExecution::Range {

  override DataFlow::Node getCode() { result = this }
 }
+
+/**
+ * A call to the builtin `open` function, recognized purely by the callee name `open`.
+ * See https://docs.python.org/3/library/functions.html#open
+ */
+private class OpenCall extends FileSystemAccess::Range, DataFlow::CfgNode {
+  override CallNode node;
+
+  OpenCall() { node.getFunction().(NameNode).getId() = "open" }
+
+  override DataFlow::Node getAPathArgument() {
+    result.asCfgNode() in [node.getArg(0), node.getArgByName("file")]
+  }
+}
+
+/**
+ * A call to a method named `startswith`, treated as a check of its receiver on the true branch.
+ * See https://docs.python.org/3/library/stdtypes.html#str.startswith
+ */
+private class StartswithCall extends Path::SafeAccessCheck::Range {
+  StartswithCall() { this.(CallNode).getFunction().(AttrNode).getName() = "startswith" }
+
+  override predicate checks(ControlFlowNode node, boolean branch) {
+    node = this.(CallNode).getFunction().(AttrNode).getObject() and
+    branch = true
+  }
+}