Python: Use custom PathGraph

2025-12-20 10:46:30 +01:00 · 2020-10-23 01:10:21 +02:00
parent 8ce5f41366
commit f88cc3c98e
2 changed files with 133 additions and 40 deletions
--- a/python/ql/src/experimental/Security-new-dataflow/CWE-022/ChainedConfigs12.qll
+++ b/python/ql/src/experimental/Security-new-dataflow/CWE-022/ChainedConfigs12.qll
@@ -0,0 +1,79 @@
+/**
+ * This defines a `PathGraph` where sinks from `TaintTracking::Configuration`s are identified with
+ * sources from `TaintTracking2::Configuration`s if they represent the same `ControlFlowNode`.
+ *
+ * Paths are then connected appropriately.
+ */
+
+import python
+import experimental.dataflow.DataFlow
+import experimental.dataflow.DataFlow2
+import experimental.dataflow.TaintTracking
+import experimental.dataflow.TaintTracking2
+
+/**
+ * Holds if `n` is a `ControlFlowNode` that is a sink in some Config1 and a source in some Config2.
+ */
+private predicate crossoverNode(ControlFlowNode n) {
+  exists(DataFlow::Node n1, DataFlow2::Node n2 |
+    any(TaintTracking::Configuration t1).isSink(n1) and
+    any(TaintTracking2::Configuration t2).isSource(n2) and
+    n = n1.asCfgNode() and
+    n = n2.asCfgNode()
+  )
+}
+
+/**
+ * A new type for the path nodes of both configs, with nodes at a crossover point merged into one.
+ */
+private newtype TCustomPathNode =
+  Config1Node(DataFlow::PathNode node1) { not crossoverNode(node1.getNode().asCfgNode()) } or
+  Config2Node(DataFlow2::PathNode node1) { not crossoverNode(node1.getNode().asCfgNode()) } or
+  CrossoverNode(ControlFlowNode e) { crossoverNode(e) }
+
+/**
+ * A class representing the set of all the path nodes in either config.
+ */
+class CustomPathNode extends TCustomPathNode {
+  /** Gets the PathNode if it is in Config1. */
+  DataFlow::PathNode asNode1() {
+    this = Config1Node(result) or this = CrossoverNode(result.getNode().asCfgNode())
+  }
+
+  /** Gets the PathNode if it is in Config2. */
+  DataFlow2::PathNode asNode2() {
+    this = Config2Node(result) or this = CrossoverNode(result.getNode().asCfgNode())
+  }
+
+  predicate hasLocationInfo(
+    string filepath, int startline, int startcolumn, int endline, int endcolumn
+  ) {
+    asNode1().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
+    or
+    asNode2().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
+  }
+
+  string toString() {
+    result = asNode1().toString()
+    or
+    result = asNode2().toString()
+  }
+}
+
+/** Holds if `(a,b)` is an edge in the graph of data flow path explanations. */
+query predicate edges(CustomPathNode a, CustomPathNode b) {
+  // Edge is in Config1 graph
+  DataFlow::PathGraph::edges(a.asNode1(), b.asNode1())
+  or
+  // Edge is in Config2 graph
+  DataFlow2::PathGraph::edges(a.asNode2(), b.asNode2())
+}
+
+/** Holds if `n` is a node in the graph of data flow path explanations. */
+query predicate nodes(CustomPathNode n, string key, string val) {
+  // Node is in Config1 graph
+  DataFlow::PathGraph::nodes(n.asNode1(), key, val)
+  or
+  // Node is in Config2 graph
+  DataFlow2::PathGraph::nodes(n.asNode2(), key, val)
+}
--- a/python/ql/src/experimental/Security-new-dataflow/CWE-022/PathInjection.ql
+++ b/python/ql/src/experimental/Security-new-dataflow/CWE-022/PathInjection.ql
@@ -1,4 +1,22 @@
 /**
+ * The query detects the case where a path is not both normalized and _afterwards_ checked.
+ *
+ * It does so by dividing the problematic situation into two cases:
+ *  1. The path is never normalized.
+ *     This is easily detected by using normalization as a sanitizer.
+ *
+ *  2. The path is normalized at least once, but never checked afterwards.
+ *     This is detected by finding the first normalization and then ensuring that
+ *     no checks happen after it. Since we start from the first normalization,
+ *     we know that the absence of checks means that no normalization has a
+ *     check after it. (No checks after a second normalization would be ok if
+ *     there was a check between the first and the second.)
+ *
+ * Note that one could make the dual split on whether the path is ever checked. This does
+ * not work as nicely, however, since checking is modelled as a `BarrierGuard` rather than
+ * as a `Sanitizer`. That means that only some paths out of a check will be removed, and so
+ * identifying the last check is not possible simply by finding a path from it to a sink.
+ *
 * @name Uncontrolled data used in path expression
 * @description Accessing paths influenced by users can allow an attacker to access unexpected resources.
 * @kind path-problem
@@ -23,11 +41,14 @@ import experimental.dataflow.TaintTracking
 import experimental.dataflow.TaintTracking2
 import experimental.semmle.python.Concepts
 import experimental.dataflow.RemoteFlowSources
-import DataFlow::PathGraph
+import ChainedConfigs12

+// ---------------------------------------------------------------------------
+// Case 1. The path is never normalized.
+// ---------------------------------------------------------------------------
 /** Configuration to find paths from sources to sinks that contain no normalization. */
-class UnNormalizedPathConfiguration extends TaintTracking::Configuration {
-  UnNormalizedPathConfiguration() { this = "UnNormalizedPathConfiguration" }
+class PathNotNormalizedConfiguration extends TaintTracking::Configuration {
+  PathNotNormalizedConfiguration() { this = "PathNotNormalizedConfiguration" }

  override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }

@@ -38,8 +59,15 @@ class UnNormalizedPathConfiguration extends TaintTracking::Configuration {
  override predicate isSanitizer(DataFlow::Node node) { node instanceof PathNormalization }
 }

+predicate pathNotNormalized(CustomPathNode source, CustomPathNode sink) {
+  any(PathNotNormalizedConfiguration config).hasFlowPath(source.asNode1(), sink.asNode1())
+}
+
+// ---------------------------------------------------------------------------
+// Case 2. The path is normalized at least once, but never checked afterwards.
+// ---------------------------------------------------------------------------
 /** Configuration to find paths from sources to normalizations that contain no prior normalizations. */
-class FirstNormalizationConfiguration extends TaintTracking2::Configuration {
+class FirstNormalizationConfiguration extends TaintTracking::Configuration {
  FirstNormalizationConfiguration() { this = "FirstNormalizationConfiguration" }

  override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
@@ -49,26 +77,11 @@ class FirstNormalizationConfiguration extends TaintTracking2::Configuration {
  override predicate isSanitizerOut(DataFlow::Node node) { node instanceof PathNormalization }
 }

-class FirstNormalization extends DataFlow2::PathNode {
-  DataFlow::Node sourceNode;
-
-  FirstNormalization() {
-    exists(FirstNormalizationConfiguration conf, DataFlow2::PathNode source |
-      sourceNode = source.getNode() and
-      conf.hasFlowPath(source, this)
-    )
-  }
-
-  DataFlow::Node getSourceNode() { result = sourceNode }
-}
-
 /** Configuration to find paths from normalizations to sinks that do not go through a check. */
-class UncheckedNormalizedConfiguration extends TaintTracking::Configuration {
-  UncheckedNormalizedConfiguration() { this = "UncheckedNormalizedConfiguration" }
+class NormalizedPathNotCheckedConfiguration extends TaintTracking2::Configuration {
+  NormalizedPathNotCheckedConfiguration() { this = "NormalizedPathNotCheckedConfiguration" }

-  override predicate isSource(DataFlow::Node source) {
-    source = any(FirstNormalization n).getNode()
-  }
+  override predicate isSource(DataFlow::Node source) { source instanceof PathNormalization }

  override predicate isSink(DataFlow::Node sink) {
    sink = any(FileSystemAccess e).getAPathArgument()
@@ -77,22 +90,23 @@ class UncheckedNormalizedConfiguration extends TaintTracking::Configuration {
  override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { guard instanceof PathCheck }
 }

-from TaintTracking::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
-where
-  // Path has no normalization on it.
-  config instanceof UnNormalizedPathConfiguration and
-  config.hasFlowPath(source, sink)
-  or
-  // Path has a normalization on it, but no subsequent check.
-  config instanceof UncheckedNormalizedConfiguration and
-  config.hasFlowPath(source, sink)
-  or
-  // This should report a better source, but does not quite work.
-  // Path has a normalization on it, but no subsequent check.
-  config instanceof UncheckedNormalizedConfiguration and
-  exists(DataFlow::PathNode c, FirstNormalization n | n.getNode() = c.getNode() |
-    config.hasFlowPath(c, sink) and
-    source.getNode() = n.getSourceNode()
+predicate pathNotCheckedAfterNormalization(CustomPathNode source, CustomPathNode sink) {
+  exists(
+    FirstNormalizationConfiguration config, DataFlow::PathNode mid1, DataFlow2::PathNode mid2,
+    NormalizedPathNotCheckedConfiguration config2
+  |
+    config.hasFlowPath(source.asNode1(), mid1) and
+    config2.hasFlowPath(mid2, sink.asNode2()) and
+    mid1.getNode().asCfgNode() = mid2.getNode().asCfgNode()
  )
-select sink.getNode(), source, sink, "This path depends on $@.", source.getNode(),
-  "a user-provided value"
+}
+
+// ---------------------------------------------------------------------------
+// Query: Either case 1 or case 2.
+// ---------------------------------------------------------------------------
+from CustomPathNode source, CustomPathNode sink
+where
+  pathNotNormalized(source, sink)
+  or
+  pathNotCheckedAfterNormalization(source, sink)
+select sink, source, sink, "This path depends on $@.", source, "a user-provided value"