From ae69a2bcd9f7e1da4ad26d375fb2f4618759d567 Mon Sep 17 00:00:00 2001
From: tiferet <tiferet@github.com>
Date: Fri, 3 Feb 2023 07:25:08 -0800
Subject: [PATCH] Separate out the sink types to align with the MaD `kind`s
 that currently exist, adding a sink type for all sinks of a given query that
 are not currently mapped in the MaD `kind`s.

---
 .../EndpointCharacteristics.qll               | 119 +++++++++++++++---
 .../adaptivethreatmodeling/EndpointTypes.qll  |  83 ++++++++----
 .../RequestForgeryATM.qll                     |   6 +-
 .../SqlInjectionATM.qll                       |   4 +-
 .../adaptivethreatmodeling/TaintedPathATM.qll |   4 +-
 5 files changed, 175 insertions(+), 41 deletions(-)

diff --git a/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointCharacteristics.qll b/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointCharacteristics.qll
index fd5499b700f..25017e48177 100644
--- a/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointCharacteristics.qll
+++ b/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointCharacteristics.qll
@@ -43,7 +43,9 @@ predicate erroneousEndpoints(
     not (
       characteristic instanceof IsSanitizerCharacteristic or
       characteristic2 instanceof IsSanitizerCharacteristic
-    )
+    ) and
+    // We currently know of some sink types that overlap with one another.
+    not knownOverlappingCharacteristics(characteristic, characteristic2)
   ) and
   errorMessage = "Endpoint has high-confidence positive indicators for multiple classes"
   or
@@ -68,6 +70,20 @@ predicate erroneousConfidences(
   errorMessage = "Characteristic has an indicator with confidence outside of [0, 1]"
 }
 
+/**
+ * Holds if `characteristic1` and `characteristic2` are among the pairs of currently known positive characteristics that
+ * have some overlap in their known sinks (always for the same query type). This is not necessarily a problem, because
+ * both characteristics belong to the same query.
+ */
+private predicate knownOverlappingCharacteristics(
+  EndpointCharacteristics::EndpointCharacteristic characteristic1,
+  EndpointCharacteristics::EndpointCharacteristic characteristic2
+) {
+  characteristic1 != characteristic2 and
+  characteristic1 = ["file creation sink", "other path injection sink"] and
+  characteristic2 = ["file creation sink", "other path injection sink"]
+}
+
 predicate isTypeAccess(DataFlow::Node n) { n.asExpr() instanceof TypeAccess }
 
 /**
@@ -262,23 +278,55 @@ abstract class EndpointCharacteristic extends string {
 //     confidence = maximalConfidence()
 //   }
 // }
+/**
+ * Endpoints identified as "create-file" sinks by the MaD modeling are file creation sinks with maximal confidence.
+ */
+private class CreateFileSinkCharacteristic extends EndpointCharacteristic {
+  CreateFileSinkCharacteristic() { this = any(CreateFileSinkType type).getDescription() }
+
+  override predicate appliesToEndpoint(DataFlow::Node n) { sinkNode(n, "create-file") }
+
+  override predicate hasImplications(
+    EndpointType endpointClass, boolean isPositiveIndicator, float confidence
+  ) {
+    endpointClass instanceof CreateFileSinkType and
+    isPositiveIndicator = true and
+    confidence = maximalConfidence()
+  }
+}
+
 /**
  * Endpoints identified as "TaintedPathSink" by the standard Java libraries are path injection sinks with maximal
  * confidence.
  */
 private class TaintedPathSinkCharacteristic extends EndpointCharacteristic {
-  TaintedPathSinkCharacteristic() { this = any(TaintedPathSinkType type).getDescription() }
+  TaintedPathSinkCharacteristic() { this = any(TaintedPathOtherSinkType type).getDescription() }
 
   override predicate appliesToEndpoint(DataFlow::Node n) {
     n.asExpr() = any(PathCreation p).getAnInput()
-    or
-    sinkNode(n, "create-file")
   }
 
   override predicate hasImplications(
     EndpointType endpointClass, boolean isPositiveIndicator, float confidence
   ) {
-    endpointClass instanceof TaintedPathSinkType and
+    endpointClass instanceof TaintedPathOtherSinkType and
+    isPositiveIndicator = true and
+    confidence = maximalConfidence()
+  }
+}
+
+/**
+ * Endpoints identified as "sql" sinks by the MaD modeling are SQL sinks with maximal confidence.
+ */
+private class SqlSinkCharacteristic extends EndpointCharacteristic {
+  SqlSinkCharacteristic() { this = any(SqlSinkType type).getDescription() }
+
+  override predicate appliesToEndpoint(DataFlow::Node n) { sinkNode(n, "sql") }
+
+  override predicate hasImplications(
+    EndpointType endpointClass, boolean isPositiveIndicator, float confidence
+  ) {
+    endpointClass instanceof SqlSinkType and
     isPositiveIndicator = true and
     confidence = maximalConfidence()
   }
@@ -289,32 +337,75 @@ private class TaintedPathSinkCharacteristic extends EndpointCharacteristic {
  * confidence.
  */
 private class SqlInjectionSinkCharacteristic extends EndpointCharacteristic {
-  SqlInjectionSinkCharacteristic() { this = any(SqlInjectionSinkType type).getDescription() }
+  SqlInjectionSinkCharacteristic() { this = any(SqlInjectionOtherSinkType type).getDescription() }
 
-  override predicate appliesToEndpoint(DataFlow::Node n) { n instanceof QueryInjectionSink }
+  override predicate appliesToEndpoint(DataFlow::Node n) {
+    n instanceof QueryInjectionSink and
+    not sinkNode(n, "sql")
+  }
 
   override predicate hasImplications(
     EndpointType endpointClass, boolean isPositiveIndicator, float confidence
   ) {
-    endpointClass instanceof SqlInjectionSinkType and
+    endpointClass instanceof SqlInjectionOtherSinkType and
     isPositiveIndicator = true and
     confidence = maximalConfidence()
   }
 }
 
 /**
- * Endpoints identified as "RequestForgerySink" by the standard Java libraries are server-side request forgery sinks
- * with maximal confidence.
+ * Endpoints identified as "open-url" sinks by the MaD modeling are URL opening sinks with maximal confidence.
  */
-private class RequestForgerySinkCharacteristic extends EndpointCharacteristic {
-  RequestForgerySinkCharacteristic() { this = any(RequestForgerySinkType type).getDescription() }
+private class UrlOpenSinkCharacteristic extends EndpointCharacteristic {
+  UrlOpenSinkCharacteristic() { this = any(UrlOpenSinkType type).getDescription() }
 
-  override predicate appliesToEndpoint(DataFlow::Node n) { n instanceof RequestForgerySink }
+  override predicate appliesToEndpoint(DataFlow::Node n) { sinkNode(n, "open-url") }
 
   override predicate hasImplications(
     EndpointType endpointClass, boolean isPositiveIndicator, float confidence
   ) {
-    endpointClass instanceof RequestForgerySinkType and
+    endpointClass instanceof UrlOpenSinkType and
+    isPositiveIndicator = true and
+    confidence = maximalConfidence()
+  }
+}
+
+/**
+ * Endpoints identified as "jdbc-url" sinks by the MaD modeling are JDBC URL opening sinks with maximal confidence.
+ */
+private class JdbcUrlSinkCharacteristic extends EndpointCharacteristic {
+  JdbcUrlSinkCharacteristic() { this = any(JdbcUrlSinkType type).getDescription() }
+
+  override predicate appliesToEndpoint(DataFlow::Node n) { sinkNode(n, "jdbc-url") }
+
+  override predicate hasImplications(
+    EndpointType endpointClass, boolean isPositiveIndicator, float confidence
+  ) {
+    endpointClass instanceof JdbcUrlSinkType and
+    isPositiveIndicator = true and
+    confidence = maximalConfidence()
+  }
+}
+
+/**
+ * Endpoints identified as "RequestForgerySink" by the standard Java libraries but not by MaD models are server-side
+ * request forgery sinks with maximal confidence.
+ */
+private class RequestForgeryOtherSinkCharacteristic extends EndpointCharacteristic {
+  RequestForgeryOtherSinkCharacteristic() {
+    this = any(RequestForgeryOtherSinkType type).getDescription()
+  }
+
+  override predicate appliesToEndpoint(DataFlow::Node n) {
+    n instanceof RequestForgerySink and
+    not sinkNode(n, "open-url") and
+    not sinkNode(n, "jdbc-url")
+  }
+
+  override predicate hasImplications(
+    EndpointType endpointClass, boolean isPositiveIndicator, float confidence
+  ) {
+    endpointClass instanceof RequestForgeryOtherSinkType and
     isPositiveIndicator = true and
     confidence = maximalConfidence()
   }
diff --git a/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointTypes.qll b/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointTypes.qll
index 15995473329..264cf5e0a9a 100644
--- a/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointTypes.qll
+++ b/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointTypes.qll
@@ -9,9 +9,13 @@ newtype TEndpointType =
   TNegativeType() or
   TXssSinkType() or
   TNosqlInjectionSinkType() or
-  TSqlInjectionSinkType() or
-  TTaintedPathSinkType() or
-  TRequestForgerySinkType()
+  TSqlInjectionOtherSinkType() or
+  TTaintedPathOtherSinkType() or
+  TRequestForgeryOtherSinkType() or
+  TUrlOpenSinkType() or
+  TJdbcUrlSinkType() or
+  TCreateFileSinkType() or
+  TSqlSinkType()
 
 /** A class that can be predicted by endpoint scoring models. */
 abstract class EndpointType extends TEndpointType {
@@ -34,7 +38,7 @@ abstract class EndpointType extends TEndpointType {
   string toString() { result = getDescription() }
 }
 
-/** The `Negative` class that can be predicted by endpoint scoring models. */
+/** The `Negative` class for non-sinks. */
 class NegativeType extends EndpointType, TNegativeType {
   override string getDescription() { result = "non-sink" }
 
@@ -43,38 +47,69 @@ class NegativeType extends EndpointType, TNegativeType {
   override string getKind() { result = "" }
 }
 
-/** The `XssSink` class that can be predicted by endpoint scoring models. */
-class XssSinkType extends EndpointType, TXssSinkType {
-  override string getDescription() { result = "xss sink" }
-
-  override int getEncoding() { result = 1 }
-
-  override string getKind() { result = "xss" }
-}
-
-/** The `SqlInjectionSink` class that can be predicted by endpoint scoring models. */
-class SqlInjectionSinkType extends EndpointType, TSqlInjectionSinkType {
+/** Sinks of MaD kind `sql` */
+class SqlSinkType extends EndpointType, TSqlSinkType {
   override string getDescription() { result = "sql injection sink" }
 
-  override int getEncoding() { result = 3 }
+  override int getEncoding() { result = 1 }
 
   override string getKind() { result = "sql" }
 }
 
-/** The `TaintedPathSink` class that can be predicted by endpoint scoring models. */
-class TaintedPathSinkType extends EndpointType, TTaintedPathSinkType {
-  override string getDescription() { result = "path injection sink" }
+/** Other SQL injection sinks that are not yet included in the MaD sink kinds. */
+class SqlInjectionOtherSinkType extends EndpointType, TSqlInjectionOtherSinkType {
+  override string getDescription() {
+    result = "java persistence or mongodb or other query injection sink"
+  }
 
-  override int getEncoding() { result = 4 }
+  override int getEncoding() { result = 2 }
+
+  override string getKind() { result = "sql-other" }
+}
+
+/** Sinks of MaD kind `create-file` */
+class CreateFileSinkType extends EndpointType, TCreateFileSinkType {
+  override string getDescription() { result = "file creation sink" }
+
+  override int getEncoding() { result = 3 }
 
   override string getKind() { result = "create-file" }
 }
 
-/** The `RequestForgerySinkType` class that can be predicted by endpoint scoring models. */
-class RequestForgerySinkType extends EndpointType, TRequestForgerySinkType {
-  override string getDescription() { result = "server-side request forgery sink" }
+/** Other tainted path injection sinks that are not yet included in the MaD sink kinds. */
+class TaintedPathOtherSinkType extends EndpointType, TTaintedPathOtherSinkType {
+  override string getDescription() { result = "other path injection sink" }
+
+  override int getEncoding() { result = 4 }
+
+  override string getKind() { result = "tainted-path-other" }
+}
+
+/** Sinks of MaD kind `open-url`. */
+class UrlOpenSinkType extends EndpointType, TUrlOpenSinkType {
+  override string getDescription() { result = "url opening sink" }
 
   override int getEncoding() { result = 5 }
 
-  override string getKind() { result = "open-url" } // TODO: is this correct, or should it be “jdbc-url”?
+  override string getKind() { result = "open-url" }
+}
+
+/** Sinks of MaD kind `jdbc-url`. */
+class JdbcUrlSinkType extends EndpointType, TJdbcUrlSinkType {
+  override string getDescription() { result = "jdbc url sink" } // TODO: What's a good description of this sink type?
+
+  override int getEncoding() { result = 6 }
+
+  override string getKind() { result = "jdbc-url" }
+}
+
+/**
+ * Other SSRF sinks that are not yet included in the MaD sink kinds.
+ */
+class RequestForgeryOtherSinkType extends EndpointType, TRequestForgeryOtherSinkType {
+  override string getDescription() { result = "other server-side request forgery sink" }
+
+  override int getEncoding() { result = 7 }
+
+  override string getKind() { result = "ssrf-other" }
 }
diff --git a/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/RequestForgeryATM.qll b/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/RequestForgeryATM.qll
index 2ce83096433..fce3612491e 100644
--- a/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/RequestForgeryATM.qll
+++ b/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/RequestForgeryATM.qll
@@ -23,7 +23,11 @@ class RequestForgeryAtmConfig extends AtmConfig {
     not source.asExpr().(MethodAccess).getCallee() instanceof UrlConnectionGetInputStreamMethod
   }
 
-  override EndpointType getASinkEndpointType() { result instanceof RequestForgerySinkType }
+  override EndpointType getASinkEndpointType() {
+    result instanceof RequestForgeryOtherSinkType or
+    result instanceof UrlOpenSinkType or
+    result instanceof JdbcUrlSinkType
+  }
 
   /*
    * This is largely a copy of the taint tracking configuration for the standard SSRF
diff --git a/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/SqlInjectionATM.qll b/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/SqlInjectionATM.qll
index 1759ac1fd82..0cc43f3370a 100644
--- a/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/SqlInjectionATM.qll
+++ b/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/SqlInjectionATM.qll
@@ -15,7 +15,9 @@ class SqlInjectionAtmConfig extends AtmConfig {
 
   override predicate isKnownSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
 
-  override EndpointType getASinkEndpointType() { result instanceof SqlInjectionSinkType }
+  override EndpointType getASinkEndpointType() {
+    result instanceof SqlInjectionOtherSinkType or result instanceof SqlSinkType
+  }
 
   /*
    * This is largely a copy of the taint tracking configuration for the standard SQL injection
diff --git a/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/TaintedPathATM.qll b/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/TaintedPathATM.qll
index d216c0c9567..479bbecdbe9 100644
--- a/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/TaintedPathATM.qll
+++ b/java/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/TaintedPathATM.qll
@@ -16,7 +16,9 @@ class TaintedPathAtmConfig extends AtmConfig {
 
   override predicate isKnownSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
 
-  override EndpointType getASinkEndpointType() { result instanceof TaintedPathSinkType }
+  override EndpointType getASinkEndpointType() {
+    result instanceof TaintedPathOtherSinkType or result instanceof CreateFileSinkType
+  }
 
   /*
    * This is largely a copy of the taint tracking configuration for the standard path injection