Merge pull request #16760 from owen-mc/java/reverse-dns-separate-threat-model-kind

Java: make a separate threat model kind for reverse DNS sources
2025-12-17 01:03:14 +01:00 · 2024-07-23 10:08:52 +01:00
parent d2573310c6 2a5144d9d9
commit ff8bb2b1f8
10 changed files with 104 additions and 29 deletions
--- a/docs/codeql/reusables/threat-model-description.rst
+++ b/docs/codeql/reusables/threat-model-description.rst
@@ -5,6 +5,15 @@ A threat model is a named class of dataflow sources that can be enabled or disab
 The ``kind`` property of the ``sourceModel`` determines which threat model a source is associated with. There are two main categories:

 - ``remote`` which represents requests and responses from the network.
- ``local`` which represents data from local files (``file``), command-line arguments (``commandargs``), database reads (``database``), and environment variables(``environment``).
+- ``local`` which represents data from local files (``file``), command-line arguments (``commandargs``), database reads (``database``), environment variables(``environment``) and Windows registry values ("windows-registry"). Currently, Windows registry values are used by C# only.
+
+Note that subcategories can be turned included or excluded separately, so you can specify ``local`` without ``database``, or just ``commandargs`` and ``environment`` without the rest of ``local``.
+
+The less commonly used categories are:
+
+- ``android`` which represents reads from external files in Android (``android-external-storage-dir``) and parameter of an entry-point method declared in a ``ContentProvider`` class (``contentprovider``). Currently only used by Java/Kotlin.
+- ``database-access-result`` which represents a database access. Currently only used by JavaScript.
+- ``file-write`` which represents opening a file in write mode. Currently only used in C#.
+- ``reverse-dns`` which represents reverse DNS lookups. Currently only used in Java.

 When running a CodeQL analysis, the ``remote`` threat model is included by default. You can optionally include other threat models as appropriate when using the CodeQL CLI and in GitHub code scanning. For more information, see `Analyzing your code with CodeQL queries <https://docs.github.com/code-security/codeql-cli/getting-started-with-the-codeql-cli/analyzing-your-code-with-codeql-queries#including-model-packs-to-add-potential-sources-of-tainted-data>`__ and `Customizing your advanced setup for code scanning <https://docs.github.com/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/customizing-your-advanced-setup-for-code-scanning#extending-codeql-coverage-with-threat-models>`__.
--- a/java/ql/lib/change-notes/2024-06-14-reverse-dns-separate-threat-model-kind.md
+++ b/java/ql/lib/change-notes/2024-06-14-reverse-dns-separate-threat-model-kind.md
@@ -0,0 +1,4 @@
+---
+category: majorAnalysis
+---
+* We previously considered reverse DNS resolutions (IP address -> domain name) as sources of untrusted data, since compromised/malicious DNS servers could potentially return malicious responses to arbitrary requests. We have now removed this source from the default set of untrusted sources and made a new threat model kind for them, called "reverse-dns". You can optionally include other threat models as appropriate when using the CodeQL CLI and in GitHub code scanning. For more information, see [Analyzing your code with CodeQL queries](https://docs.github.com/code-security/codeql-cli/getting-started-with-the-codeql-cli/analyzing-your-code-with-codeql-queries#including-model-packs-to-add-potential-sources-of-tainted-data>) and [Customizing your advanced setup for code scanning](https://docs.github.com/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/customizing-your-advanced-setup-for-code-scanning#extending-codeql-coverage-with-threat-models).
--- a/java/ql/lib/semmle/code/java/dataflow/FlowSources.qll
+++ b/java/ql/lib/semmle/code/java/dataflow/FlowSources.qll
@@ -119,21 +119,6 @@ private predicate variableStep(Expr tracked, VarAccess sink) {
  )
 }

-private class ReverseDnsSource extends RemoteFlowSource {
-  ReverseDnsSource() {
-    // Try not to trigger on `localhost`.
-    exists(MethodCall m | m = this.asExpr() |
-      m.getMethod() instanceof ReverseDnsMethod and
-      not exists(MethodCall l |
-        (variableStep(l, m.getQualifier()) or l = m.getQualifier()) and
-        (l.getMethod().getName() = "getLocalHost" or l.getMethod().getName() = "getLoopbackAddress")
-      )
-    )
-  }
-
-  override string getSourceType() { result = "reverse DNS lookup" }
-}
-
 private class MessageBodyReaderParameterSource extends RemoteFlowSource {
  MessageBodyReaderParameterSource() {
    exists(MessageBodyReaderRead m |
@@ -388,6 +373,24 @@ class AndroidJavascriptInterfaceMethodParameter extends RemoteFlowSource {
  }
 }

+/** A node with input that comes from a reverse DNS lookup. */
+abstract class ReverseDnsUserInput extends UserInput {
+  override string getThreatModel() { result = "reverse-dns" }
+}
+
+private class ReverseDnsSource extends ReverseDnsUserInput {
+  ReverseDnsSource() {
+    // Try not to trigger on `localhost`.
+    exists(MethodCall m | m = this.asExpr() |
+      m.getMethod() instanceof ReverseDnsMethod and
+      not exists(MethodCall l |
+        (variableStep(l, m.getQualifier()) or l = m.getQualifier()) and
+        (l.getMethod().getName() = "getLocalHost" or l.getMethod().getName() = "getLoopbackAddress")
+      )
+    )
+  }
+}
+
 /**
 * A data flow source node for an API, which should be considered
 * supported for a modeling perspective.
--- a/java/ql/test/library-tests/dataflow/taintsources/A.java
+++ b/java/ql/test/library-tests/dataflow/taintsources/A.java
@@ -43,7 +43,12 @@ public class A {
    };
    sink(new URL("test").openConnection().getInputStream()); // $hasRemoteValueFlow
    sink(new Socket("test", 1234).getInputStream()); // $hasRemoteValueFlow
-    sink(InetAddress.getByName("test").getHostName()); // $hasRemoteValueFlow
+    sink(InetAddress.getByName("test").getHostName()); // $hasReverseDnsValueFlow
+    sink(InetAddress.getLocalHost().getHostName());
+    sink(InetAddress.getLoopbackAddress().getHostName());
+    sink(InetAddress.getByName("test").getCanonicalHostName()); // $hasReverseDnsValueFlow
+    sink(InetAddress.getLocalHost().getCanonicalHostName());
+    sink(InetAddress.getLoopbackAddress().getCanonicalHostName());

    sink(System.in); // $hasLocalValueFlow
    sink(new FileInputStream("test")); // $hasLocalValueFlow
--- a/java/ql/test/library-tests/dataflow/taintsources/local.ql
+++ b/java/ql/test/library-tests/dataflow/taintsources/local.ql
@@ -2,16 +2,12 @@ import java
 import semmle.code.java.dataflow.FlowSources
 import TestUtilities.InlineExpectationsTest

-class LocalSource extends DataFlow::Node instanceof UserInput {
-  LocalSource() { not this instanceof RemoteFlowSource }
-}
-
 predicate isTestSink(DataFlow::Node n) {
  exists(MethodCall ma | ma.getMethod().hasName("sink") | n.asExpr() = ma.getAnArgument())
 }

 module LocalValueConfig implements DataFlow::ConfigSig {
-  predicate isSource(DataFlow::Node n) { n instanceof LocalSource }
+  predicate isSource(DataFlow::Node n) { n instanceof LocalUserInput }

  predicate isSink(DataFlow::Node n) { isTestSink(n) }
 }
@@ -19,7 +15,7 @@ module LocalValueConfig implements DataFlow::ConfigSig {
 module LocalValueFlow = DataFlow::Global<LocalValueConfig>;

 module LocalTaintConfig implements DataFlow::ConfigSig {
-  predicate isSource(DataFlow::Node n) { n instanceof LocalSource }
+  predicate isSource(DataFlow::Node n) { n instanceof LocalUserInput }

  predicate isSink(DataFlow::Node n) { isTestSink(n) }
 }
--- a/java/ql/test/library-tests/dataflow/taintsources/reversedns.expected
+++ b/java/ql/test/library-tests/dataflow/taintsources/reversedns.expected
@@ -0,0 +1,2 @@
+failures
+testFailures
--- a/java/ql/test/library-tests/dataflow/taintsources/reversedns.ql
+++ b/java/ql/test/library-tests/dataflow/taintsources/reversedns.ql
@@ -0,0 +1,47 @@
+import java
+import semmle.code.java.dataflow.FlowSources
+import TestUtilities.InlineExpectationsTest
+
+predicate isTestSink(DataFlow::Node n) {
+  exists(MethodCall ma | ma.getMethod().hasName("sink") | n.asExpr() = ma.getAnArgument())
+}
+
+module ReverseDnsValueConfig implements DataFlow::ConfigSig {
+  predicate isSource(DataFlow::Node n) { n instanceof ReverseDnsUserInput }
+
+  predicate isSink(DataFlow::Node n) { isTestSink(n) }
+}
+
+module ReverseDnsValueFlow = DataFlow::Global<ReverseDnsValueConfig>;
+
+module ReverseDnsTaintConfig implements DataFlow::ConfigSig {
+  predicate isSource(DataFlow::Node n) { n instanceof ReverseDnsUserInput }
+
+  predicate isSink(DataFlow::Node n) { isTestSink(n) }
+}
+
+module ReverseDnsTaintFlow = TaintTracking::Global<ReverseDnsTaintConfig>;
+
+module ReverseDnsFlowTest implements TestSig {
+  string getARelevantTag() { result = ["hasReverseDnsValueFlow", "hasReverseDnsTaintFlow"] }
+
+  predicate hasActualResult(Location location, string element, string tag, string value) {
+    tag = "hasReverseDnsValueFlow" and
+    exists(DataFlow::Node sink | ReverseDnsValueFlow::flowTo(sink) |
+      sink.getLocation() = location and
+      element = sink.toString() and
+      value = ""
+    )
+    or
+    tag = "hasReverseDnsTaintFlow" and
+    exists(DataFlow::Node src, DataFlow::Node sink |
+      ReverseDnsTaintFlow::flow(src, sink) and not ReverseDnsValueFlow::flow(src, sink)
+    |
+      sink.getLocation() = location and
+      element = sink.toString() and
+      value = ""
+    )
+  }
+}
+
+import MakeTest<ReverseDnsFlowTest>
--- a/java/ql/test/query-tests/security/CWE-022/semmle/tests/Test.java
+++ b/java/ql/test/query-tests/security/CWE-022/semmle/tests/Test.java
@@ -5,12 +5,14 @@ import java.io.FileReader;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;
-import java.net.InetAddress;
 import java.net.URL;
 import java.nio.charset.Charset;
 import java.nio.file.Files;
 import java.nio.file.Path;
+
+import javax.servlet.http.HttpServletRequest;
 import javax.xml.transform.stream.StreamResult;
+
 import org.apache.commons.io.FileUtils;
 import org.apache.tools.ant.AntClassLoader;
 import org.apache.tools.ant.DirectoryScanner;
@@ -24,10 +26,10 @@ import org.springframework.util.FileCopyUtils;

 public class Test {

-    private InetAddress address;
+    private HttpServletRequest request;

    public Object source() {
-        return address.getHostName();
+        return request.getParameter("source");
    }

    void test() throws IOException {
@@ -166,8 +168,8 @@ public class Test {
        new LargeText((File) source(), null, false, false); // $ hasTaintFlow
    }

-    void doGet6(String root, InetAddress address) throws IOException {
-        String temp = address.getHostName();
+    void doGet6(String root, HttpServletRequest request) throws IOException {
+        String temp = request.getParameter("source");
        // GOOD: Use `contains` and `startsWith` to check if the path is safe
        if (!temp.contains("..") && temp.startsWith(root + "/")) {
            File file = new File(temp);
--- a/shared/mad/codeql/mad/ModelValidation.qll
+++ b/shared/mad/codeql/mad/ModelValidation.qll
@@ -118,7 +118,7 @@ module KindValidation<KindValidationConfigSig Config> {
      this =
        [
          // shared
-          "local", "remote", "file", "commandargs", "database", "environment",
+          "local", "remote", "file", "commandargs", "database", "environment", "reverse-dns",
          // Java
          "android-external-storage-dir", "contentprovider",
          // C#
--- a/shared/threat-models/ext/threat-model-grouping.model.yml
+++ b/shared/threat-models/ext/threat-model-grouping.model.yml
@@ -21,3 +21,10 @@ extensions:
      # Android threat models
      - ["android-external-storage-dir", "android"]
      - ["contentprovider", "android"]
+
+      # Threat models that are not grouped with any other threat models.
+      # (Note that all threat models are a child of "all" implicitly, and we
+      # make it explicit here just to make sure all threat models are listed.)
+      - ["database-access-result", "all"]
+      - ["file-write", "all"]
+      - ["reverse-dns", "all"]