Java: Untrusted data used in external APIs

This commit adds two queries for identifying external APIs which are used with untrusted data. These queries are intended to facilitate a security review of the application, and will report any external API which is called with untrusted data. The purpose of this is to: - review how untrusted data flows through this application - identify opportunities to improve taint modeling of sinks and taint steps. As a result this is not suitable for integration into a developer workflow, as it will likely have high false positive rate, but it may help identify false negatives for other queries.
2025-12-22 03:36:30 +01:00 · 2020-07-03 17:32:08 +01:00
parent 04a0d47ab9
commit b242a61701
7 changed files with 311 additions and 0 deletions
--- a/java/ql/src/Security/CWE/CWE-020/ExternalAPISinkExample.java
+++ b/java/ql/src/Security/CWE/CWE-020/ExternalAPISinkExample.java
@@ -0,0 +1,8 @@
+public class XSS extends HttpServlet {
+	protected void doGet(HttpServletRequest request, HttpServletResponse response)
+	throws ServletException, IOException {
+		// BAD: a request parameter is written directly to an error response page
+		response.sendError(HttpServletResponse.SC_NOT_FOUND,
+				"The page \"" + request.getParameter("page") + "\" was not found.");
+	}
+}
--- a/java/ql/src/Security/CWE/CWE-020/ExternalAPITaintStepExample.java
+++ b/java/ql/src/Security/CWE/CWE-020/ExternalAPITaintStepExample.java
@@ -0,0 +1,12 @@
+public class SQLInjection extends HttpServlet {
+	protected void doGet(HttpServletRequest request, HttpServletResponse response)
+	throws ServletException, IOException {
+
+		StringBuilder sqlQueryBuilder = new StringBuilder();
+		sqlQueryBuilder.append("SELECT * FROM user WHERE user_id='");
+		sqlQueryBuilder.append(request.getParameter("user_id"));
+		sqlQueryBuilder.append("'");
+
+		// ...
+	}
+}
--- a/java/ql/src/Security/CWE/CWE-020/ExternalAPIsUsedWithUntrustedData.qhelp
+++ b/java/ql/src/Security/CWE/CWE-020/ExternalAPIsUsedWithUntrustedData.qhelp
@@ -0,0 +1,48 @@
+<!DOCTYPE qhelp PUBLIC
+  "-//Semmle//qhelp//EN"
+  "qhelp.dtd">
+<qhelp>
+<overview>
+<p>Using unsanitized untrusted data in an external API can cause a variety of security issues. This query reports 
+all external APIs which are used with untrusted data, along with how frequently the API is used, and how many 
+unique sources of untrusted data flow this API. This query is designed primarily to help identify which APIs 
+may be relevant for security analysis of this application</p>
+
+<p>An external API is defined as a method call to a method which is not defined in the source code, not overridden 
+in the source code, and is not modeled as a taint step in the default taint library. External APIs may be from the 
+Java standard library, third party dependencies or from internal dependencies. The query will report the method 
+signature with a fully qualified name, along with either <code>[param x]</code>, where <code>x</code> indicates the 
+position of the parameter receiving the untrusted data or <code>[qualifier]</code> indicating the untrusted data is 
+used as the qualifier to the method call.</p>
+
+</overview>
+<recommendation>
+
+<p>For each result:</p>
+
+<ul>
+  <li>If the result highlights a known sink, no action is required.</li>
+  <li>If the result highlights an unknown sink for a problem, then add modeling for the sink to the relevant query.</li>
+  <li>If the result represents a call to an external API which transfers taint, add the appropriate modeling, and 
+  re-run the query to determine what new results have appeared due to this additional modeling.</li>
+</ul>
+
+<p>Otherwise, the result is likely uninteresting. Custom versions of this query can extend the <code>SafeExternalAPIMethod</code> 
+class to exclude known safe external APIs from future analysis.
+
+</recommendation>
+<example>
+
+<p>If the query were to return the API <code>javax.servlet.http.HttpServletResponse.sendError(int sc, java.lang.String msg) [param 1]</code> 
+then we should first consider whether this a security relevant sink. In this case, this is writing to a HTTP response, so we should 
+consider whether this is an XSS sink. If it is, we should confirm that it is handled by the XSS query.</p>
+
+<p>If the query were to return the API <code>java.lang.StringBuilder.append(java.lang.String str) [param 0]</code>, then this should be
+reviewed as a possible taint step, because tainted data would flow from the 0th argument to the qualifier of the call.</p>
+
+<p>Note that both examples are correctly handled with the standard taint tracking library and XSS query.</p>
+</example>
+<references>
+
+</references>
+</qhelp>
--- a/java/ql/src/Security/CWE/CWE-020/ExternalAPIsUsedWithUntrustedData.ql
+++ b/java/ql/src/Security/CWE/CWE-020/ExternalAPIsUsedWithUntrustedData.ql
@@ -0,0 +1,17 @@
+/**
+ * @name Frequency counts for external APIs which are used with untrusted data
+ * @description This reports the external APIs which are used with untrusted data, along with how
+ *              frequently the API is called, and how many unique sources of untrusted data flow
+ *              to it.
+ * @id java/count-untrusted-data-external-api
+ * @kind table
+ */
+
+import java
+import semmle.code.java.security.ExternalAPIs::ExternalAPIs
+import semmle.code.java.dataflow.DataFlow
+
+from ExternalAPIUsedWithUntrustedData externalAPI
+select externalAPI, count(externalAPI.getUntrustedDataNode()) as numberOfUses,
+  externalAPI.getNumberOfUntrustedSources() as numberOfUntrustedSources order by
+    numberOfUntrustedSources desc
--- a/java/ql/src/Security/CWE/CWE-020/UntrustedDataToExternalAPI.qhelp
+++ b/java/ql/src/Security/CWE/CWE-020/UntrustedDataToExternalAPI.qhelp
@@ -0,0 +1,59 @@
+<!DOCTYPE qhelp PUBLIC
+  "-//Semmle//qhelp//EN"
+  "qhelp.dtd">
+<qhelp>
+<overview>
+<p>Using unsanitized untrusted data in an external API can cause a variety of security issues. This query reports 
+all uses of external APIs with untrusted data for review. This query has a deliberately low true positive rate, 
+and is designed to help security reviews for the application, as well as helping identify external APIs that 
+should be modeled as either taint steps, or sinks for specific problems.</p>
+
+<p>An external API is defined as a method call to a method which is not defined in the source code, not overridden 
+in the source code, and is not modeled as a taint step in the default taint library. External APIs may be from the
+Java standard library, third party dependencies or from internal dependencies. The query will report uses of 
+untrusted data in either the qualifier or as one of the arguments of external APIs.</p>
+
+</overview>
+<recommendation>
+
+<p>For each result:</p>
+
+<ul>
+  <li>If the result highlights a known sink, confirm that the result is reported by the relevant query, or 
+  that the result is a false positive due to sanitization.</li>
+  <li>If the result highlights an unknown sink for a problem, then add modeling for the sink to the relevant query, 
+  and confirm that the result is either found, or is safe due to appropriate sanitization.</li>
+  <li>If the result represents a call to an external API which transfers taint, add the appropriate modeling, and 
+  re-run the query to determine what new results have appeared due to this additional modeling.</li>
+</ul>
+
+<p>Otherwise, the result is likely uninteresting. Custom versions of this query can extend the <code>SafeExternalAPIMethod</code> 
+class to exclude known safe external APIs from future analysis.
+
+</recommendation>
+<example>
+
+<p>In this first example, a request parameter is read from <code>HttpServletRequest</code> and then ultimately used in a call to the 
+<code>HttpServletResponse.sendError</code> external API:
+
+<sample src="ExternalAPISinkExample.java" />
+
+<p>This is an XSS sink. The XSS query should therefore be reviewed to confirm that this sink is appropriately modeled, 
+and if it is, to confirm that the query reports this particular result, or that the result is false positive due to 
+some existing sanitization.</p>
+
+<p>In this second example, again a request parameter is read from <code>HttpServletRequest</code>.</p>
+
+<sample src="ExternalAPITaintStepExample.java" />
+
+<p>If the query reported the call to <code>StringBuilder.append</code> on Line 7, this would suggest that this external API is 
+not currently modeled as a taint step in the taint tracking library. The next step would be to model this as taint step, then
+re-run the query to determine what additional results might be found. In this example, it seems likely that the result of the
+<code>StringBuilder</code> will be executed as an SQL query, potentially leading to an SQL injection vulnerability.</p>
+
+<p>Note that both examples are correctly handled with the standard taint tracking library and XSS query.</p>
+</example>
+<references>
+
+</references>
+</qhelp>
--- a/java/ql/src/Security/CWE/CWE-020/UntrustedDataToExternalAPI.ql
+++ b/java/ql/src/Security/CWE/CWE-020/UntrustedDataToExternalAPI.ql
@@ -0,0 +1,21 @@
+/**
+ * @name Untrusted data passed to external API
+ * @description Data provided remotely is used in this external API.
+ * @id java/untrusted-data-to-external-api
+ * @kind path-problem
+ * @precision very-low
+ * @problem.severity error
+ * @tags security external/cwe/cwe-20
+ */
+
+import java
+import semmle.code.java.dataflow.FlowSources
+import semmle.code.java.dataflow.TaintTracking
+import semmle.code.java.security.ExternalAPIs::ExternalAPIs
+import DataFlow::PathGraph
+
+from UntrustedDataToExternalAPIConfig config, DataFlow::PathNode source, DataFlow::PathNode sink
+where config.hasFlowPath(source, sink)
+select sink, source, sink,
+  "Call to " + sink.getNode().(ExternalAPIDataNode).getMethodDescription() +
+    " with untrusted data from $@.", source, source.toString()
--- a/java/ql/src/semmle/code/java/security/ExternalAPIs.qll
+++ b/java/ql/src/semmle/code/java/security/ExternalAPIs.qll
@@ -0,0 +1,146 @@
+/**
+ * Definitions for reasoning about untrusted data used in APIs defined outside the
+ * database.
+ */
+
+import java
+import semmle.code.java.dataflow.FlowSources
+import semmle.code.java.dataflow.TaintTracking
+
+module ExternalAPIs {
+  /**
+   * A `Method` which is considered a "safe" external API from a security perspective.
+   */
+  abstract class SafeExternalAPIMethod extends Method { }
+
+  /** The default set of "safe" external APIs. */
+  class DefaultSafeExternalAPIMethod extends SafeExternalAPIMethod {
+    DefaultSafeExternalAPIMethod() {
+      this instanceof EqualsMethod
+      or
+      getName().regexpMatch("size|length|compareTo|getClass|lastIndexOf")
+      or
+      this.getDeclaringType().hasQualifiedName("org.apache.commons.lang3", "Validate")
+      or
+      getQualifiedName() = "Objects.equals"
+      or
+      getDeclaringType().getQualifiedName() = "java.lang.String" and getName() = "equals"
+      or
+      getDeclaringType().hasQualifiedName("com.google.common.base", "Preconditions")
+      or
+      getDeclaringType().getPackage().getName().matches("org.junit%")
+      or
+      getDeclaringType().hasQualifiedName("com.google.common.base", "Strings") and
+      getName() = "isNullOrEmpty"
+      or
+      getDeclaringType().hasQualifiedName("org.apache.commons.lang3", "StringUtils") and
+      getName() = "isNotEmpty"
+      or
+      getDeclaringType().hasQualifiedName("java.lang", "Character") and
+      getName() = "isDigit"
+      or
+      getDeclaringType().hasQualifiedName("java.lang", "String") and
+      getName().regexpMatch("equalsIgnoreCase|regionMatches")
+      or
+      getDeclaringType().hasQualifiedName("java.lang", "Boolean") and
+      getName() = "parseBoolean"
+      or
+      getDeclaringType().hasQualifiedName("org.apache.commons.io", "IOUtils") and
+      getName() = "closeQuietly"
+      or
+      getDeclaringType().hasQualifiedName("org.springframework.util", "StringUtils") and
+      getName().regexpMatch("hasText|isEmpty")
+    }
+  }
+
+  /** A node representing data being passed to an external API. */
+  class ExternalAPIDataNode extends DataFlow::Node {
+    Call call;
+    int i;
+
+    ExternalAPIDataNode() {
+      (
+        // Argument to call to a method
+        this.asExpr() = call.getArgument(i)
+        or
+        // Qualifier to call to a method which returns non trivial value
+        this.asExpr() = call.getQualifier() and
+        i = -1 and
+        not call.getCallee().getReturnType() instanceof VoidType and
+        not call.getCallee().getReturnType() instanceof BooleanType
+      ) and
+      // Defined outside the source archive
+      not call.getCallee().fromSource() and
+      // Not a call to an method which is overridden in source
+      not exists(Method m |
+        m.getASourceOverriddenMethod() = call.getCallee().getSourceDeclaration() and
+        m.fromSource()
+      ) and
+      // Not already modelled as a taint step
+      not exists(DataFlow::Node next | TaintTracking::localTaintStep(this, next)) and
+      // Not a call to a known safe external API
+      not call.getCallee() instanceof SafeExternalAPIMethod
+    }
+
+    /** Gets the called API `Method`. */
+    Method getMethod() { result = call.getCallee() }
+
+    /** Gets the index which is passed untrusted data (where -1 indicates the qualifier). */
+    int getIndex() { result = i }
+
+    /** Gets the description of the method being called. */
+    string getMethodDescription() {
+        result =
+          getMethod().getDeclaringType().getPackage() + "." + getMethod().getQualifiedName()
+    }
+  }
+
+  class UntrustedDataToExternalAPIConfig extends TaintTracking::Configuration {
+    UntrustedDataToExternalAPIConfig() { this = "UntrustedDataToExternalAPIConfig" }
+
+    override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
+
+    override predicate isSink(DataFlow::Node sink) { sink instanceof ExternalAPIDataNode }
+  }
+
+  /** A node representing untrusted data being passed to an external API. */
+  class UntrustedExternalAPIDataNode extends ExternalAPIDataNode {
+    UntrustedExternalAPIDataNode() { any(UntrustedDataToExternalAPIConfig c).hasFlow(_, this) }
+
+    /** Gets a source of untrusted data which is passed to this external API data node. */
+    DataFlow::Node getAnUntrustedSource() {
+      any(UntrustedDataToExternalAPIConfig c).hasFlow(result, this)
+    }
+  }
+
+  private newtype TExternalAPI =
+    TExternalAPIParameter(Method m, int index) {
+      exists(UntrustedExternalAPIDataNode n |
+        m = n.getMethod() and
+        index = n.getIndex()
+      )
+    }
+
+  /** An external API which is used with untrusted data. */
+  class ExternalAPIUsedWithUntrustedData extends TExternalAPI {
+    /** Gets a possibly untrusted use of this external API. */
+    UntrustedExternalAPIDataNode getUntrustedDataNode() {
+      this = TExternalAPIParameter(result.getMethod(), result.getIndex())
+    }
+
+    /** Gets the number of untrusted sources used with this external API. */
+    int getNumberOfUntrustedSources() {
+      result = count(getUntrustedDataNode().getAnUntrustedSource())
+    }
+
+    string toString() {
+      exists(Method m, int index, string indexString |
+        if index = -1 then indexString = "qualifier" else indexString = "param " + index
+      |
+        this = TExternalAPIParameter(m, index) and
+        result =
+          m.getDeclaringType().(RefType).getQualifiedName() + "." + m.getSignature() + " [" + indexString + "]"
+      )
+    }
+  }
+}