Python: Port query and add test

2026-05-03 04:39:29 +02:00 · 2020-10-09 16:11:37 +02:00
parent b0d01cfe8d
commit 0d8bd01e10
12 changed files with 186 additions and 0 deletions
--- a/python/ql/src/experimental/Security-new-dataflow/CWE-502/JsonGood.py
+++ b/python/ql/src/experimental/Security-new-dataflow/CWE-502/JsonGood.py
@@ -0,0 +1,10 @@
+
+from django.conf.urls import url
+import json
+
+def safe(pickled):
+    return json.loads(pickled)
+
+urlpatterns = [
+    url(r'^(?P<object>.*)$', safe)
+]
--- a/python/ql/src/experimental/Security-new-dataflow/CWE-502/UnpicklingBad.py
+++ b/python/ql/src/experimental/Security-new-dataflow/CWE-502/UnpicklingBad.py
@@ -0,0 +1,10 @@
+
+from django.conf.urls import url
+import pickle
+
+def unsafe(pickled):
+    return pickle.loads(pickled)
+
+urlpatterns = [
+    url(r'^(?P<object>.*)$', unsafe)
+]
--- a/python/ql/src/experimental/Security-new-dataflow/CWE-502/UnsafeDeserialization.qhelp
+++ b/python/ql/src/experimental/Security-new-dataflow/CWE-502/UnsafeDeserialization.qhelp
@@ -0,0 +1,61 @@
+<!DOCTYPE qhelp PUBLIC "-//Semmle//qhelp//EN" "qhelp.dtd">
+<qhelp>
+
+<overview>
+<p>
+Deserializing untrusted data using any deserialization framework that
+allows the construction of arbitrary serializable objects is easily exploitable
+and in many cases allows an attacker to execute arbitrary code.  Even before a
+deserialized object is returned to the caller of a deserialization method a lot
+of code may have been executed, including static initializers, constructors,
+and finalizers.  Automatic deserialization of fields means that an attacker may
+craft a nested combination of objects on which the executed initialization code
+may have unforeseen effects, such as the execution of arbitrary code.
+</p>
+<p>
+There are many different serialization frameworks.  This query currently
+supports Pickle, Marshal and Yaml.
+</p>
+</overview>
+
+<recommendation>
+<p>
+Avoid deserialization of untrusted data if at all possible.  If the
+architecture permits it then use other formats instead of serialized objects,
+for example JSON.
+</p>
+</recommendation>
+
+<example>
+<p>
+The following example calls <code>pickle.loads</code> directly on a
+value provided by an incoming HTTP request. Pickle then creates a new value from untrusted data, and is
+therefore inherently unsafe.
+</p>
+<sample src="UnpicklingBad.py" />
+
+<p>
+Changing the code to use <code>json.loads</code> instead of <code>pickle.loads</code> removes the vulnerability.
+</p>
+<sample src="JsonGood.py" />
+
+</example>
+
+<references>
+
+<li>
+OWASP vulnerability description:
+<a href="https://www.owasp.org/index.php/Deserialization_of_untrusted_data">Deserialization of untrusted data</a>.
+</li>
+<li>
+OWASP guidance on deserializing objects:
+<a href="https://cheatsheetseries.owasp.org/cheatsheets/Deserialization_Cheat_Sheet.html">Deserialization Cheat Sheet</a>.
+</li>
+<li>
+Talks by Chris Frohoff &amp; Gabriel Lawrence:
+<a href="http://frohoff.github.io/appseccali-marshalling-pickles/">
+AppSecCali 2015: Marshalling Pickles - how deserializing objects will ruin your day</a>
+</li>
+</references>
+
+</qhelp>
--- a/python/ql/src/experimental/Security-new-dataflow/CWE-502/UnsafeDeserialization.ql
+++ b/python/ql/src/experimental/Security-new-dataflow/CWE-502/UnsafeDeserialization.ql
@@ -0,0 +1,31 @@
+/**
+ * @name Deserializing untrusted input
+ * @description Deserializing user-controlled data may allow attackers to execute arbitrary code.
+ * @kind path-problem
+ * @id py/unsafe-deserialization
+ * @problem.severity error
+ * @sub-severity high
+ * @precision high
+ * @tags external/cwe/cwe-502
+ *       security
+ *       serialization
+ */
+
+import python
+import experimental.dataflow.DataFlow
+import experimental.dataflow.TaintTracking
+import experimental.semmle.python.Concepts
+import experimental.dataflow.RemoteFlowSources
+import DataFlow::PathGraph
+
+class UnsafeDeserializationConfiguration extends TaintTracking::Configuration {
+  UnsafeDeserializationConfiguration() { this = "Unsafe deserialization configuration" }
+
+  override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
+
+  override predicate isSink(DataFlow::Node sink) { sink = any(DeserializationSink d).getData() }
+}
+
+from UnsafeDeserializationConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink
+where config.hasFlowPath(source, sink)
+select sink.getNode(), source, sink, "Deserializing of $@.", source.getNode(), "untrusted input"
--- a/python/ql/src/experimental/semmle/python/Concepts.qll
+++ b/python/ql/src/experimental/semmle/python/Concepts.qll
@@ -38,3 +38,33 @@ module SystemCommandExecution {
    abstract DataFlow::Node getCommand();
  }
 }
+
+/**
+ * A data-flow node that performs deserialization in a potentially unsafe way.
+ *
+ * Extend this class to refine existing API models. If you want to model new APIs,
+ * extend `DeserializationSink::Range` instead.
+ */
+class DeserializationSink extends DataFlow::Node {
+  DeserializationSink::Range self;
+
+  DeserializationSink() { this = self }
+
+  /** Gets the argument that specifies the command to be executed. */
+  DataFlow::Node getData() { result = self.getData() }
+}
+
+/** Provides a class for modeling new system-command execution APIs. */
+module DeserializationSink {
+  /**
+   * A data-flow node that executes an operating system command,
+   * for instance by spawning a new process.
+   *
+   * Extend this class to model new APIs. If you want to refine existing API models,
+   * extend `DeserializationSink` instead.
+   */
+  abstract class Range extends DataFlow::Node {
+    /** Gets the argument that specifies the command to be executed. */
+    abstract DataFlow::Node getData();
+  }
+}
--- a/python/ql/test/experimental/meta/ConceptsTest.qll
+++ b/python/ql/test/experimental/meta/ConceptsTest.qll
@@ -32,3 +32,20 @@ class SystemCommandExecutionTest extends InlineExpectationsTest {
    )
  }
 }
+
+class DeserializationSinkTest extends InlineExpectationsTest {
+  DeserializationSinkTest() { this = "DeserializationSinkTest" }
+
+  override string getARelevantTag() { result = "getData" }
+
+  override predicate hasActualResult(Location location, string element, string tag, string value) {
+    exists(DeserializationSink ds, DataFlow::Node data |
+      exists(location.getFile().getRelativePath()) and
+      data = ds.getData() and
+      location = data.getLocation() and
+      element = data.toString() and
+      value = value_from_expr(data.asExpr()) and
+      tag = "getData"
+    )
+  }
+}
--- a/python/ql/test/experimental/query-tests/Security-new-dataflow/CWE-502/ConceptsTest.expected
+++ b/python/ql/test/experimental/query-tests/Security-new-dataflow/CWE-502/ConceptsTest.expected
@@ -0,0 +1,4 @@
+| unsafe_deserialization.py:12:28:12:45 | Comment # $getData=payload | Missing result:getData=payload |
+| unsafe_deserialization.py:13:25:13:42 | Comment # $getData=payload | Missing result:getData=payload |
+| unsafe_deserialization.py:14:29:14:46 | Comment # $getData=payload | Missing result:getData=payload |
+| unsafe_deserialization.py:16:26:16:43 | Comment # $getData=payload | Missing result:getData=payload |
--- a/python/ql/test/experimental/query-tests/Security-new-dataflow/CWE-502/ConceptsTest.ql
+++ b/python/ql/test/experimental/query-tests/Security-new-dataflow/CWE-502/ConceptsTest.ql
@@ -0,0 +1,2 @@
+import python
+import experimental.meta.ConceptsTest
--- a/python/ql/test/experimental/query-tests/Security-new-dataflow/CWE-502/UnsafeDeserialization.expected
+++ b/python/ql/test/experimental/query-tests/Security-new-dataflow/CWE-502/UnsafeDeserialization.expected
@@ -0,0 +1,3 @@
+edges
+nodes
+#select
--- a/python/ql/test/experimental/query-tests/Security-new-dataflow/CWE-502/UnsafeDeserialization.qlref
+++ b/python/ql/test/experimental/query-tests/Security-new-dataflow/CWE-502/UnsafeDeserialization.qlref
@@ -0,0 +1 @@
+experimental/Security-new-dataflow/CWE-502/UnsafeDeserialization.ql
--- a/python/ql/test/experimental/query-tests/Security-new-dataflow/CWE-502/options
+++ b/python/ql/test/experimental/query-tests/Security-new-dataflow/CWE-502/options
@@ -0,0 +1 @@
+semmle-extractor-options: --max-import-depth=2 -p ../lib
--- a/python/ql/test/experimental/query-tests/Security-new-dataflow/CWE-502/unsafe_deserialization.py
+++ b/python/ql/test/experimental/query-tests/Security-new-dataflow/CWE-502/unsafe_deserialization.py
@@ -0,0 +1,16 @@
+import flask
+import pickle
+import yaml
+import marshal
+
+from flask import Flask, request
+app = Flask(__name__)
+
+@app.route("/")
+def hello():
+    payload = request.args.get('payload')
+    pickle.loads(payload)  # $getData=payload
+    yaml.load(payload)  # $getData=payload
+    marshal.loads(payload)  # $getData=payload
+    import dill
+    dill.loads(payload)  # $getData=payload
				`@@ -0,0 +1 @@`
				`experimental/Security-new-dataflow/CWE-502/UnsafeDeserialization.ql`
				`@@ -0,0 +1 @@`
				`semmle-extractor-options: --max-import-depth=2 -p ../lib`