Python: Port query and add test

2025-12-21 03:06:31 +01:00 · 2020-10-09 16:11:37 +02:00
parent b0d01cfe8d
commit 0d8bd01e10
12 changed files with 186 additions and 0 deletions
--- a/python/ql/src/experimental/Security-new-dataflow/CWE-502/JsonGood.py
+++ b/python/ql/src/experimental/Security-new-dataflow/CWE-502/JsonGood.py
@@ -0,0 +1,10 @@
+
+from django.conf.urls import url
+import json
+
+def safe(pickled):
+    return json.loads(pickled)
+
+urlpatterns = [
+    url(r'^(?P<object>.*)$', safe)
+]
--- a/python/ql/src/experimental/Security-new-dataflow/CWE-502/UnpicklingBad.py
+++ b/python/ql/src/experimental/Security-new-dataflow/CWE-502/UnpicklingBad.py
@@ -0,0 +1,10 @@
+
+from django.conf.urls import url
+import pickle
+
+def unsafe(pickled):
+    return pickle.loads(pickled)
+
+urlpatterns = [
+    url(r'^(?P<object>.*)$', unsafe)
+]
--- a/python/ql/src/experimental/Security-new-dataflow/CWE-502/UnsafeDeserialization.qhelp
+++ b/python/ql/src/experimental/Security-new-dataflow/CWE-502/UnsafeDeserialization.qhelp
@@ -0,0 +1,61 @@
+<!DOCTYPE qhelp PUBLIC "-//Semmle//qhelp//EN" "qhelp.dtd">
+<qhelp>
+
+<overview>
+<p>
+Deserializing untrusted data using any deserialization framework that
+allows the construction of arbitrary serializable objects is easily exploitable
+and in many cases allows an attacker to execute arbitrary code.  Even before a
+deserialized object is returned to the caller of a deserialization method, a lot
+of code may have been executed, including static initializers, constructors,
+and finalizers.  Automatic deserialization of fields means that an attacker may
+craft a nested combination of objects on which the executed initialization code
+may have unforeseen effects, such as the execution of arbitrary code.
+</p>
+<p>
+There are many different serialization frameworks.  This query currently
+supports Pickle, Marshal and YAML.
+</p>
+</overview>
+
+<recommendation>
+<p>
+Avoid deserialization of untrusted data if at all possible.  If the
+architecture permits it then use other formats instead of serialized objects,
+for example JSON.
+</p>
+</recommendation>
+
+<example>
+<p>
+The following example calls <code>pickle.loads</code> directly on a
+value provided by an incoming HTTP request. Pickle then creates a new value from untrusted data, and is
+therefore inherently unsafe.
+</p>
+<sample src="UnpicklingBad.py" />
+
+<p>
+Changing the code to use <code>json.loads</code> instead of <code>pickle.loads</code> removes the vulnerability.
+</p>
+<sample src="JsonGood.py" />
+
+</example>
+
+<references>
+
+<li>
+OWASP vulnerability description:
+<a href="https://owasp.org/www-community/vulnerabilities/Deserialization_of_untrusted_data">Deserialization of untrusted data</a>.
+</li>
+<li>
+OWASP guidance on deserializing objects:
+<a href="https://cheatsheetseries.owasp.org/cheatsheets/Deserialization_Cheat_Sheet.html">Deserialization Cheat Sheet</a>.
+</li>
+<li>
+Talk by Chris Frohoff &amp; Gabriel Lawrence:
+<a href="https://frohoff.github.io/appseccali-marshalling-pickles/">
+AppSecCali 2015: Marshalling Pickles - how deserializing objects will ruin your day</a>.
+</li>
+</references>
+
+</qhelp>
--- a/python/ql/src/experimental/Security-new-dataflow/CWE-502/UnsafeDeserialization.ql
+++ b/python/ql/src/experimental/Security-new-dataflow/CWE-502/UnsafeDeserialization.ql
@@ -0,0 +1,31 @@
+/**
+ * @name Deserializing untrusted input
+ * @description Deserializing user-controlled data may allow attackers to execute arbitrary code.
+ * @kind path-problem
+ * @id py/unsafe-deserialization
+ * @problem.severity error
+ * @sub-severity high
+ * @precision high
+ * @tags external/cwe/cwe-502
+ *       security
+ *       serialization
+ */
+
+import python
+import experimental.dataflow.DataFlow
+import experimental.dataflow.TaintTracking
+import experimental.semmle.python.Concepts
+import experimental.dataflow.RemoteFlowSources
+import DataFlow::PathGraph
+
+class UnsafeDeserializationConfiguration extends TaintTracking::Configuration {
+  UnsafeDeserializationConfiguration() { this = "Unsafe deserialization configuration" }
+
+  override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
+
+  override predicate isSink(DataFlow::Node sink) { sink = any(DeserializationSink d).getData() }
+}
+
+from UnsafeDeserializationConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink
+where config.hasFlowPath(source, sink)
+select sink.getNode(), source, sink, "Deserializing of $@.", source.getNode(), "untrusted input"