mirror of
https://github.com/github/codeql.git
synced 2026-05-03 04:39:29 +02:00
Python: Port query and add test
This commit is contained in:
@@ -0,0 +1,10 @@
|
||||
|
||||
from django.conf.urls import url
|
||||
import json
|
||||
|
||||
def safe(pickled):
|
||||
return json.loads(pickled)
|
||||
|
||||
urlpatterns = [
|
||||
url(r'^(?P<object>.*)$', safe)
|
||||
]
|
||||
@@ -0,0 +1,10 @@
|
||||
|
||||
from django.conf.urls import url
|
||||
import pickle
|
||||
|
||||
def unsafe(pickled):
|
||||
return pickle.loads(pickled)
|
||||
|
||||
urlpatterns = [
|
||||
url(r'^(?P<object>.*)$', unsafe)
|
||||
]
|
||||
@@ -0,0 +1,61 @@
|
||||
<!DOCTYPE qhelp PUBLIC "-//Semmle//qhelp//EN" "qhelp.dtd">
|
||||
<qhelp>
|
||||
|
||||
<overview>
|
||||
<p>
|
||||
Deserializing untrusted data using any deserialization framework that
|
||||
allows the construction of arbitrary serializable objects is easily exploitable
|
||||
and in many cases allows an attacker to execute arbitrary code. Even before a
|
||||
deserialized object is returned to the caller of a deserialization method a lot
|
||||
of code may have been executed, including static initializers, constructors,
|
||||
and finalizers. Automatic deserialization of fields means that an attacker may
|
||||
craft a nested combination of objects on which the executed initialization code
|
||||
may have unforeseen effects, such as the execution of arbitrary code.
|
||||
</p>
|
||||
<p>
|
||||
There are many different serialization frameworks. This query currently
|
||||
supports Pickle, Marshal and Yaml.
|
||||
</p>
|
||||
</overview>
|
||||
|
||||
<recommendation>
|
||||
<p>
|
||||
Avoid deserialization of untrusted data if at all possible. If the
|
||||
architecture permits it then use other formats instead of serialized objects,
|
||||
for example JSON.
|
||||
</p>
|
||||
</recommendation>
|
||||
|
||||
<example>
|
||||
<p>
|
||||
The following example calls <code>pickle.loads</code> directly on a
|
||||
value provided by an incoming HTTP request. Pickle then creates a new value from untrusted data, and is
|
||||
therefore inherently unsafe.
|
||||
</p>
|
||||
<sample src="UnpicklingBad.py" />
|
||||
|
||||
<p>
|
||||
Changing the code to use <code>json.loads</code> instead of <code>pickle.loads</code> removes the vulnerability.
|
||||
</p>
|
||||
<sample src="JsonGood.py" />
|
||||
|
||||
</example>
|
||||
|
||||
<references>
|
||||
|
||||
<li>
|
||||
OWASP vulnerability description:
|
||||
<a href="https://www.owasp.org/index.php/Deserialization_of_untrusted_data">Deserialization of untrusted data</a>.
|
||||
</li>
|
||||
<li>
|
||||
OWASP guidance on deserializing objects:
|
||||
<a href="https://cheatsheetseries.owasp.org/cheatsheets/Deserialization_Cheat_Sheet.html">Deserialization Cheat Sheet</a>.
|
||||
</li>
|
||||
<li>
|
||||
Talks by Chris Frohoff & Gabriel Lawrence:
|
||||
<a href="http://frohoff.github.io/appseccali-marshalling-pickles/">
|
||||
AppSecCali 2015: Marshalling Pickles - how deserializing objects will ruin your day</a>
|
||||
</li>
|
||||
</references>
|
||||
|
||||
</qhelp>
|
||||
@@ -0,0 +1,31 @@
|
||||
/**
|
||||
* @name Deserializing untrusted input
|
||||
* @description Deserializing user-controlled data may allow attackers to execute arbitrary code.
|
||||
* @kind path-problem
|
||||
* @id py/unsafe-deserialization
|
||||
* @problem.severity error
|
||||
* @sub-severity high
|
||||
* @precision high
|
||||
* @tags external/cwe/cwe-502
|
||||
* security
|
||||
* serialization
|
||||
*/
|
||||
|
||||
import python
|
||||
import experimental.dataflow.DataFlow
|
||||
import experimental.dataflow.TaintTracking
|
||||
import experimental.semmle.python.Concepts
|
||||
import experimental.dataflow.RemoteFlowSources
|
||||
import DataFlow::PathGraph
|
||||
|
||||
class UnsafeDeserializationConfiguration extends TaintTracking::Configuration {
|
||||
UnsafeDeserializationConfiguration() { this = "Unsafe deserialization configuration" }
|
||||
|
||||
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
|
||||
|
||||
override predicate isSink(DataFlow::Node sink) { sink = any(DeserializationSink d).getData() }
|
||||
}
|
||||
|
||||
from UnsafeDeserializationConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink
|
||||
where config.hasFlowPath(source, sink)
|
||||
select sink.getNode(), source, sink, "Deserializing of $@.", source.getNode(), "untrusted input"
|
||||
@@ -38,3 +38,33 @@ module SystemCommandExecution {
|
||||
abstract DataFlow::Node getCommand();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A data-flow node that performs deserialization in a potentially unsafe way.
|
||||
*
|
||||
* Extend this class to refine existing API models. If you want to model new APIs,
|
||||
* extend `DeserializationSink::Range` instead.
|
||||
*/
|
||||
class DeserializationSink extends DataFlow::Node {
|
||||
DeserializationSink::Range self;
|
||||
|
||||
DeserializationSink() { this = self }
|
||||
|
||||
/** Gets the argument that specifies the command to be executed. */
|
||||
DataFlow::Node getData() { result = self.getData() }
|
||||
}
|
||||
|
||||
/** Provides a class for modeling new system-command execution APIs. */
|
||||
module DeserializationSink {
|
||||
/**
|
||||
* A data-flow node that executes an operating system command,
|
||||
* for instance by spawning a new process.
|
||||
*
|
||||
* Extend this class to model new APIs. If you want to refine existing API models,
|
||||
* extend `DeserializationSink` instead.
|
||||
*/
|
||||
abstract class Range extends DataFlow::Node {
|
||||
/** Gets the argument that specifies the command to be executed. */
|
||||
abstract DataFlow::Node getData();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -32,3 +32,20 @@ class SystemCommandExecutionTest extends InlineExpectationsTest {
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
class DeserializationSinkTest extends InlineExpectationsTest {
|
||||
DeserializationSinkTest() { this = "DeserializationSinkTest" }
|
||||
|
||||
override string getARelevantTag() { result = "getData" }
|
||||
|
||||
override predicate hasActualResult(Location location, string element, string tag, string value) {
|
||||
exists(DeserializationSink ds, DataFlow::Node data |
|
||||
exists(location.getFile().getRelativePath()) and
|
||||
data = ds.getData() and
|
||||
location = data.getLocation() and
|
||||
element = data.toString() and
|
||||
value = value_from_expr(data.asExpr()) and
|
||||
tag = "getData"
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,4 @@
|
||||
| unsafe_deserialization.py:12:28:12:45 | Comment # $getData=payload | Missing result:getData=payload |
|
||||
| unsafe_deserialization.py:13:25:13:42 | Comment # $getData=payload | Missing result:getData=payload |
|
||||
| unsafe_deserialization.py:14:29:14:46 | Comment # $getData=payload | Missing result:getData=payload |
|
||||
| unsafe_deserialization.py:16:26:16:43 | Comment # $getData=payload | Missing result:getData=payload |
|
||||
@@ -0,0 +1,2 @@
|
||||
import python
|
||||
import experimental.meta.ConceptsTest
|
||||
@@ -0,0 +1,3 @@
|
||||
edges
|
||||
nodes
|
||||
#select
|
||||
@@ -0,0 +1 @@
|
||||
experimental/Security-new-dataflow/CWE-502/UnsafeDeserialization.ql
|
||||
@@ -0,0 +1 @@
|
||||
semmle-extractor-options: --max-import-depth=2 -p ../lib
|
||||
@@ -0,0 +1,16 @@
|
||||
import flask
|
||||
import pickle
|
||||
import yaml
|
||||
import marshal
|
||||
|
||||
from flask import Flask, request
|
||||
app = Flask(__name__)
|
||||
|
||||
@app.route("/")
|
||||
def hello():
|
||||
payload = request.args.get('payload')
|
||||
pickle.loads(payload) # $getData=payload
|
||||
yaml.load(payload) # $getData=payload
|
||||
marshal.loads(payload) # $getData=payload
|
||||
import dill
|
||||
dill.loads(payload) # $getData=payload
|
||||
Reference in New Issue
Block a user