Python: Add modeling of simplejson PyPI package

I noticed that we don't handle PostUpdateNode very well in the concept tests;
for example, for `json.dump(...)` there _should_ have been an `encodeOutput` as
part of the inline expectations.

I'll work on fixing that up in a separate PR, to keep things clean.
This commit is contained in:
Rasmus Wriedt Larsen
2021-05-10 12:15:40 +02:00
parent 8afdf26540
commit 3fe9a3d933
9 changed files with 140 additions and 0 deletions

View File

@@ -156,6 +156,7 @@ Python built-in support
Tornado, Web framework
PyYAML, Serialization
dill, Serialization
simplejson, Serialization
fabric, Utility library
invoke, Utility library
idna, Utility library

View File

@@ -0,0 +1,2 @@
lgtm,codescanning
* Added modeling of the PyPI package `simplejson`.

View File

@@ -16,6 +16,7 @@ private import semmle.python.frameworks.MysqlConnectorPython
private import semmle.python.frameworks.MySQLdb
private import semmle.python.frameworks.Psycopg2
private import semmle.python.frameworks.PyMySQL
private import semmle.python.frameworks.Simplejson
private import semmle.python.frameworks.Stdlib
private import semmle.python.frameworks.Tornado
private import semmle.python.frameworks.Yaml

View File

@@ -0,0 +1,84 @@
/**
* Provides classes modeling security-relevant aspects of the `simplejson` PyPI package.
* See https://simplejson.readthedocs.io/en/latest/.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
/**
 * Models of the encoding and decoding entry points of the `simplejson` PyPI package.
 * See https://simplejson.readthedocs.io/en/latest/.
 */
private module SimplejsonModel {
  /** Gets an API graph node for the member called `name` of the `simplejson` module. */
  private API::Node simplejsonMember(string name) {
    result = API::moduleImport("simplejson").getMember(name)
  }

  /**
   * A call to `simplejson.dumps`.
   *
   * The object to encode is the first positional argument or the `obj` keyword
   * argument; the call itself is the encoded output.
   *
   * See https://simplejson.readthedocs.io/en/latest/#simplejson.dumps
   */
  private class SimplejsonDumpsCall extends Encoding::Range, DataFlow::CallCfgNode {
    SimplejsonDumpsCall() { this = simplejsonMember("dumps").getACall() }

    override DataFlow::Node getAnInput() {
      result = this.getArg(0)
      or
      result = this.getArgByName("obj")
    }

    override DataFlow::Node getOutput() { result = this }

    override string getFormat() { result = "JSON" }
  }

  /**
   * A call to `simplejson.dump`.
   *
   * The object to encode is the first positional argument or the `obj` keyword
   * argument. The output is the post-update node of the file-like argument
   * (second positional argument or the `fp` keyword argument).
   *
   * See https://simplejson.readthedocs.io/en/latest/#simplejson.dump
   */
  private class SimplejsonDumpCall extends Encoding::Range, DataFlow::CallCfgNode {
    SimplejsonDumpCall() { this = simplejsonMember("dump").getACall() }

    override DataFlow::Node getAnInput() {
      result = this.getArg(0)
      or
      result = this.getArgByName("obj")
    }

    override DataFlow::Node getOutput() {
      exists(DataFlow::Node fp |
        fp = this.getArg(1)
        or
        fp = this.getArgByName("fp")
      |
        result.(DataFlow::PostUpdateNode).getPreUpdateNode() = fp
      )
    }

    override string getFormat() { result = "JSON" }
  }

  /**
   * A call to `simplejson.loads`.
   *
   * The data to decode is the first positional argument or the `s` keyword
   * argument; the call itself is the decoded output.
   *
   * See https://simplejson.readthedocs.io/en/latest/#simplejson.loads
   */
  private class SimplejsonLoadsCall extends Decoding::Range, DataFlow::CallCfgNode {
    SimplejsonLoadsCall() { this = simplejsonMember("loads").getACall() }

    override DataFlow::Node getAnInput() {
      result = this.getArg(0)
      or
      result = this.getArgByName("s")
    }

    override DataFlow::Node getOutput() { result = this }

    override string getFormat() { result = "JSON" }

    // Modeled as never executing its input.
    override predicate mayExecuteInput() { none() }
  }

  /**
   * A call to `simplejson.load`.
   *
   * The file-like object to read from is the first positional argument or the
   * `fp` keyword argument; the call itself is the decoded output.
   *
   * See https://simplejson.readthedocs.io/en/latest/#simplejson.load
   */
  private class SimplejsonLoadCall extends Decoding::Range, DataFlow::CallCfgNode {
    SimplejsonLoadCall() { this = simplejsonMember("load").getACall() }

    override DataFlow::Node getAnInput() {
      result = this.getArg(0)
      or
      result = this.getArgByName("fp")
    }

    override DataFlow::Node getOutput() { result = this }

    override string getFormat() { result = "JSON" }

    // Modeled as never executing its input.
    override predicate mayExecuteInput() { none() }
  }
}

View File

@@ -0,0 +1,2 @@
import python
import experimental.meta.ConceptsTest

View File

@@ -0,0 +1,3 @@
argumentToEnsureNotTaintedNotMarkedAsSpurious
untaintedArgumentToEnsureTaintedNotMarkedAsMissing
failures

View File

@@ -0,0 +1 @@
import experimental.meta.InlineTaintTest

View File

@@ -0,0 +1,46 @@
import simplejson
from io import StringIO
def test():
    """Exercise `simplejson.dumps`/`loads`/`dump`/`load` with tainted data.

    Each function is called with positional and with keyword arguments.
    The trailing `# $ ...` comments are inline expectations for the test
    queries; they refer to the expressions and variable names on their own
    line, so they must stay attached to that line and the names must not change.
    """
    ts = TAINTED_STRING
    tainted_obj = {"foo": ts}
    encoded = simplejson.dumps(tainted_obj) # $ encodeOutput=Attribute() encodeFormat=JSON encodeInput=tainted_obj
    ensure_tainted(
        encoded, # $ tainted
        simplejson.dumps(tainted_obj), # $ tainted encodeOutput=Attribute() encodeFormat=JSON encodeInput=tainted_obj
        simplejson.dumps(obj=tainted_obj), # $ tainted encodeOutput=Attribute() encodeFormat=JSON encodeInput=tainted_obj
        simplejson.loads(encoded), # $ tainted decodeOutput=Attribute() decodeFormat=JSON decodeInput=encoded
        simplejson.loads(s=encoded), # $ tainted decodeOutput=Attribute() decodeFormat=JSON decodeInput=encoded
    )
    # load/dump with file-like
    tainted_filelike = StringIO()
    # NOTE(review): no `encodeOutput` is asserted for `dump`; its output is written
    # into the file-like argument rather than returned -- confirm whether the
    # concept test is expected to report it.
    simplejson.dump(tainted_obj, tainted_filelike) # $ encodeFormat=JSON encodeInput=tainted_obj
    # Rewind so that the subsequent `load` reads what `dump` just wrote.
    tainted_filelike.seek(0)
    ensure_tainted(
        tainted_filelike, # $ tainted
        simplejson.load(tainted_filelike), # $ tainted decodeOutput=Attribute() decodeFormat=JSON decodeInput=tainted_filelike
    )
    # load/dump with file-like using keyword-args
    tainted_filelike = StringIO()
    simplejson.dump(obj=tainted_obj, fp=tainted_filelike) # $ encodeFormat=JSON encodeInput=tainted_obj
    # Rewind so that the subsequent `load` reads what `dump` just wrote.
    tainted_filelike.seek(0)
    ensure_tainted(
        tainted_filelike, # $ tainted
        simplejson.load(fp=tainted_filelike), # $ tainted decodeOutput=Attribute() decodeFormat=JSON decodeInput=tainted_filelike
    )
# To make things runnable: a plain-string stand-in for the tainted source, so
# that this file can also be executed as an ordinary Python script.
TAINTED_STRING = "TAINTED_STRING"
def ensure_tainted(*args):
    """Print a header line followed by the index and `repr` of every argument.

    This is a runtime stand-in so the file can be executed directly; it
    performs no checks on the values it receives.
    """
    print("- ensure_tainted")
    position = 0
    while position < len(args):
        print("arg {}: {!r}".format(position, args[position]))
        position += 1
# Run the test body when the file is executed, so the calls above can be
# checked against a real `simplejson` installation.
test()