Merge pull request #5864 from RasmusWL/some-framework-modeling

Approved by tausbn
This commit is contained in:
CodeQL CI
2021-05-19 02:31:06 -07:00
committed by GitHub
25 changed files with 390 additions and 32 deletions

View File

@@ -157,8 +157,11 @@ Python built-in support
Tornado, Web framework
PyYAML, Serialization
dill, Serialization
simplejson, Serialization
ujson, Serialization
fabric, Utility library
invoke, Utility library
idna, Utility library
mysql-connector-python, Database
MySQLdb, Database
psycopg2, Database

View File

@@ -0,0 +1,2 @@
lgtm,codescanning
* Added modeling of the PyPI package `idna`, for encoding/decoding Internationalised Domain Names in Applications.

View File

@@ -0,0 +1,2 @@
lgtm,codescanning
* Added modeling of the PyPI package `simplejson`.

View File

@@ -0,0 +1,2 @@
lgtm,codescanning
* Added modeling of the PyPI package `ujson`.

View File

@@ -2,17 +2,22 @@
* Helper file that imports all framework modeling.
*/
// If you add modeling of a new framework/library, remember to add it to the docs in
// `docs/codeql/support/reusables/frameworks.rst`
private import semmle.python.frameworks.Cryptodome
private import semmle.python.frameworks.Cryptography
private import semmle.python.frameworks.Dill
private import semmle.python.frameworks.Django
private import semmle.python.frameworks.Fabric
private import semmle.python.frameworks.Flask
private import semmle.python.frameworks.Idna
private import semmle.python.frameworks.Invoke
private import semmle.python.frameworks.MysqlConnectorPython
private import semmle.python.frameworks.MySQLdb
private import semmle.python.frameworks.Psycopg2
private import semmle.python.frameworks.PyMySQL
private import semmle.python.frameworks.Simplejson
private import semmle.python.frameworks.Stdlib
private import semmle.python.frameworks.Tornado
private import semmle.python.frameworks.Ujson
private import semmle.python.frameworks.Yaml

View File

@@ -0,0 +1,40 @@
/**
* Provides classes modeling security-relevant aspects of the `idna` PyPI package.
* See https://pypi.org/project/idna/.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
/**
 * Models of the `idna` PyPI package: calls to `idna.encode` and `idna.decode`
 * are exposed as `Encoding` and `Decoding` concepts with the format "IDNA".
 * See https://pypi.org/project/idna/.
 */
private module IdnaModel {
  /**
   * A call to `idna.encode`.
   *
   * The input is the first positional argument or the keyword argument `s`;
   * the output is the call itself.
   */
  private class IdnaEncodeCall extends Encoding::Range, DataFlow::CallCfgNode {
    IdnaEncodeCall() {
      exists(API::Node encodeFunc |
        encodeFunc = API::moduleImport("idna").getMember("encode") and
        this = encodeFunc.getACall()
      )
    }

    override DataFlow::Node getAnInput() {
      result = this.getArg(0)
      or
      result = this.getArgByName("s")
    }

    override DataFlow::Node getOutput() { result = this }

    override string getFormat() { result = "IDNA" }
  }

  /**
   * A call to `idna.decode`.
   *
   * The input is the first positional argument or the keyword argument `s`;
   * the output is the call itself. Decoding is modeled as never executing
   * its input.
   */
  private class IdnaDecodeCall extends Decoding::Range, DataFlow::CallCfgNode {
    IdnaDecodeCall() {
      exists(API::Node decodeFunc |
        decodeFunc = API::moduleImport("idna").getMember("decode") and
        this = decodeFunc.getACall()
      )
    }

    override DataFlow::Node getAnInput() {
      result = this.getArg(0)
      or
      result = this.getArgByName("s")
    }

    override DataFlow::Node getOutput() { result = this }

    override string getFormat() { result = "IDNA" }

    override predicate mayExecuteInput() { none() }
  }
}

View File

@@ -0,0 +1,84 @@
/**
* Provides classes modeling security-relevant aspects of the `simplejson` PyPI package.
* See https://simplejson.readthedocs.io/en/latest/.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
/**
 * Models of the `simplejson` PyPI package: `dumps`/`dump` are exposed as
 * `Encoding` concepts and `loads`/`load` as `Decoding` concepts, all with
 * the format "JSON".
 * See https://simplejson.readthedocs.io/en/latest/.
 */
private module SimplejsonModel {
  /**
   * A call to `simplejson.dumps`.
   *
   * The input is the first positional argument or the keyword argument `obj`;
   * the output is the call itself.
   *
   * See https://simplejson.readthedocs.io/en/latest/#simplejson.dumps
   */
  private class SimplejsonDumpsCall extends Encoding::Range, DataFlow::CallCfgNode {
    SimplejsonDumpsCall() {
      exists(API::Node dumpsFunc |
        dumpsFunc = API::moduleImport("simplejson").getMember("dumps") and
        this = dumpsFunc.getACall()
      )
    }

    override DataFlow::Node getAnInput() {
      result = this.getArg(0)
      or
      result = this.getArgByName("obj")
    }

    override DataFlow::Node getOutput() { result = this }

    override string getFormat() { result = "JSON" }
  }

  /**
   * A call to `simplejson.dump`.
   *
   * The input is the first positional argument or the keyword argument `obj`.
   * The output is the post-update node of the file-like argument (second
   * positional argument or keyword argument `fp`), not the call itself.
   *
   * See https://simplejson.readthedocs.io/en/latest/#simplejson.dump
   */
  private class SimplejsonDumpCall extends Encoding::Range, DataFlow::CallCfgNode {
    SimplejsonDumpCall() {
      exists(API::Node dumpFunc |
        dumpFunc = API::moduleImport("simplejson").getMember("dump") and
        this = dumpFunc.getACall()
      )
    }

    override DataFlow::Node getAnInput() {
      result = this.getArg(0)
      or
      result = this.getArgByName("obj")
    }

    override DataFlow::Node getOutput() {
      exists(DataFlow::PostUpdateNode post | post = result |
        post.getPreUpdateNode() = this.getArg(1)
        or
        post.getPreUpdateNode() = this.getArgByName("fp")
      )
    }

    override string getFormat() { result = "JSON" }
  }

  /**
   * A call to `simplejson.loads`.
   *
   * The input is the first positional argument or the keyword argument `s`;
   * the output is the call itself. Decoding is modeled as never executing
   * its input.
   *
   * See https://simplejson.readthedocs.io/en/latest/#simplejson.loads
   */
  private class SimplejsonLoadsCall extends Decoding::Range, DataFlow::CallCfgNode {
    SimplejsonLoadsCall() {
      exists(API::Node loadsFunc |
        loadsFunc = API::moduleImport("simplejson").getMember("loads") and
        this = loadsFunc.getACall()
      )
    }

    override DataFlow::Node getAnInput() {
      result = this.getArg(0)
      or
      result = this.getArgByName("s")
    }

    override DataFlow::Node getOutput() { result = this }

    override string getFormat() { result = "JSON" }

    override predicate mayExecuteInput() { none() }
  }

  /**
   * A call to `simplejson.load`.
   *
   * The input is the file-like argument (first positional argument or keyword
   * argument `fp`); the output is the call itself. Decoding is modeled as
   * never executing its input.
   *
   * See https://simplejson.readthedocs.io/en/latest/#simplejson.load
   */
  private class SimplejsonLoadCall extends Decoding::Range, DataFlow::CallCfgNode {
    SimplejsonLoadCall() {
      exists(API::Node loadFunc |
        loadFunc = API::moduleImport("simplejson").getMember("load") and
        this = loadFunc.getACall()
      )
    }

    override DataFlow::Node getAnInput() {
      result = this.getArg(0)
      or
      result = this.getArgByName("fp")
    }

    override DataFlow::Node getOutput() { result = this }

    override string getFormat() { result = "JSON" }

    override predicate mayExecuteInput() { none() }
  }
}

View File

@@ -511,7 +511,23 @@ private module Stdlib {
override predicate mayExecuteInput() { none() }
override DataFlow::Node getAnInput() { result.asCfgNode() = node.getArg(0) }
override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("s")] }
override DataFlow::Node getOutput() { result = this }
override string getFormat() { result = "JSON" }
}
/**
* A call to `json.load`
* See https://docs.python.org/3/library/json.html#json.load
*/
private class JsonLoadCall extends Decoding::Range, DataFlow::CallCfgNode {
JsonLoadCall() { this = json().getMember("load").getACall() }
override predicate mayExecuteInput() { none() }
override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("fp")] }
override DataFlow::Node getOutput() { result = this }
@@ -525,13 +541,31 @@ private module Stdlib {
private class JsonDumpsCall extends Encoding::Range, DataFlow::CallCfgNode {
JsonDumpsCall() { this = json().getMember("dumps").getACall() }
override DataFlow::Node getAnInput() { result.asCfgNode() = node.getArg(0) }
override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("obj")] }
override DataFlow::Node getOutput() { result = this }
override string getFormat() { result = "JSON" }
}
/**
 * A call to `json.dump`.
 *
 * The input is the first positional argument or the keyword argument `obj`.
 * The output is the post-update node of the file-like argument (second
 * positional argument or keyword argument `fp`), not the call itself.
 *
 * See https://docs.python.org/3/library/json.html#json.dump
 */
private class JsonDumpCall extends Encoding::Range, DataFlow::CallCfgNode {
  JsonDumpCall() {
    exists(API::Node dumpFunc |
      dumpFunc = json().getMember("dump") and
      this = dumpFunc.getACall()
    )
  }

  override DataFlow::Node getAnInput() {
    result = this.getArg(0)
    or
    result = this.getArgByName("obj")
  }

  override DataFlow::Node getOutput() {
    exists(DataFlow::PostUpdateNode post | post = result |
      post.getPreUpdateNode() = this.getArg(1)
      or
      post.getPreUpdateNode() = this.getArgByName("fp")
    )
  }

  override string getFormat() { result = "JSON" }
}
// ---------------------------------------------------------------------------
// cgi
// ---------------------------------------------------------------------------

View File

@@ -0,0 +1,76 @@
/**
* Provides classes modeling security-relevant aspects of the `ujson` PyPI package.
* See https://pypi.org/project/ujson/.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
/**
 * Models of the `ujson` PyPI package: `dumps`/`encode`/`dump` are exposed as
 * `Encoding` concepts and `loads`/`decode`/`load` as `Decoding` concepts, all
 * with the format "JSON".
 * See https://pypi.org/project/ujson/.
 */
private module UjsonModel {
  /**
   * A call to `ujson.dumps` or `ujson.encode`.
   *
   * The input is the first positional argument or the keyword argument `obj`;
   * the output is the call itself.
   */
  private class UjsonDumpsCall extends Encoding::Range, DataFlow::CallCfgNode {
    UjsonDumpsCall() {
      exists(string funcName | funcName in ["dumps", "encode"] |
        this = API::moduleImport("ujson").getMember(funcName).getACall()
      )
    }

    override DataFlow::Node getAnInput() {
      result = this.getArg(0)
      or
      result = this.getArgByName("obj")
    }

    override DataFlow::Node getOutput() { result = this }

    override string getFormat() { result = "JSON" }
  }

  /**
   * A call to `ujson.dump`.
   *
   * Only positional arguments are modeled: the input is the first argument,
   * and the output is the post-update node of the second argument.
   */
  private class UjsonDumpCall extends Encoding::Range, DataFlow::CallCfgNode {
    UjsonDumpCall() {
      exists(API::Node dumpFunc |
        dumpFunc = API::moduleImport("ujson").getMember("dump") and
        this = dumpFunc.getACall()
      )
    }

    override DataFlow::Node getAnInput() { result = this.getArg(0) }

    override DataFlow::Node getOutput() {
      exists(DataFlow::PostUpdateNode post | post = result |
        post.getPreUpdateNode() = this.getArg(1)
      )
    }

    override string getFormat() { result = "JSON" }
  }

  /**
   * A call to `ujson.loads` or `ujson.decode`.
   *
   * The input is the first positional argument or the keyword argument `obj`;
   * the output is the call itself. Decoding is modeled as never executing
   * its input.
   */
  private class UjsonLoadsCall extends Decoding::Range, DataFlow::CallCfgNode {
    UjsonLoadsCall() {
      exists(string funcName | funcName in ["loads", "decode"] |
        this = API::moduleImport("ujson").getMember(funcName).getACall()
      )
    }

    // Note: Most other JSON libraries allow the keyword argument `s`, but as of version
    // 4.0.2 `ujson` uses `obj` instead.
    override DataFlow::Node getAnInput() {
      result = this.getArg(0)
      or
      result = this.getArgByName("obj")
    }

    override DataFlow::Node getOutput() { result = this }

    override string getFormat() { result = "JSON" }

    override predicate mayExecuteInput() { none() }
  }

  /**
   * A call to `ujson.load`.
   *
   * Only the first positional argument is modeled as input; the output is the
   * call itself. Decoding is modeled as never executing its input.
   */
  private class UjsonLoadCall extends Decoding::Range, DataFlow::CallCfgNode {
    UjsonLoadCall() {
      exists(API::Node loadFunc |
        loadFunc = API::moduleImport("ujson").getMember("load") and
        this = loadFunc.getACall()
      )
    }

    override DataFlow::Node getAnInput() { result = this.getArg(0) }

    override DataFlow::Node getOutput() { result = this }

    override string getFormat() { result = "JSON" }

    override predicate mayExecuteInput() { none() }
  }
}

View File

@@ -11,54 +11,43 @@ if TYPE_CHECKING:
# Actual tests
from io import StringIO
# Workaround for Python3 not having unicode
import sys
if sys.version_info[0] == 3:
unicode = str
import json
def test():
print("\n# test")
ts = TAINTED_STRING
import json
encoded = json.dumps(ts)
ensure_tainted(
encoded, # $ tainted
json.dumps(ts), # $ tainted
json.loads(json.dumps(ts)), # $ tainted
json.dumps(obj=ts), # $ tainted
json.loads(encoded), # $ tainted
json.loads(s=encoded), # $ tainted
)
# For Python2, need to convert to unicode for StringIO to work
tainted_filelike = StringIO(unicode(json.dumps(ts)))
# load/dump with file-like
tainted_filelike = StringIO()
json.dump(ts, tainted_filelike)
tainted_filelike.seek(0)
ensure_tainted(
tainted_filelike, # $ MISSING: tainted
json.load(tainted_filelike), # $ MISSING: tainted
tainted_filelike, # $ tainted
json.load(tainted_filelike), # $ tainted
)
def non_syntacical():
print("\n# non_syntacical")
ts = TAINTED_STRING
# a less syntactical approach
from json import load, loads, dumps
dumps_alias = dumps
# load/dump with file-like using keyword-args
tainted_filelike = StringIO()
json.dump(obj=ts, fp=tainted_filelike)
tainted_filelike.seek(0)
ensure_tainted(
dumps(ts), # $ tainted
dumps_alias(ts), # $ tainted
loads(dumps(ts)), # $ tainted
tainted_filelike, # $ tainted
json.load(fp=tainted_filelike), # $ tainted
)
# For Python2, need to convert to unicode for StringIO to work
tainted_filelike = StringIO(unicode(dumps(ts)))
ensure_tainted(
tainted_filelike, # $ MISSING: tainted
load(tainted_filelike), # $ MISSING: tainted
)
# Make tests runnable
test()
non_syntacical()

View File

@@ -0,0 +1,2 @@
import python
import experimental.meta.ConceptsTest

View File

@@ -0,0 +1,3 @@
argumentToEnsureNotTaintedNotMarkedAsSpurious
untaintedArgumentToEnsureTaintedNotMarkedAsMissing
failures

View File

@@ -0,0 +1 @@
import experimental.meta.InlineTaintTest

View File

@@ -0,0 +1,13 @@
import idna
# Exercises `idna.encode` and `idna.decode`, passing the tainted value both
# positionally and through the `s` keyword argument.
# NOTE(review): the trailing `# $ ...` comments look like inline expectations read by
# the accompanying test queries; they name the local variables (`ts`, `tb`), so keep
# each one on the same line as its expression and keep the variable names in sync.
def test_idna():
ts = TAINTED_STRING
tb = TAINTED_BYTES
ensure_tainted(
idna.encode(ts), # $ tainted encodeInput=ts encodeOutput=Attribute() encodeFormat=IDNA
idna.encode(s=ts), # $ tainted encodeInput=ts encodeOutput=Attribute() encodeFormat=IDNA
idna.decode(tb), # $ tainted decodeInput=tb decodeOutput=Attribute() decodeFormat=IDNA
idna.decode(s=tb), # $ tainted decodeInput=tb decodeOutput=Attribute() decodeFormat=IDNA
)

View File

@@ -0,0 +1,2 @@
import python
import experimental.meta.ConceptsTest

View File

@@ -0,0 +1,3 @@
argumentToEnsureNotTaintedNotMarkedAsSpurious
untaintedArgumentToEnsureTaintedNotMarkedAsMissing
failures

View File

@@ -0,0 +1 @@
import experimental.meta.InlineTaintTest

View File

@@ -0,0 +1,46 @@
import simplejson
from io import StringIO
# Exercises `simplejson.dumps`/`loads` (positional and keyword forms) and
# `simplejson.dump`/`load` through a `StringIO` buffer (positional and keyword forms).
# NOTE(review): the trailing `# $ ...` comments look like inline expectations read by
# the accompanying test queries; they name local variables (`tainted_obj`, `encoded`,
# `tainted_filelike`), so keep each one on its line and keep the names in sync.
# NOTE(review): `MISSING: tainted` presumably marks taint that is expected but not
# yet reported by the analysis — confirm against the test framework.
def test():
ts = TAINTED_STRING
tainted_obj = {"foo": ts}
encoded = simplejson.dumps(tainted_obj) # $ encodeOutput=Attribute() encodeFormat=JSON encodeInput=tainted_obj
ensure_tainted(
encoded, # $ tainted
simplejson.dumps(tainted_obj), # $ tainted encodeOutput=Attribute() encodeFormat=JSON encodeInput=tainted_obj
simplejson.dumps(obj=tainted_obj), # $ tainted encodeOutput=Attribute() encodeFormat=JSON encodeInput=tainted_obj
simplejson.loads(encoded), # $ tainted decodeOutput=Attribute() decodeFormat=JSON decodeInput=encoded
simplejson.loads(s=encoded), # $ tainted decodeOutput=Attribute() decodeFormat=JSON decodeInput=encoded
)
# load/dump with file-like
tainted_filelike = StringIO()
simplejson.dump(tainted_obj, tainted_filelike) # $ encodeFormat=JSON encodeInput=tainted_obj
# Rewind the buffer so the following `load` reads what `dump` just wrote.
tainted_filelike.seek(0)
ensure_tainted(
tainted_filelike, # $ MISSING: tainted
simplejson.load(tainted_filelike), # $ decodeOutput=Attribute() decodeFormat=JSON decodeInput=tainted_filelike MISSING: tainted
)
# load/dump with file-like using keyword-args
tainted_filelike = StringIO()
simplejson.dump(obj=tainted_obj, fp=tainted_filelike) # $ encodeFormat=JSON encodeInput=tainted_obj
# Rewind the buffer so the following `load` reads what `dump` just wrote.
tainted_filelike.seek(0)
ensure_tainted(
tainted_filelike, # $ MISSING: tainted
simplejson.load(fp=tainted_filelike), # $ decodeOutput=Attribute() decodeFormat=JSON decodeInput=tainted_filelike MISSING: tainted
)
# To make things runnable
TAINTED_STRING = "TAINTED_STRING"
def ensure_tainted(*args):
    """Print a header line, then one line per argument with its index and repr.

    This stand-in only makes the test file executable as a plain script; it
    performs no checking of its own.
    """
    print("- ensure_tainted")
    position = 0
    for value in args:
        print("arg {}: {!r}".format(position, value))
        position += 1
test()

View File

@@ -0,0 +1,2 @@
import python
import experimental.meta.ConceptsTest

View File

@@ -0,0 +1,3 @@
argumentToEnsureNotTaintedNotMarkedAsSpurious
untaintedArgumentToEnsureTaintedNotMarkedAsMissing
failures

View File

@@ -0,0 +1 @@
import experimental.meta.InlineTaintTest

View File

@@ -0,0 +1,44 @@
import ujson
from io import StringIO
# Exercises `ujson.dumps`/`loads` and their `encode`/`decode` aliases (positional and
# `obj=` keyword forms), then `ujson.dump`/`load` through a `StringIO` buffer using
# positional arguments only.
# NOTE(review): the trailing `# $ ...` comments look like inline expectations read by
# the accompanying test queries; they name local variables (`tainted_obj`, `encoded`,
# `tainted_filelike`), so keep each one on its line and keep the names in sync.
# NOTE(review): `MISSING: tainted` presumably marks taint that is expected but not
# yet reported by the analysis — confirm against the test framework.
def test():
ts = TAINTED_STRING
tainted_obj = {"foo": ts}
encoded = ujson.dumps(tainted_obj) # $ encodeOutput=Attribute() encodeFormat=JSON encodeInput=tainted_obj
ensure_tainted(
encoded, # $ tainted
ujson.dumps(tainted_obj), # $ tainted encodeOutput=Attribute() encodeFormat=JSON encodeInput=tainted_obj
ujson.dumps(obj=tainted_obj), # $ tainted encodeOutput=Attribute() encodeFormat=JSON encodeInput=tainted_obj
ujson.loads(encoded), # $ tainted decodeOutput=Attribute() decodeFormat=JSON decodeInput=encoded
ujson.loads(obj=encoded), # $ tainted decodeOutput=Attribute() decodeFormat=JSON decodeInput=encoded
ujson.encode(tainted_obj), # $ tainted encodeOutput=Attribute() encodeFormat=JSON encodeInput=tainted_obj
ujson.encode(obj=tainted_obj), # $ tainted encodeOutput=Attribute() encodeFormat=JSON encodeInput=tainted_obj
ujson.decode(encoded), # $ tainted decodeOutput=Attribute() decodeFormat=JSON decodeInput=encoded
ujson.decode(obj=encoded), # $ tainted decodeOutput=Attribute() decodeFormat=JSON decodeInput=encoded
)
# load/dump with file-like
tainted_filelike = StringIO()
ujson.dump(tainted_obj, tainted_filelike) # $ encodeFormat=JSON encodeInput=tainted_obj
# Rewind the buffer so the following `load` reads what `dump` just wrote.
tainted_filelike.seek(0)
ensure_tainted(
tainted_filelike, # $ MISSING: tainted
ujson.load(tainted_filelike), # $ decodeOutput=Attribute() decodeFormat=JSON decodeInput=tainted_filelike MISSING: tainted
)
# load/dump with file-like using keyword-args does not work in `ujson`
# To make things runnable
TAINTED_STRING = "TAINTED_STRING"
def ensure_tainted(*args):
    """Print a header line, then one line per argument with its index and repr.

    This stand-in only makes the test file executable as a plain script; it
    performs no checking of its own.
    """
    print("- ensure_tainted")
    position = 0
    for value in args:
        print("arg {}: {!r}".format(position, value))
        position += 1
test()