From 7ed20a8b2c07be79f5ba5936d52870fd2c21940f Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Mon, 10 May 2021 10:55:21 +0200 Subject: [PATCH 1/8] Python: Add reminder to update docs for new frameworks --- python/ql/src/semmle/python/Frameworks.qll | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/ql/src/semmle/python/Frameworks.qll b/python/ql/src/semmle/python/Frameworks.qll index a00511ca545..1c8ef1f551b 100644 --- a/python/ql/src/semmle/python/Frameworks.qll +++ b/python/ql/src/semmle/python/Frameworks.qll @@ -2,6 +2,8 @@ * Helper file that imports all framework modeling. */ +// If you add modeling of a new framework/library, remember to add it it to the docs in +// `docs/codeql/support/reusables/frameworks.rst` private import semmle.python.frameworks.Cryptodome private import semmle.python.frameworks.Cryptography private import semmle.python.frameworks.Dill From 8afdf2654035bcc38eb4c343572f94d0b7d4560d Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Mon, 10 May 2021 10:57:33 +0200 Subject: [PATCH 2/8] Python: Add modeling of idna PyPI package --- docs/codeql/support/reusables/frameworks.rst | 1 + .../2021-05-10-idna-add-modeling.md | 2 + python/ql/src/semmle/python/Frameworks.qll | 1 + .../ql/src/semmle/python/frameworks/Idna.qll | 40 +++++++++++++++++++ .../frameworks/idna/ConceptsTest.expected | 0 .../frameworks/idna/ConceptsTest.ql | 2 + .../frameworks/idna/InlineTaintTest.expected | 3 ++ .../frameworks/idna/InlineTaintTest.ql | 1 + .../frameworks/idna/taint_test.py | 13 ++++++ 9 files changed, 63 insertions(+) create mode 100644 python/change-notes/2021-05-10-idna-add-modeling.md create mode 100644 python/ql/src/semmle/python/frameworks/Idna.qll create mode 100644 python/ql/test/library-tests/frameworks/idna/ConceptsTest.expected create mode 100644 python/ql/test/library-tests/frameworks/idna/ConceptsTest.ql create mode 100644 python/ql/test/library-tests/frameworks/idna/InlineTaintTest.expected create mode 100644 python/ql/test/library-tests/frameworks/idna/InlineTaintTest.ql create mode 100644 python/ql/test/library-tests/frameworks/idna/taint_test.py diff --git a/docs/codeql/support/reusables/frameworks.rst b/docs/codeql/support/reusables/frameworks.rst index afdf63bdb7b..d54c07454bc 100644 --- a/docs/codeql/support/reusables/frameworks.rst +++ b/docs/codeql/support/reusables/frameworks.rst @@ -158,6 +158,7 @@ Python built-in support dill, Serialization fabric, Utility library invoke, Utility library + idna, Utility library mysql-connector-python, Database MySQLdb, Database psycopg2, Database diff --git a/python/change-notes/2021-05-10-idna-add-modeling.md b/python/change-notes/2021-05-10-idna-add-modeling.md new file mode 100644 index 00000000000..95856ffe5a8 --- /dev/null +++ b/python/change-notes/2021-05-10-idna-add-modeling.md @@ -0,0 +1,2 @@ +lgtm,codescanning +* Added modeling of the PyPI package `idna`, for encoding/decoding Internationalised Domain Names in Applications. diff --git a/python/ql/src/semmle/python/Frameworks.qll b/python/ql/src/semmle/python/Frameworks.qll index 1c8ef1f551b..ed8f308c70a 100644 --- a/python/ql/src/semmle/python/Frameworks.qll +++ b/python/ql/src/semmle/python/Frameworks.qll @@ -10,6 +10,7 @@ private import semmle.python.frameworks.Dill private import semmle.python.frameworks.Django private import semmle.python.frameworks.Fabric private import semmle.python.frameworks.Flask +private import semmle.python.frameworks.Idna private import semmle.python.frameworks.Invoke private import semmle.python.frameworks.MysqlConnectorPython private import semmle.python.frameworks.MySQLdb diff --git a/python/ql/src/semmle/python/frameworks/Idna.qll b/python/ql/src/semmle/python/frameworks/Idna.qll new file mode 100644 index 00000000000..331cafbd084 --- /dev/null +++ b/python/ql/src/semmle/python/frameworks/Idna.qll @@ -0,0 +1,40 @@ +/** + * Provides classes modeling security-relevant aspects of the `idna` PyPI package. + * See https://pypi.org/project/idna/. + */ + +private import python +private import semmle.python.dataflow.new.DataFlow +private import semmle.python.dataflow.new.TaintTracking +private import semmle.python.Concepts +private import semmle.python.ApiGraphs + +/** + * Provides models for the `idna` PyPI package. + * See https://pypi.org/project/idna/. + */ +private module IdnaModel { + /** A call to `idna.encode`. */ + private class IdnaEncodeCall extends Encoding::Range, DataFlow::CallCfgNode { + IdnaEncodeCall() { this = API::moduleImport("idna").getMember("encode").getACall() } + + override DataFlow::Node getAnInput() { result = [this.getArg(0), this.getArgByName("s")] } + + override DataFlow::Node getOutput() { result = this } + + override string getFormat() { result = "IDNA" } + } + + /** A call to `idna.decode`. */ + private class IdnaDecodeCall extends Decoding::Range, DataFlow::CallCfgNode { + IdnaDecodeCall() { this = API::moduleImport("idna").getMember("decode").getACall() } + + override DataFlow::Node getAnInput() { result = [this.getArg(0), this.getArgByName("s")] } + + override DataFlow::Node getOutput() { result = this } + + override string getFormat() { result = "IDNA" } + + override predicate mayExecuteInput() { none() } + } +} diff --git a/python/ql/test/library-tests/frameworks/idna/ConceptsTest.expected b/python/ql/test/library-tests/frameworks/idna/ConceptsTest.expected new file mode 100644 index 00000000000..e69de29bb2d diff --git a/python/ql/test/library-tests/frameworks/idna/ConceptsTest.ql b/python/ql/test/library-tests/frameworks/idna/ConceptsTest.ql new file mode 100644 index 00000000000..b557a0bccb6 --- /dev/null +++ b/python/ql/test/library-tests/frameworks/idna/ConceptsTest.ql @@ -0,0 +1,2 @@ +import python +import experimental.meta.ConceptsTest diff --git a/python/ql/test/library-tests/frameworks/idna/InlineTaintTest.expected b/python/ql/test/library-tests/frameworks/idna/InlineTaintTest.expected new file mode 100644 index 00000000000..79d760d87f4 --- /dev/null +++ b/python/ql/test/library-tests/frameworks/idna/InlineTaintTest.expected @@ -0,0 +1,3 @@ +argumentToEnsureNotTaintedNotMarkedAsSpurious +untaintedArgumentToEnsureTaintedNotMarkedAsMissing +failures diff --git a/python/ql/test/library-tests/frameworks/idna/InlineTaintTest.ql b/python/ql/test/library-tests/frameworks/idna/InlineTaintTest.ql new file mode 100644 index 00000000000..027ad8667be --- /dev/null +++ b/python/ql/test/library-tests/frameworks/idna/InlineTaintTest.ql @@ -0,0 +1 @@ +import experimental.meta.InlineTaintTest diff --git a/python/ql/test/library-tests/frameworks/idna/taint_test.py b/python/ql/test/library-tests/frameworks/idna/taint_test.py new file mode 100644 index 00000000000..61b9dad166c --- /dev/null +++ b/python/ql/test/library-tests/frameworks/idna/taint_test.py @@ -0,0 +1,13 @@ +import idna + +def test_idna(): + ts = TAINTED_STRING + tb = TAINTED_BYTES + + ensure_tainted( + idna.encode(ts), # $ tainted encodeInput=ts encodeOutput=Attribute() encodeFormat=IDNA + idna.encode(s=ts), # $ tainted encodeInput=ts encodeOutput=Attribute() encodeFormat=IDNA + + idna.decode(tb), # $ tainted decodeInput=tb decodeOutput=Attribute() decodeFormat=IDNA + idna.decode(s=tb), # $ tainted decodeInput=tb decodeOutput=Attribute() decodeFormat=IDNA + ) From 3fe9a3d9334699a4db47ebd6ee9e5c3d2640cd45 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Mon, 10 May 2021 12:15:40 +0200 Subject: [PATCH 3/8] Python: Add modeling of simplejson PyPI package I noticed that we don't handle PostUpdateNote very well in the concept tests, for exmaple for `json.dump(...)` there _should_ have been an `encodeOutput` as part of the inline expectations. I'll work on fixing that up in a separate PR, to keep things clean. --- docs/codeql/support/reusables/frameworks.rst | 1 + .../2021-05-10-simplejson-add-modeling.md | 2 + python/ql/src/semmle/python/Frameworks.qll | 1 + .../semmle/python/frameworks/Simplejson.qll | 84 +++++++++++++++++++ .../simplejson/ConceptsTest.expected | 0 .../frameworks/simplejson/ConceptsTest.ql | 2 + .../simplejson/InlineTaintTest.expected | 3 + .../frameworks/simplejson/InlineTaintTest.ql | 1 + .../frameworks/simplejson/taint_test.py | 46 ++++++++++ 9 files changed, 140 insertions(+) create mode 100644 python/change-notes/2021-05-10-simplejson-add-modeling.md create mode 100644 python/ql/src/semmle/python/frameworks/Simplejson.qll create mode 100644 python/ql/test/library-tests/frameworks/simplejson/ConceptsTest.expected create mode 100644 python/ql/test/library-tests/frameworks/simplejson/ConceptsTest.ql create mode 100644 python/ql/test/library-tests/frameworks/simplejson/InlineTaintTest.expected create mode 100644 python/ql/test/library-tests/frameworks/simplejson/InlineTaintTest.ql create mode 100644 python/ql/test/library-tests/frameworks/simplejson/taint_test.py diff --git a/docs/codeql/support/reusables/frameworks.rst b/docs/codeql/support/reusables/frameworks.rst index d54c07454bc..4ea461ed435 100644 --- a/docs/codeql/support/reusables/frameworks.rst +++ b/docs/codeql/support/reusables/frameworks.rst @@ -156,6 +156,7 @@ Python built-in support Tornado, Web framework PyYAML, Serialization dill, Serialization + simplejson, Serialization fabric, Utility library invoke, Utility library idna, Utility library diff --git a/python/change-notes/2021-05-10-simplejson-add-modeling.md b/python/change-notes/2021-05-10-simplejson-add-modeling.md new file mode 100644 index 00000000000..910441bdeac --- /dev/null +++ b/python/change-notes/2021-05-10-simplejson-add-modeling.md @@ -0,0 +1,2 @@ +lgtm,codescanning +* Added modeling of the PyPI package `simplejson`. diff --git a/python/ql/src/semmle/python/Frameworks.qll b/python/ql/src/semmle/python/Frameworks.qll index ed8f308c70a..bec73652394 100644 --- a/python/ql/src/semmle/python/Frameworks.qll +++ b/python/ql/src/semmle/python/Frameworks.qll @@ -16,6 +16,7 @@ private import semmle.python.frameworks.MysqlConnectorPython private import semmle.python.frameworks.MySQLdb private import semmle.python.frameworks.Psycopg2 private import semmle.python.frameworks.PyMySQL +private import semmle.python.frameworks.Simplejson private import semmle.python.frameworks.Stdlib private import semmle.python.frameworks.Tornado private import semmle.python.frameworks.Yaml diff --git a/python/ql/src/semmle/python/frameworks/Simplejson.qll b/python/ql/src/semmle/python/frameworks/Simplejson.qll new file mode 100644 index 00000000000..41676705a40 --- /dev/null +++ b/python/ql/src/semmle/python/frameworks/Simplejson.qll @@ -0,0 +1,84 @@ +/** + * Provides classes modeling security-relevant aspects of the `simplejson` PyPI package. + * See https://simplejson.readthedocs.io/en/latest/. + */ + +private import python +private import semmle.python.dataflow.new.DataFlow +private import semmle.python.dataflow.new.TaintTracking +private import semmle.python.Concepts +private import semmle.python.ApiGraphs + +/** + * Provides models for the `simplejson` PyPI package. + * See https://simplejson.readthedocs.io/en/latest/. + */ +private module SimplejsonModel { + /** + * A call to `simplejson.dumps`. + * + * See https://simplejson.readthedocs.io/en/latest/#simplejson.dumps + */ + private class SimplejsonDumpsCall extends Encoding::Range, DataFlow::CallCfgNode { + SimplejsonDumpsCall() { this = API::moduleImport("simplejson").getMember("dumps").getACall() } + + override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("obj")] } + + override DataFlow::Node getOutput() { result = this } + + override string getFormat() { result = "JSON" } + } + + /** + * A call to `simplejson.dump`. + * + * See https://simplejson.readthedocs.io/en/latest/#simplejson.dump + */ + private class SimplejsonDumpCall extends Encoding::Range, DataFlow::CallCfgNode { + SimplejsonDumpCall() { this = API::moduleImport("simplejson").getMember("dump").getACall() } + + override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("obj")] } + + override DataFlow::Node getOutput() { + result.(DataFlow::PostUpdateNode).getPreUpdateNode() in [ + this.getArg(1), this.getArgByName("fp") + ] + } + + override string getFormat() { result = "JSON" } + } + + /** + * A call to `simplejson.loads`. + * + * See https://simplejson.readthedocs.io/en/latest/#simplejson.loads + */ + private class SimplejsonLoadsCall extends Decoding::Range, DataFlow::CallCfgNode { + SimplejsonLoadsCall() { this = API::moduleImport("simplejson").getMember("loads").getACall() } + + override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("s")] } + + override DataFlow::Node getOutput() { result = this } + + override string getFormat() { result = "JSON" } + + override predicate mayExecuteInput() { none() } + } + + /** + * A call to `simplejson.load`. + * + * See https://simplejson.readthedocs.io/en/latest/#simplejson.load + */ + private class SimplejsonLoadCall extends Decoding::Range, DataFlow::CallCfgNode { + SimplejsonLoadCall() { this = API::moduleImport("simplejson").getMember("load").getACall() } + + override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("fp")] } + + override DataFlow::Node getOutput() { result = this } + + override string getFormat() { result = "JSON" } + + override predicate mayExecuteInput() { none() } + } +} diff --git a/python/ql/test/library-tests/frameworks/simplejson/ConceptsTest.expected b/python/ql/test/library-tests/frameworks/simplejson/ConceptsTest.expected new file mode 100644 index 00000000000..e69de29bb2d diff --git a/python/ql/test/library-tests/frameworks/simplejson/ConceptsTest.ql b/python/ql/test/library-tests/frameworks/simplejson/ConceptsTest.ql new file mode 100644 index 00000000000..b557a0bccb6 --- /dev/null +++ b/python/ql/test/library-tests/frameworks/simplejson/ConceptsTest.ql @@ -0,0 +1,2 @@ +import python +import experimental.meta.ConceptsTest diff --git a/python/ql/test/library-tests/frameworks/simplejson/InlineTaintTest.expected b/python/ql/test/library-tests/frameworks/simplejson/InlineTaintTest.expected new file mode 100644 index 00000000000..79d760d87f4 --- /dev/null +++ b/python/ql/test/library-tests/frameworks/simplejson/InlineTaintTest.expected @@ -0,0 +1,3 @@ +argumentToEnsureNotTaintedNotMarkedAsSpurious +untaintedArgumentToEnsureTaintedNotMarkedAsMissing +failures diff --git a/python/ql/test/library-tests/frameworks/simplejson/InlineTaintTest.ql b/python/ql/test/library-tests/frameworks/simplejson/InlineTaintTest.ql new file mode 100644 index 00000000000..027ad8667be --- /dev/null +++ b/python/ql/test/library-tests/frameworks/simplejson/InlineTaintTest.ql @@ -0,0 +1 @@ +import experimental.meta.InlineTaintTest diff --git a/python/ql/test/library-tests/frameworks/simplejson/taint_test.py b/python/ql/test/library-tests/frameworks/simplejson/taint_test.py new file mode 100644 index 00000000000..2b7de24a0c2 --- /dev/null +++ b/python/ql/test/library-tests/frameworks/simplejson/taint_test.py @@ -0,0 +1,46 @@ +import simplejson +from io import StringIO + +def test(): + ts = TAINTED_STRING + tainted_obj = {"foo": ts} + + encoded = simplejson.dumps(tainted_obj) # $ encodeOutput=Attribute() encodeFormat=JSON encodeInput=tainted_obj + + ensure_tainted( + encoded, # $ tainted + simplejson.dumps(tainted_obj), # $ tainted encodeOutput=Attribute() encodeFormat=JSON encodeInput=tainted_obj + simplejson.dumps(obj=tainted_obj), # $ tainted encodeOutput=Attribute() encodeFormat=JSON encodeInput=tainted_obj + simplejson.loads(encoded), # $ tainted decodeOutput=Attribute() decodeFormat=JSON decodeInput=encoded + simplejson.loads(s=encoded), # $ tainted decodeOutput=Attribute() decodeFormat=JSON decodeInput=encoded + ) + + # load/dump with file-like + tainted_filelike = StringIO() + simplejson.dump(tainted_obj, tainted_filelike) # $ encodeFormat=JSON encodeInput=tainted_obj + + tainted_filelike.seek(0) + ensure_tainted( + tainted_filelike, # $ tainted + simplejson.load(tainted_filelike), # $ tainted decodeOutput=Attribute() decodeFormat=JSON decodeInput=tainted_filelike + ) + + # load/dump with file-like using keyword-args + tainted_filelike = StringIO() + simplejson.dump(obj=tainted_obj, fp=tainted_filelike) # $ encodeFormat=JSON encodeInput=tainted_obj + + tainted_filelike.seek(0) + ensure_tainted( + tainted_filelike, # $ tainted + simplejson.load(fp=tainted_filelike), # $ tainted decodeOutput=Attribute() decodeFormat=JSON decodeInput=tainted_filelike + ) + +# To make things runable + +TAINTED_STRING = "TAINTED_STRING" +def ensure_tainted(*args): + print("- ensure_tainted") + for i, arg in enumerate(args): + print("arg {}: {!r}".format(i, arg)) + +test() From 784e0cdb96b1f5dbe72aecc0d595e78cdd6dd085 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Mon, 10 May 2021 14:28:54 +0200 Subject: [PATCH 4/8] Python: Improve tests of `json` module Inspired by the work on previous commit --- .../defaultAdditionalTaintStep/test_json.py | 45 +++++++------------ 1 file changed, 17 insertions(+), 28 deletions(-) diff --git a/python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep/test_json.py b/python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep/test_json.py index a7398b4d001..46ac4afc219 100644 --- a/python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep/test_json.py +++ b/python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep/test_json.py @@ -11,54 +11,43 @@ if TYPE_CHECKING: # Actual tests from io import StringIO - -# Workaround for Python3 not having unicode -import sys -if sys.version_info[0] == 3: - unicode = str +import json def test(): print("\n# test") ts = TAINTED_STRING - import json + + encoded = json.dumps(ts) ensure_tainted( + encoded, # $ tainted json.dumps(ts), # $ tainted - json.loads(json.dumps(ts)), # $ tainted + json.dumps(obj=ts), # $ MISSING: tainted + json.loads(encoded), # $ tainted + json.loads(s=encoded), # $ MISSING: tainted ) - # For Python2, need to convert to unicode for StringIO to work - tainted_filelike = StringIO(unicode(json.dumps(ts))) + # load/dump with file-like + tainted_filelike = StringIO() + json.dump(ts, tainted_filelike) + tainted_filelike.seek(0) ensure_tainted( tainted_filelike, # $ MISSING: tainted json.load(tainted_filelike), # $ MISSING: tainted ) -def non_syntacical(): - print("\n# non_syntacical") - ts = TAINTED_STRING - - # a less syntactical approach - from json import load, loads, dumps - - dumps_alias = dumps - - ensure_tainted( - dumps(ts), # $ tainted - dumps_alias(ts), # $ tainted - loads(dumps(ts)), # $ tainted - ) - - # For Python2, need to convert to unicode for StringIO to work - tainted_filelike = StringIO(unicode(dumps(ts))) + # load/dump with file-like using keyword-args + tainted_filelike = StringIO() + json.dump(obj=ts, fp=tainted_filelike) + tainted_filelike.seek(0) ensure_tainted( tainted_filelike, # $ MISSING: tainted - load(tainted_filelike), # $ MISSING: tainted + json.load(fp=tainted_filelike), # $ MISSING: tainted ) + # Make tests runable test() -non_syntacical() From 63f28d7d9b1946b97c1b3150957268ac5e316c9e Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Mon, 10 May 2021 14:33:27 +0200 Subject: [PATCH 5/8] Python: Model keyword args to json loads/dumps --- python/ql/src/semmle/python/frameworks/Stdlib.qll | 4 ++-- .../tainttracking/defaultAdditionalTaintStep/test_json.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/python/ql/src/semmle/python/frameworks/Stdlib.qll b/python/ql/src/semmle/python/frameworks/Stdlib.qll index 07753e9fde5..afc1fb45f25 100644 --- a/python/ql/src/semmle/python/frameworks/Stdlib.qll +++ b/python/ql/src/semmle/python/frameworks/Stdlib.qll @@ -511,7 +511,7 @@ private module Stdlib { override predicate mayExecuteInput() { none() } - override DataFlow::Node getAnInput() { result.asCfgNode() = node.getArg(0) } + override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("s")] } override DataFlow::Node getOutput() { result = this } @@ -525,7 +525,7 @@ private module Stdlib { private class JsonDumpsCall extends Encoding::Range, DataFlow::CallCfgNode { JsonDumpsCall() { this = json().getMember("dumps").getACall() } - override DataFlow::Node getAnInput() { result.asCfgNode() = node.getArg(0) } + override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("obj")] } override DataFlow::Node getOutput() { result = this } diff --git a/python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep/test_json.py b/python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep/test_json.py index 46ac4afc219..f91bcacd3bc 100644 --- a/python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep/test_json.py +++ b/python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep/test_json.py @@ -22,9 +22,9 @@ def test(): ensure_tainted( encoded, # $ tainted json.dumps(ts), # $ tainted - json.dumps(obj=ts), # $ MISSING: tainted + json.dumps(obj=ts), # $ tainted json.loads(encoded), # $ tainted - json.loads(s=encoded), # $ MISSING: tainted + json.loads(s=encoded), # $ tainted ) # load/dump with file-like From 72d08f4d6eeea58d96bd8a252203caf687ee93ed Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Mon, 10 May 2021 14:35:33 +0200 Subject: [PATCH 6/8] Python: Model json load/dump --- .../src/semmle/python/frameworks/Stdlib.qll | 34 +++++++++++++++++++ .../defaultAdditionalTaintStep/test_json.py | 8 ++--- 2 files changed, 38 insertions(+), 4 deletions(-) diff --git a/python/ql/src/semmle/python/frameworks/Stdlib.qll b/python/ql/src/semmle/python/frameworks/Stdlib.qll index afc1fb45f25..69023b8940a 100644 --- a/python/ql/src/semmle/python/frameworks/Stdlib.qll +++ b/python/ql/src/semmle/python/frameworks/Stdlib.qll @@ -518,6 +518,22 @@ private module Stdlib { override string getFormat() { result = "JSON" } } + /** + * A call to `json.load` + * See https://docs.python.org/3/library/json.html#json.load + */ + private class JsonLoadCall extends Decoding::Range, DataFlow::CallCfgNode { + JsonLoadCall() { this = json().getMember("load").getACall() } + + override predicate mayExecuteInput() { none() } + + override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("fp")] } + + override DataFlow::Node getOutput() { result = this } + + override string getFormat() { result = "JSON" } + } + /** * A call to `json.dumps` * See https://docs.python.org/3/library/json.html#json.dumps @@ -532,6 +548,24 @@ private module Stdlib { override string getFormat() { result = "JSON" } } + /** + * A call to `json.dump` + * See https://docs.python.org/3/library/json.html#json.dump + */ + private class JsonDumpCall extends Encoding::Range, DataFlow::CallCfgNode { + JsonDumpCall() { this = json().getMember("dump").getACall() } + + override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("obj")] } + + override DataFlow::Node getOutput() { + result.(DataFlow::PostUpdateNode).getPreUpdateNode() in [ + this.getArg(1), this.getArgByName("fp") + ] + } + + override string getFormat() { result = "JSON" } + } + // --------------------------------------------------------------------------- // cgi // --------------------------------------------------------------------------- diff --git a/python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep/test_json.py b/python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep/test_json.py index f91bcacd3bc..b1b0536b03b 100644 --- a/python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep/test_json.py +++ b/python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep/test_json.py @@ -33,8 +33,8 @@ def test(): tainted_filelike.seek(0) ensure_tainted( - tainted_filelike, # $ MISSING: tainted - json.load(tainted_filelike), # $ MISSING: tainted + tainted_filelike, # $ tainted + json.load(tainted_filelike), # $ tainted ) # load/dump with file-like using keyword-args @@ -43,8 +43,8 @@ def test(): tainted_filelike.seek(0) ensure_tainted( - tainted_filelike, # $ MISSING: tainted - json.load(fp=tainted_filelike), # $ MISSING: tainted + tainted_filelike, # $ tainted + json.load(fp=tainted_filelike), # $ tainted ) From c2a6b811fcb2236c2400a18a54041543aa729dbc Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Mon, 10 May 2021 15:07:55 +0200 Subject: [PATCH 7/8] Python: Add modeling of ujson PyPI package The problem with `tainted_filelike` not having taint, is that in the call `ujson.dump(tainted_obj, tainted_filelike)` there is no PostUpdateNote for `tainted_filelike` :( The reason is that points-to is not able to resolve the call, so none of the clauses in `argumentPreUpdateNode` matches See https://github.com/github/codeql/blob/08731fc6cf4ba6951cd4e8f239eac1f3388d3957/python/ql/src/semmle/python/dataflow/new/internal/DataFlowPrivate.qll#L101-L111 Let's deal with that issue in an other PR though --- docs/codeql/support/reusables/frameworks.rst | 1 + .../2021-05-10-ujson-add-modeling.md | 2 + python/ql/src/semmle/python/Frameworks.qll | 1 + .../ql/src/semmle/python/frameworks/Ujson.qll | 76 +++++++++++++++++++ .../frameworks/ujson/ConceptsTest.expected | 0 .../frameworks/ujson/ConceptsTest.ql | 2 + .../frameworks/ujson/InlineTaintTest.expected | 3 + .../frameworks/ujson/InlineTaintTest.ql | 1 + .../frameworks/ujson/taint_test.py | 44 +++++++++++ 9 files changed, 130 insertions(+) create mode 100644 python/change-notes/2021-05-10-ujson-add-modeling.md create mode 100644 python/ql/src/semmle/python/frameworks/Ujson.qll create mode 100644 python/ql/test/library-tests/frameworks/ujson/ConceptsTest.expected create mode 100644 python/ql/test/library-tests/frameworks/ujson/ConceptsTest.ql create mode 100644 python/ql/test/library-tests/frameworks/ujson/InlineTaintTest.expected create mode 100644 python/ql/test/library-tests/frameworks/ujson/InlineTaintTest.ql create mode 100644 python/ql/test/library-tests/frameworks/ujson/taint_test.py diff --git a/docs/codeql/support/reusables/frameworks.rst b/docs/codeql/support/reusables/frameworks.rst index 4ea461ed435..5bc7d572e7f 100644 --- a/docs/codeql/support/reusables/frameworks.rst +++ b/docs/codeql/support/reusables/frameworks.rst @@ -157,6 +157,7 @@ Python built-in support PyYAML, Serialization dill, Serialization simplejson, Serialization + ujson, Serialization fabric, Utility library invoke, Utility library idna, Utility library diff --git a/python/change-notes/2021-05-10-ujson-add-modeling.md b/python/change-notes/2021-05-10-ujson-add-modeling.md new file mode 100644 index 00000000000..2d7cdc4b68a --- /dev/null +++ b/python/change-notes/2021-05-10-ujson-add-modeling.md @@ -0,0 +1,2 @@ +lgtm,codescanning +* Added modeling of the PyPI package `ujson`. diff --git a/python/ql/src/semmle/python/Frameworks.qll b/python/ql/src/semmle/python/Frameworks.qll index bec73652394..3115c3ffac6 100644 --- a/python/ql/src/semmle/python/Frameworks.qll +++ b/python/ql/src/semmle/python/Frameworks.qll @@ -19,4 +19,5 @@ private import semmle.python.frameworks.PyMySQL private import semmle.python.frameworks.Simplejson private import semmle.python.frameworks.Stdlib private import semmle.python.frameworks.Tornado +private import semmle.python.frameworks.Ujson private import semmle.python.frameworks.Yaml diff --git a/python/ql/src/semmle/python/frameworks/Ujson.qll b/python/ql/src/semmle/python/frameworks/Ujson.qll new file mode 100644 index 00000000000..145f7f883a9 --- /dev/null +++ b/python/ql/src/semmle/python/frameworks/Ujson.qll @@ -0,0 +1,76 @@ +/** + * Provides classes modeling security-relevant aspects of the `ujson` PyPI package. + * See https://pypi.org/project/ujson/. + */ + +private import python +private import semmle.python.dataflow.new.DataFlow +private import semmle.python.dataflow.new.TaintTracking +private import semmle.python.Concepts +private import semmle.python.ApiGraphs + +/** + * Provides models for the `ujson` PyPI package. + * See https://pypi.org/project/ujson/. + */ +private module UjsonModel { + /** + * A call to `usjon.dumps` or `ujson.encode`. + */ + private class UjsonDumpsCall extends Encoding::Range, DataFlow::CallCfgNode { + UjsonDumpsCall() { this = API::moduleImport("ujson").getMember(["dumps", "encode"]).getACall() } + + override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("obj")] } + + override DataFlow::Node getOutput() { result = this } + + override string getFormat() { result = "JSON" } + } + + /** + * A call to `ujson.dump`. + */ + private class UjsonDumpCall extends Encoding::Range, DataFlow::CallCfgNode { + UjsonDumpCall() { this = API::moduleImport("ujson").getMember("dump").getACall() } + + override DataFlow::Node getAnInput() { result = this.getArg(0) } + + override DataFlow::Node getOutput() { + result.(DataFlow::PostUpdateNode).getPreUpdateNode() = this.getArg(1) + } + + override string getFormat() { result = "JSON" } + } + + /** + * A call to `ujson.loads` or `ujson.decode`. + */ + private class UjsonLoadsCall extends Decoding::Range, DataFlow::CallCfgNode { + UjsonLoadsCall() { this = API::moduleImport("ujson").getMember(["loads", "decode"]).getACall() } + + // Note: Most other JSON libraries allow the keyword argument `s`, but as of version + // 4.0.2 `ujson` uses `obj` instead. + override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("obj")] } + + override DataFlow::Node getOutput() { result = this } + + override string getFormat() { result = "JSON" } + + override predicate mayExecuteInput() { none() } + } + + /** + * A call to `ujson.load`. + */ + private class UjsonLoadCall extends Decoding::Range, DataFlow::CallCfgNode { + UjsonLoadCall() { this = API::moduleImport("ujson").getMember("load").getACall() } + + override DataFlow::Node getAnInput() { result = this.getArg(0) } + + override DataFlow::Node getOutput() { result = this } + + override string getFormat() { result = "JSON" } + + override predicate mayExecuteInput() { none() } + } +} diff --git a/python/ql/test/library-tests/frameworks/ujson/ConceptsTest.expected b/python/ql/test/library-tests/frameworks/ujson/ConceptsTest.expected new file mode 100644 index 00000000000..e69de29bb2d diff --git a/python/ql/test/library-tests/frameworks/ujson/ConceptsTest.ql b/python/ql/test/library-tests/frameworks/ujson/ConceptsTest.ql new file mode 100644 index 00000000000..b557a0bccb6 --- /dev/null +++ b/python/ql/test/library-tests/frameworks/ujson/ConceptsTest.ql @@ -0,0 +1,2 @@ +import python +import experimental.meta.ConceptsTest diff --git a/python/ql/test/library-tests/frameworks/ujson/InlineTaintTest.expected b/python/ql/test/library-tests/frameworks/ujson/InlineTaintTest.expected new file mode 100644 index 00000000000..79d760d87f4 --- /dev/null +++ b/python/ql/test/library-tests/frameworks/ujson/InlineTaintTest.expected @@ -0,0 +1,3 @@ +argumentToEnsureNotTaintedNotMarkedAsSpurious +untaintedArgumentToEnsureTaintedNotMarkedAsMissing +failures diff --git a/python/ql/test/library-tests/frameworks/ujson/InlineTaintTest.ql b/python/ql/test/library-tests/frameworks/ujson/InlineTaintTest.ql new file mode 100644 index 00000000000..027ad8667be --- /dev/null +++ b/python/ql/test/library-tests/frameworks/ujson/InlineTaintTest.ql @@ -0,0 +1 @@ +import experimental.meta.InlineTaintTest diff --git a/python/ql/test/library-tests/frameworks/ujson/taint_test.py b/python/ql/test/library-tests/frameworks/ujson/taint_test.py new file mode 100644 index 00000000000..2c965518393 --- /dev/null +++ b/python/ql/test/library-tests/frameworks/ujson/taint_test.py @@ -0,0 +1,44 @@ +import ujson +from io import StringIO + +def test(): + ts = TAINTED_STRING + tainted_obj = {"foo": ts} + + encoded = ujson.dumps(tainted_obj) # $ encodeOutput=Attribute() encodeFormat=JSON encodeInput=tainted_obj + + ensure_tainted( + encoded, # $ tainted + ujson.dumps(tainted_obj), # $ tainted encodeOutput=Attribute() encodeFormat=JSON encodeInput=tainted_obj + ujson.dumps(obj=tainted_obj), # $ tainted encodeOutput=Attribute() encodeFormat=JSON encodeInput=tainted_obj + ujson.loads(encoded), # $ tainted decodeOutput=Attribute() decodeFormat=JSON decodeInput=encoded + ujson.loads(obj=encoded), # $ tainted decodeOutput=Attribute() decodeFormat=JSON decodeInput=encoded + + ujson.encode(tainted_obj), # $ tainted encodeOutput=Attribute() encodeFormat=JSON encodeInput=tainted_obj + ujson.encode(obj=tainted_obj), # $ tainted encodeOutput=Attribute() encodeFormat=JSON encodeInput=tainted_obj + ujson.decode(encoded), # $ tainted decodeOutput=Attribute() decodeFormat=JSON decodeInput=encoded + ujson.decode(obj=encoded), # $ tainted decodeOutput=Attribute() decodeFormat=JSON decodeInput=encoded + ) + + # load/dump with file-like + tainted_filelike = StringIO() + ujson.dump(tainted_obj, tainted_filelike) # $ encodeFormat=JSON encodeInput=tainted_obj + + tainted_filelike.seek(0) + ensure_tainted( + tainted_filelike, # $ MISSING: tainted + ujson.load(tainted_filelike), # $ decodeOutput=Attribute() decodeFormat=JSON decodeInput=tainted_filelike MISSING: tainted + ) + + # load/dump with file-like using keyword-args does not work in `ujson` + + +# To make things runable + +TAINTED_STRING = "TAINTED_STRING" +def ensure_tainted(*args): + print("- ensure_tainted") + for i, arg in enumerate(args): + print("arg {}: {!r}".format(i, arg)) + +test() From 1b0d5053e7a7a1fccaa463761fc8d7a5e142f7a5 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Mon, 10 May 2021 16:21:29 +0200 Subject: [PATCH 8/8] Python: simplejson load/dump only works with lib installed Which I had done locally. Problem is the same about not having PostUpdateNode when points-to is not able to resolve the call, so I'm happy to just make CI happy right now, and hopefully we'll get a fix to the underlying problem soon :blush: --- .../library-tests/frameworks/simplejson/taint_test.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/python/ql/test/library-tests/frameworks/simplejson/taint_test.py b/python/ql/test/library-tests/frameworks/simplejson/taint_test.py index 2b7de24a0c2..6f5d45b0b33 100644 --- a/python/ql/test/library-tests/frameworks/simplejson/taint_test.py +++ b/python/ql/test/library-tests/frameworks/simplejson/taint_test.py @@ -21,8 +21,8 @@ def test(): tainted_filelike.seek(0) ensure_tainted( - tainted_filelike, # $ tainted - simplejson.load(tainted_filelike), # $ tainted decodeOutput=Attribute() decodeFormat=JSON decodeInput=tainted_filelike + tainted_filelike, # $ MISSING: tainted + simplejson.load(tainted_filelike), # $ decodeOutput=Attribute() decodeFormat=JSON decodeInput=tainted_filelike MISSING: tainted ) # load/dump with file-like using keyword-args @@ -31,8 +31,8 @@ def test(): tainted_filelike.seek(0) ensure_tainted( - tainted_filelike, # $ tainted - simplejson.load(fp=tainted_filelike), # $ tainted decodeOutput=Attribute() decodeFormat=JSON decodeInput=tainted_filelike + tainted_filelike, # $ MISSING: tainted + simplejson.load(fp=tainted_filelike), # $ decodeOutput=Attribute() decodeFormat=JSON decodeInput=tainted_filelike MISSING: tainted ) # To make things runable