Merge pull request #7023 from RasmusWL/toml

Python: Add modeling of `toml`
This commit is contained in:
yoff
2021-11-02 14:42:06 +01:00
committed by GitHub
7 changed files with 145 additions and 0 deletions

View File

@@ -0,0 +1,2 @@
lgtm,codescanning
* Added modeling of the PyPI package `toml`, which provides encoding/decoding of TOML documents, leading to new taint-tracking steps.

View File

@@ -33,6 +33,7 @@ private import semmle.python.frameworks.Simplejson
private import semmle.python.frameworks.SqlAlchemy
private import semmle.python.frameworks.Starlette
private import semmle.python.frameworks.Stdlib
private import semmle.python.frameworks.Toml
private import semmle.python.frameworks.Tornado
private import semmle.python.frameworks.Twisted
private import semmle.python.frameworks.Ujson

View File

@@ -0,0 +1,101 @@
/**
* Provides classes modeling security-relevant aspects of the `toml` PyPI package.
*
* See
* - https://pypi.org/project/toml/
* - https://github.com/uiri/toml#api-reference
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
/**
* Provides classes modeling security-relevant aspects of the `toml` PyPI package
*
* See
* - https://pypi.org/project/toml/
* - https://github.com/uiri/toml#api-reference
*/
private module Toml {
  /** Gets an API graph node for the top-level `toml` module. */
  private API::Node tomlModule() { result = API::moduleImport("toml") }

  /**
   * A call to `toml.loads` (or `toml.decoder.loads`), modeled as a decoding
   * of its first positional argument / `s` keyword argument.
   *
   * See https://github.com/uiri/toml#api-reference
   */
  private class TomlLoadsCall extends Decoding::Range, DataFlow::CallCfgNode {
    TomlLoadsCall() {
      exists(API::Node owner |
        owner = tomlModule()
        or
        owner = tomlModule().getMember("decoder")
      |
        this = owner.getMember("loads").getACall()
      )
    }

    // Modeled as never executing the decoded input.
    override predicate mayExecuteInput() { none() }

    override DataFlow::Node getAnInput() {
      result = this.getArg(0)
      or
      result = this.getArgByName("s")
    }

    // The decoded value is the result of the call itself.
    override DataFlow::Node getOutput() { result = this }

    override string getFormat() { result = "TOML" }
  }

  /**
   * A call to `toml.load` (or `toml.decoder.load`), modeled as a decoding
   * of its first positional argument / `f` keyword argument.
   *
   * See https://github.com/uiri/toml#api-reference
   */
  private class TomlLoadCall extends Decoding::Range, DataFlow::CallCfgNode {
    TomlLoadCall() {
      exists(API::Node owner |
        owner = tomlModule()
        or
        owner = tomlModule().getMember("decoder")
      |
        this = owner.getMember("load").getACall()
      )
    }

    // Modeled as never executing the decoded input.
    override predicate mayExecuteInput() { none() }

    override DataFlow::Node getAnInput() {
      result = this.getArg(0)
      or
      result = this.getArgByName("f")
    }

    // The decoded value is the result of the call itself.
    override DataFlow::Node getOutput() { result = this }

    override string getFormat() { result = "TOML" }
  }

  /**
   * A call to `toml.dumps` (or `toml.encoder.dumps`), modeled as an encoding
   * of its first positional argument / `o` keyword argument.
   *
   * See https://github.com/uiri/toml#api-reference
   */
  private class TomlDumpsCall extends Encoding::Range, DataFlow::CallCfgNode {
    TomlDumpsCall() {
      exists(API::Node owner |
        owner = tomlModule()
        or
        owner = tomlModule().getMember("encoder")
      |
        this = owner.getMember("dumps").getACall()
      )
    }

    override DataFlow::Node getAnInput() {
      result = this.getArg(0)
      or
      result = this.getArgByName("o")
    }

    // The encoded value is the result of the call itself.
    override DataFlow::Node getOutput() { result = this }

    override string getFormat() { result = "TOML" }
  }

  /**
   * A call to `toml.dump` (or `toml.encoder.dump`), modeled as an encoding
   * of its first positional argument / `o` keyword argument.
   *
   * See https://github.com/uiri/toml#api-reference
   */
  private class TomlDumpCall extends Encoding::Range, DataFlow::CallCfgNode {
    TomlDumpCall() {
      exists(API::Node owner |
        owner = tomlModule()
        or
        owner = tomlModule().getMember("encoder")
      |
        this = owner.getMember("dump").getACall()
      )
    }

    override DataFlow::Node getAnInput() {
      result = this.getArg(0)
      or
      result = this.getArgByName("o")
    }

    // Unlike `dumps`, the output is the second positional argument / `f`
    // keyword argument rather than the call's own result.
    override DataFlow::Node getOutput() {
      result = this.getArg(1)
      or
      result = this.getArgByName("f")
    }

    override string getFormat() { result = "TOML" }
  }
}

View File

@@ -0,0 +1,2 @@
import python
import experimental.meta.ConceptsTest

View File

@@ -0,0 +1,38 @@
# Exercises the load/dump entry points of the `toml` package. Every call is
# verified at runtime with `assert`, and carries a trailing inline annotation
# naming the expected input, format and output for that line (presumably
# consumed by the accompanying concepts test query -- confirm against the
# test harness).
import toml
from io import StringIO
# A TOML document and the dict it is expected to decode to / be encoded from.
encoded = 'title = "example"\n'
decoded = {"title" : "example"}
# LOADING
assert decoded == toml.loads(encoded) # $ decodeInput=encoded decodeFormat=TOML decodeOutput=toml.loads(..)
assert decoded == toml.loads(s=encoded) # $ decodeInput=encoded decodeFormat=TOML decodeOutput=toml.loads(..)
# this is not the official way to do things, but it works
assert decoded == toml.decoder.loads(encoded) # $ decodeInput=encoded decodeFormat=TOML decodeOutput=toml.decoder.loads(..)
# File-like variants: a new StringIO is built before each `load` call
# (presumably because the previous call has already read its buffer to the end).
f_encoded = StringIO(encoded)
assert decoded == toml.load(f_encoded) # $ decodeInput=f_encoded decodeFormat=TOML decodeOutput=toml.load(..)
f_encoded = StringIO(encoded)
assert decoded == toml.load(f=f_encoded) # $ decodeInput=f_encoded decodeFormat=TOML decodeOutput=toml.load(..)
f_encoded = StringIO(encoded)
assert decoded == toml.decoder.load(f_encoded) # $ decodeInput=f_encoded decodeFormat=TOML decodeOutput=toml.decoder.load(..)
# DUMPING
assert encoded == toml.dumps(decoded) # $ encodeInput=decoded encodeFormat=TOML encodeOutput=toml.dumps(..)
assert encoded == toml.dumps(o=decoded) # $ encodeInput=decoded encodeFormat=TOML encodeOutput=toml.dumps(..)
assert encoded == toml.encoder.dumps(decoded) # $ encodeInput=decoded encodeFormat=TOML encodeOutput=toml.encoder.dumps(..)
# File-like variants: `dump` is given an empty StringIO, and the written text
# is read back with `getvalue()`. The annotated output here is the buffer
# `f_encoded`, not the call expression.
f_encoded = StringIO()
toml.dump(decoded, f_encoded) # $ encodeInput=decoded encodeFormat=TOML encodeOutput=f_encoded
assert encoded == f_encoded.getvalue()
f_encoded = StringIO()
toml.dump(o=decoded, f=f_encoded) # $ encodeInput=decoded encodeFormat=TOML encodeOutput=f_encoded
assert encoded == f_encoded.getvalue()
f_encoded = StringIO()
toml.encoder.dump(decoded, f_encoded) # $ encodeInput=decoded encodeFormat=TOML encodeOutput=f_encoded
assert encoded == f_encoded.getvalue()