Python: Add taint for StringIO and BytesIO

This commit is contained in:
Rasmus Wriedt Larsen
2022-03-29 17:18:06 +02:00
committed by Rasmus Wriedt Larsen
parent 57b9780428
commit 769f5691d0
3 changed files with 109 additions and 0 deletions

View File

@@ -0,0 +1,4 @@
---
category: minorAnalysis
---
* Added taint propagation for `io.StringIO` and `io.BytesIO`. This addition was originally [submitted as part of an experimental query by @jorgectf](https://github.com/github/codeql/pull/6112).

View File

@@ -3116,6 +3116,64 @@ private module StdlibPrivate {
result in [this.getArg(0), this.getArgByName("path")]
}
}
// ---------------------------------------------------------------------------
// io
// ---------------------------------------------------------------------------
/**
 * Provides models for the in-memory stream classes `io.StringIO` and `io.BytesIO`.
 *
 * See https://docs.python.org/3.10/library/io.html#io.StringIO and
 * https://docs.python.org/3.10/library/io.html#io.BytesIO.
 */
module StringIO {
  /** Gets a reference to either the `io.StringIO` class or the `io.BytesIO` class. */
  private API::Node classRef() {
    exists(string className | className = ["StringIO", "BytesIO"] |
      result = API::moduleImport("io").getMember(className)
    )
  }

  /**
   * A source of instances of `io.StringIO`/`io.BytesIO`. Extend this class to model
   * additional places where such instances originate.
   *
   * Examples are instantiations of the class, return values of function calls, or a
   * special parameter that is set when a function is called by an external library.
   *
   * Use the predicate `StringIO::instance()` to get references to instances of
   * `io.StringIO`/`io.BytesIO`.
   */
  abstract class InstanceSource extends Stdlib::FileLikeObject::InstanceSource { }

  /** A call that directly constructs an `io.StringIO` or `io.BytesIO` object. */
  private class ClassInstantiation extends InstanceSource, DataFlow::CallCfgNode {
    ClassInstantiation() { classRef().getACall() = this }

    /**
     * Gets the argument holding the initial contents of the stream: either the first
     * positional argument or the corresponding keyword argument.
     */
    DataFlow::Node getInitialValue() {
      // The keyword is `initial_value` for StringIO and `initial_bytes` for BytesIO.
      result in [this.getArg(0), this.getArgByName(["initial_value", "initial_bytes"])]
    }
  }

  /**
   * Gets a reference to an instance of `io.StringIO`/`io.BytesIO`, tracked with the
   * type tracker `t`.
   */
  private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
    // Base case: tracking begins at every modeled instance source.
    result instanceof InstanceSource and
    t.start()
    or
    // Recursive case: extend a previously tracked reference by one tracking step.
    exists(DataFlow::TypeTracker prev | result = instance(prev).track(prev, t))
  }

  /** Gets a reference to an instance of `io.StringIO`/`io.BytesIO`. */
  DataFlow::Node instance() {
    exists(DataFlow::TypeTrackingNode tracked |
      tracked = instance(DataFlow::TypeTracker::end())
    |
      tracked.flowsTo(result)
    )
  }

  /**
   * Extra taint propagation for `io.StringIO`/`io.BytesIO`: taint flows from the
   * initial-contents argument of a constructor call to the constructed object.
   */
  private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
    override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
      nodeFrom = nodeTo.(ClassInstantiation).getInitialValue()
    }
  }
}
}
// ---------------------------------------------------------------------------

View File

@@ -0,0 +1,47 @@
from io import StringIO, BytesIO
# Placeholder values that the tests below feed into StringIO/BytesIO as their "tainted" input.
# NOTE(review): presumably the taint-tracking test setup treats these names as taint
# sources -- confirm against the test query configuration.
TAINTED_STRING = "TS"
TAINTED_BYTES = b"TB"
def ensure_tainted(*args):
    """Print a header line, then the `repr` of every argument on its own line.

    At runtime this only prints; each argument line is emitted as an empty
    string followed by the repr, so it starts with a single space.
    """
    print("ensure_tainted")
    # `map` is lazy, so each repr is still computed right before it is printed.
    for rendered in map(repr, args):
        print("", rendered)
def test_stringio():
    """Exercise `StringIO` with tainted text passed at construction and via `write`."""
    tainted_text = TAINTED_STRING

    # Start from an empty stream, write the tainted text, then rewind so that
    # the `read()` call below returns what was written.
    stream = StringIO()
    stream.write(tainted_text)
    stream.seek(0)

    ensure_tainted(
        StringIO(tainted_text), # $ tainted
        StringIO(initial_value=tainted_text), # $ tainted
        stream, # $ tainted
        stream.read(), # $ tainted
        StringIO(tainted_text).read(), # $ tainted
    )
def test_bytesio():
    """Exercise `BytesIO` with tainted bytes passed at construction and via `write`."""
    tainted_bytes = TAINTED_BYTES

    # Start from an empty stream, write the tainted bytes, then rewind so that
    # the `read()` call below returns what was written.
    stream = BytesIO()
    stream.write(tainted_bytes)
    stream.seek(0)

    ensure_tainted(
        BytesIO(tainted_bytes), # $ tainted
        BytesIO(initial_bytes=tainted_bytes), # $ tainted
        stream, # $ tainted
        stream.read(), # $ tainted
        BytesIO(tainted_bytes).read(), # $ tainted
    )
# Call both tests at module level so the file also runs as a plain script.
test_stringio()
test_bytesio()