Python: Model MarkupSafe PyPI package

Since expectation tests had so many changes from ConceptsTest, I'm going
to do the changes for that on in a separate commit. The important part
is the changes to taint-tracking, which is highlighted in this commit.
This commit is contained in:
Rasmus Wriedt Larsen
2021-06-16 18:46:21 +02:00
parent e1c4b8ca42
commit 14de3bffb7
8 changed files with 171 additions and 11 deletions

View File

@@ -24,3 +24,4 @@ private import semmle.python.frameworks.Tornado
private import semmle.python.frameworks.Ujson
private import semmle.python.frameworks.Yaml
private import semmle.python.frameworks.Yarl
private import semmle.python.frameworks.MarkupSafe

View File

@@ -0,0 +1,139 @@
/**
* Provides classes modeling security-relevant aspects of the `MarkupSafe` PyPI package.
* See https://markupsafe.palletsprojects.com/en/2.0.x/.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
/**
* Provides models for the `MarkupSafe` PyPI package.
* See https://markupsafe.palletsprojects.com/en/2.0.x/.
*/
private module MarkupSafeModel {
/**
* Provides models for the `markupsafe.Markup` class
*
* See https://markupsafe.palletsprojects.com/en/2.0.x/escaping/#markupsafe.Markup.
*/
module Markup {
/** Gets a reference to the `markupsafe.Markup` class. */
API::Node classRef() {
result = API::moduleImport("markupsafe").getMember("Markup")
or
result = API::moduleImport("flask").getMember("Markup")
}
/**
* A source of instances of `markupsafe.Markup`, extend this class to model new instances.
*
* This can include instantiations of the class, return values from function
* calls, or a special parameter that will be set when functions are called by an external
* library.
*
* Use the predicate `Markup::instance()` to get references to instances of `markupsafe.Markup`.
*/
abstract class InstanceSource extends DataFlow::LocalSourceNode { }
/** A direct instantiation of `markupsafe.Markup`. */
private class ClassInstantiation extends InstanceSource, DataFlow::CallCfgNode {
override CallNode node;
ClassInstantiation() { this = classRef().getACall() }
}
/** Gets a reference to an instance of `markupsafe.Markup`. */
private DataFlow::LocalSourceNode instance(DataFlow::TypeTracker t) {
t.start() and
result instanceof InstanceSource
or
exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t))
}
/** Gets a reference to an instance of `markupsafe.Markup`. */
DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }
/** A string concatenation with a `markupsafe.Markup` involved. */
class StringConcat extends Markup::InstanceSource, DataFlow::CfgNode {
override BinaryExprNode node;
StringConcat() {
node.getOp() instanceof Add and
(
instance().asCfgNode() = node.getLeft()
or
instance().asCfgNode() = node.getRight()
)
}
}
/** A string format with `markupsafe.Markup` as the format string. */
class StringFormat extends Markup::InstanceSource, DataFlow::CallCfgNode {
StringFormat() {
exists(DataFlow::AttrRead attr | this.getFunction() = attr |
attr.getAttributeName() = "format" and
attr.getObject() = instance()
)
}
}
/** Taint propagation for `markupsafe.Markup`. */
class AddtionalTaintSteps extends TaintTracking::AdditionalTaintStep {
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
nodeTo.(ClassInstantiation).getArg(0) = nodeFrom
}
}
}
/** Any escaping performed via the `markupsafe` package. */
abstract private class MarkupSafeEscape extends Escaping::Range {
override string getKind() {
// TODO: this package claims to escape for both HTML and XML, but for now we don't
// model XML.
result = Escaping::getHtmlKind()
}
}
/** A call to any of the escaping functions in `markupsafe` */
private class MarkupSafeEscapeCall extends Markup::InstanceSource, MarkupSafeEscape,
DataFlow::CallCfgNode {
MarkupSafeEscapeCall() {
this = API::moduleImport("markupsafe").getMember(["escape", "escape_silent"]).getACall()
or
this = Markup::classRef().getMember("escape").getACall()
or
this = API::moduleImport("flask").getMember("escape").getACall()
}
override DataFlow::Node getAnInput() { result = this.getArg(0) }
override DataFlow::Node getOutput() { result = this }
}
/**
* An escape from string concatenation with a `markupsafe.Markup` involved.
*
* Only things that are not already a `markupsafe.Markup` instances will be escaped.
*/
private class MarkupEscapeFromStringConcat extends MarkupSafeEscape, Markup::StringConcat {
override DataFlow::Node getAnInput() {
result.asCfgNode() in [node.getLeft(), node.getRight()] and
not result = Markup::instance()
}
override DataFlow::Node getOutput() { result = this }
}
/** A escape from string format with `markupsafe.Markup` as the format string. */
private class MarkupEscapeFromStringFormat extends MarkupSafeEscape, Markup::StringFormat {
override DataFlow::Node getAnInput() {
result in [this.getArg(_), this.getArgByName(_)] and
not result = Markup::instance()
}
override DataFlow::Node getOutput() { result = this }
}
}

View File

@@ -29,4 +29,13 @@ class ReflectedXssConfiguration extends TaintTracking::Configuration {
override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
guard instanceof StringConstCompare
}
// TODO: For now, since there is not an `isSanitizingStep` member-predicate part of a
// `TaintTracking::Configuration`, we use treat the output is a taint-sanitizer. This
// is slightly imprecise, which you can see in the `m_unsafe + SAFE` test-case in
// python/ql/test/library-tests/frameworks/markupsafe/taint_test.py
//
// However, it is better than `getAnInput()`. Due to use-use flow, that would remove
// the taint-flow to `SINK()` in `some_escape(tainted); SINK(tainted)`.
override predicate isSanitizer(DataFlow::Node node) { node = any(HtmlEscaping esc).getOutput() }
}

View File

@@ -1,2 +0,0 @@
import python
import experimental.meta.ConceptsTest

View File

@@ -1 +1,13 @@
import experimental.meta.InlineTaintTest
import semmle.python.Concepts
class HtmlSpecialization extends TestTaintTrackingConfiguration {
// TODO: For now, since there is not an `isSanitizingStep` member-predicate part of a
// `TaintTracking::Configuration`, we use treat the output is a taint-sanitizer. This
// is slightly imprecise, which you can see in the `m_unsafe + SAFE` test-case in
// python/ql/test/library-tests/frameworks/markupsafe/taint_test.py
//
// However, it is better than `getAnInput()`. Due to use-use flow, that would remove
// the taint-flow to `SINK()` in `some_escape(tainted); SINK(tainted)`.
override predicate isSanitizer(DataFlow::Node node) { node = any(HtmlEscaping esc).getOutput() }
}

View File

@@ -32,11 +32,11 @@ def test():
ensure_tainted(
ts, # $ tainted
m_unsafe, # $ MISSING: tainted
m_unsafe, # $ tainted
m_unsafe + SAFE, # $ MISSING: tainted
SAFE + m_unsafe, # $ MISSING: tainted
m_unsafe.format(SAFE), # $ MISSING: tainted
m_unsafe + ts, # $ tainted
m_unsafe + ts, # $ MISSING: tainted
m_safe.format(m_unsafe), # $ MISSING: tainted
@@ -51,13 +51,13 @@ def test():
Markup.escape(ts),
m_safe,
m_safe + ts, # $ SPURIOUS: tainted
ts + m_safe, # $ SPURIOUS: tainted
m_safe.format(ts), # $ SPURIOUS: tainted
m_safe + ts,
ts + m_safe,
m_safe.format(ts),
escape(ts) + ts, # $ SPURIOUS: tainted
escape_silent(ts) + ts, # $ SPURIOUS: tainted
Markup.escape(ts) + ts, # $ SPURIOUS: tainted
escape(ts) + ts,
escape_silent(ts) + ts,
Markup.escape(ts) + ts,
)
# flask re-exports these, as:
@@ -66,7 +66,7 @@ def test():
import flask
ensure_tainted(
flask.Markup(ts), # $ MISSING: tainted
flask.Markup(ts), # $ tainted
)
ensure_not_tainted(