Merge pull request #6092 from RasmusWL/markupsafe-modeling

Python: Add `MarkupSafe` model
This commit is contained in:
Taus
2021-06-30 15:52:10 +02:00
committed by GitHub
12 changed files with 367 additions and 0 deletions

View File

@@ -178,3 +178,4 @@ Python built-in support
pycryptodome, Cryptography library
pycryptodomex, Cryptography library
rsa, Cryptography library
MarkupSafe, Escaping Library

View File

@@ -0,0 +1,2 @@
lgtm,codescanning
* Added modeling of the PyPI package `MarkupSafe`.

View File

@@ -293,6 +293,78 @@ module SqlExecution {
}
}
/**
* A data-flow node that escapes meta-characters, which could be used to prevent
* injection attacks.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `Escaping::Range` instead.
*/
class Escaping extends DataFlow::Node {
Escaping::Range range;
Escaping() {
this = range and
// escapes that don't have _both_ input/output defined are not valid
exists(range.getAnInput()) and
exists(range.getOutput())
}
/** Gets an input that will be escaped. */
DataFlow::Node getAnInput() { result = range.getAnInput() }
/** Gets the output that contains the escaped data. */
DataFlow::Node getOutput() { result = range.getOutput() }
/**
* Gets the context that this function escapes for, such as `html`, or `url`.
*/
string getKind() { result = range.getKind() }
}
/** Provides a class for modeling new escaping APIs. */
module Escaping {
/**
* A data-flow node that escapes meta-characters, which could be used to prevent
* injection attacks.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `Escaping` instead.
*/
abstract class Range extends DataFlow::Node {
/** Gets an input that will be escaped. */
abstract DataFlow::Node getAnInput();
/** Gets the output that contains the escaped data. */
abstract DataFlow::Node getOutput();
/**
* Gets the context that this function escapes for.
*
* While kinds are represented as strings, this should not be relied upon. Use the
* predicates in the `Escaping` module, such as `getHtmlKind`.
*/
abstract string getKind();
}
/** Gets the escape-kind for escaping a string so it can safely be included in HTML. */
string getHtmlKind() { result = "html" }
// TODO: If adding an XML kind, update the modeling of the `MarkupSafe` PyPI package.
//
// Technically it claims to escape for both HTML and XML, but for now we don't have
// anything that relies on XML escaping, so I'm going to defer deciding whether they
// should be the same kind, or whether they deserve to be treated differently.
}
/**
* An escape of a string so it can be safely included in
* the body of an HTML element, for example, replacing `{}` in
* `<p>{}</p>`.
*/
class HtmlEscaping extends Escaping {
HtmlEscaping() { range.getKind() = Escaping::getHtmlKind() }
}
/** Provides classes for modeling HTTP-related APIs. */
module HTTP {
import semmle.python.web.HttpConstants

View File

@@ -16,6 +16,7 @@ private import semmle.python.frameworks.Flask
private import semmle.python.frameworks.Idna
private import semmle.python.frameworks.Invoke
private import semmle.python.frameworks.Jmespath
private import semmle.python.frameworks.MarkupSafe
private import semmle.python.frameworks.Multidict
private import semmle.python.frameworks.Mysql
private import semmle.python.frameworks.MySQLdb

View File

@@ -0,0 +1,151 @@
/**
* Provides classes modeling security-relevant aspects of the `MarkupSafe` PyPI package.
* See https://markupsafe.palletsprojects.com/en/2.0.x/.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
/**
* Provides models for the `MarkupSafe` PyPI package.
* See https://markupsafe.palletsprojects.com/en/2.0.x/.
*/
private module MarkupSafeModel {
/**
* Provides models for the `markupsafe.Markup` class
*
* See https://markupsafe.palletsprojects.com/en/2.0.x/escaping/#markupsafe.Markup.
*/
module Markup {
/** Gets a reference to the `markupsafe.Markup` class. */
API::Node classRef() {
result = API::moduleImport("markupsafe").getMember("Markup")
or
result = API::moduleImport("flask").getMember("Markup")
}
/**
* A source of instances of `markupsafe.Markup`, extend this class to model new instances.
*
* This can include instantiations of the class, return values from function
* calls, or a special parameter that will be set when functions are called by an external
* library.
*
* Use the predicate `Markup::instance()` to get references to instances of `markupsafe.Markup`.
*/
abstract class InstanceSource extends DataFlow::LocalSourceNode { }
/** A direct instantiation of `markupsafe.Markup`. */
private class ClassInstantiation extends InstanceSource, DataFlow::CallCfgNode {
override CallNode node;
ClassInstantiation() { this = classRef().getACall() }
}
/** Gets a reference to an instance of `markupsafe.Markup`. */
private DataFlow::LocalSourceNode instance(DataFlow::TypeTracker t) {
t.start() and
result instanceof InstanceSource
or
exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t))
}
/** Gets a reference to an instance of `markupsafe.Markup`. */
DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }
/** A string concatenation with a `markupsafe.Markup` involved. */
class StringConcat extends Markup::InstanceSource, DataFlow::CfgNode {
override BinaryExprNode node;
StringConcat() {
node.getOp() instanceof Add and
instance().asCfgNode() in [node.getLeft(), node.getRight()]
}
}
/** A string format with `markupsafe.Markup` as the format string. */
class StringFormat extends Markup::InstanceSource, DataFlow::MethodCallNode {
StringFormat() { this.calls(instance(), "format") }
}
/** A %-style string format with `markupsafe.Markup` as the format string. */
class PercentStringFormat extends Markup::InstanceSource, DataFlow::CfgNode {
override BinaryExprNode node;
PercentStringFormat() {
node.getOp() instanceof Mod and
instance().asCfgNode() = node.getLeft()
}
}
/** Taint propagation for `markupsafe.Markup`. */
class AddtionalTaintSteps extends TaintTracking::AdditionalTaintStep {
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
nodeTo.(ClassInstantiation).getArg(0) = nodeFrom
}
}
}
/** Any escaping performed via the `markupsafe` package. */
abstract private class MarkupSafeEscape extends Escaping::Range {
override string getKind() {
// TODO: this package claims to escape for both HTML and XML, but for now we don't
// model XML.
result = Escaping::getHtmlKind()
}
}
/** A call to any of the escaping functions in `markupsafe` */
private class MarkupSafeEscapeCall extends Markup::InstanceSource, MarkupSafeEscape,
DataFlow::CallCfgNode {
MarkupSafeEscapeCall() {
this = API::moduleImport("markupsafe").getMember(["escape", "escape_silent"]).getACall()
or
this = Markup::classRef().getMember("escape").getACall()
or
this = API::moduleImport("flask").getMember("escape").getACall()
}
override DataFlow::Node getAnInput() { result = this.getArg(0) }
override DataFlow::Node getOutput() { result = this }
}
/**
* An escape from string concatenation with a `markupsafe.Markup` involved.
*
* Only things that are not already a `markupsafe.Markup` instances will be escaped.
*/
private class MarkupEscapeFromStringConcat extends MarkupSafeEscape, Markup::StringConcat {
override DataFlow::Node getAnInput() {
result.asCfgNode() in [node.getLeft(), node.getRight()] and
not result = Markup::instance()
}
override DataFlow::Node getOutput() { result = this }
}
/** A escape from string format with `markupsafe.Markup` as the format string. */
private class MarkupEscapeFromStringFormat extends MarkupSafeEscape, Markup::StringFormat {
override DataFlow::Node getAnInput() {
result in [this.getArg(_), this.getArgByName(_)] and
not result = Markup::instance()
}
override DataFlow::Node getOutput() { result = this }
}
/** A escape from %-style string format with `markupsafe.Markup` as the format string. */
private class MarkupEscapeFromPercentStringFormat extends MarkupSafeEscape,
Markup::PercentStringFormat {
override DataFlow::Node getAnInput() {
result.asCfgNode() = node.getRight() and
not result = Markup::instance()
}
override DataFlow::Node getOutput() { result = this }
}
}

View File

@@ -29,4 +29,13 @@ class ReflectedXssConfiguration extends TaintTracking::Configuration {
override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
guard instanceof StringConstCompare
}
// TODO: For now, since there is not an `isSanitizingStep` member-predicate part of a
// `TaintTracking::Configuration`, we use treat the output is a taint-sanitizer. This
// is slightly imprecise, which you can see in the `m_unsafe + SAFE` test-case in
// python/ql/test/library-tests/frameworks/markupsafe/taint_test.py
//
// However, it is better than `getAnInput()`. Due to use-use flow, that would remove
// the taint-flow to `SINK()` in `some_escape(tainted); SINK(tainted)`.
override predicate isSanitizer(DataFlow::Node node) { node = any(HtmlEscaping esc).getOutput() }
}

View File

@@ -129,6 +129,38 @@ class SqlExecutionTest extends InlineExpectationsTest {
}
}
class EscapingTest extends InlineExpectationsTest {
EscapingTest() { this = "EscapingTest" }
override string getARelevantTag() { result in ["escapeInput", "escapeOutput", "escapeKind"] }
override predicate hasActualResult(Location location, string element, string tag, string value) {
exists(location.getFile().getRelativePath()) and
exists(Escaping esc |
exists(DataFlow::Node data |
location = data.getLocation() and
element = data.toString() and
value = prettyNodeForInlineTest(data) and
(
data = esc.getAnInput() and
tag = "escapeInput"
or
data = esc.getOutput() and
tag = "escapeOutput"
)
)
or
exists(string format |
location = esc.getLocation() and
element = format and
value = format and
format = esc.getKind() and
tag = "escapeKind"
)
)
}
}
class HttpServerRouteSetupTest extends InlineExpectationsTest {
HttpServerRouteSetupTest() { this = "HttpServerRouteSetupTest" }

View File

@@ -0,0 +1,2 @@
import python
import experimental.meta.ConceptsTest

View File

@@ -0,0 +1,3 @@
argumentToEnsureNotTaintedNotMarkedAsSpurious
untaintedArgumentToEnsureTaintedNotMarkedAsMissing
failures

View File

@@ -0,0 +1,13 @@
import experimental.meta.InlineTaintTest
import semmle.python.Concepts
class HtmlSpecialization extends TestTaintTrackingConfiguration {
// TODO: For now, since there is not an `isSanitizingStep` member-predicate part of a
// `TaintTracking::Configuration`, we use treat the output is a taint-sanitizer. This
// is slightly imprecise, which you can see in the `m_unsafe + SAFE` test-case in
// python/ql/test/library-tests/frameworks/markupsafe/taint_test.py
//
// However, it is better than `getAnInput()`. Due to use-use flow, that would remove
// the taint-flow to `SINK()` in `some_escape(tainted); SINK(tainted)`.
override predicate isSanitizer(DataFlow::Node node) { node = any(HtmlEscaping esc).getOutput() }
}

View File

@@ -0,0 +1,81 @@
from markupsafe import escape, escape_silent, Markup
def ensure_tainted(*args):
print("ensure_tainted")
for x in args: print(" ", x)
def ensure_not_tainted(*args):
print("ensure_not_tainted")
for x in args: print(" ", x)
# these contain `{}` so we can use .format, and `%s` so we can use %-style formatting
TAINTED_STRING = '<"TAINTED_STRING" {} %s>'
SAFE = "SAFE {} %s"
def test():
ts = TAINTED_STRING
# class `Markup` can be used for things that are already safe.
# if used with any text in a string operation, that other text will be escaped.
#
# see https://markupsafe.palletsprojects.com/en/2.0.x/
m_unsafe = Markup(TAINTED_STRING)
m_safe = Markup(SAFE)
# this 3 tests might look strange, but the purpose is to check we still treat `ts`
# as tainted even after it has been escaped in some place. This _might_ not be the
# case since data-flow library has taint-steps from adjacent uses...
ensure_tainted(ts) # $ tainted
ensure_not_tainted(escape(ts)) # $ escapeInput=ts escapeKind=html escapeOutput=escape(..)
ensure_tainted(ts) # $ tainted
ensure_tainted(
ts, # $ tainted
m_unsafe, # $ tainted
m_unsafe + SAFE, # $ escapeInput=SAFE escapeKind=html escapeOutput=BinaryExpr MISSING: tainted
SAFE + m_unsafe, # $ escapeInput=SAFE escapeKind=html escapeOutput=BinaryExpr MISSING: tainted
m_unsafe.format(SAFE), # $ escapeInput=SAFE escapeKind=html escapeOutput=m_unsafe.format(..) MISSING: tainted
m_unsafe % SAFE, # $ escapeInput=SAFE escapeKind=html escapeOutput=BinaryExpr MISSING: tainted
m_unsafe + ts, # $ escapeInput=ts escapeKind=html escapeOutput=BinaryExpr MISSING: tainted
m_safe.format(m_unsafe), # $ tainted
m_safe % m_unsafe, # $ tainted
escape(ts).unescape(), # $ escapeInput=ts escapeKind=html escapeOutput=escape(..) MISSING: tainted
escape_silent(ts).unescape(), # $ escapeInput=ts escapeKind=html escapeOutput=escape_silent(..) MISSING: tainted
)
ensure_not_tainted(
escape(ts), # $ escapeInput=ts escapeKind=html escapeOutput=escape(..)
escape_silent(ts), # $ escapeInput=ts escapeKind=html escapeOutput=escape_silent(..)
Markup.escape(ts), # $ escapeInput=ts escapeKind=html escapeOutput=Markup.escape(..)
m_safe,
m_safe + ts, # $ escapeInput=ts escapeKind=html escapeOutput=BinaryExpr
ts + m_safe, # $ escapeInput=ts escapeKind=html escapeOutput=BinaryExpr
m_safe.format(ts), # $ escapeInput=ts escapeKind=html escapeOutput=m_safe.format(..)
m_safe % ts, # $ escapeInput=ts escapeKind=html escapeOutput=BinaryExpr
escape(ts) + ts, # $ escapeInput=ts escapeKind=html escapeOutput=BinaryExpr escapeOutput=escape(..)
escape_silent(ts) + ts, # $ escapeInput=ts escapeKind=html escapeOutput=BinaryExpr escapeOutput=escape_silent(..)
Markup.escape(ts) + ts, # $ escapeInput=ts escapeKind=html escapeOutput=BinaryExpr escapeOutput=Markup.escape(..)
)
# flask re-exports these, as:
# flask.escape = markupsafe.escape
# flask.Markup = markupsafe.Markup
import flask
ensure_tainted(
flask.Markup(ts), # $ tainted
)
ensure_not_tainted(
flask.escape(ts), # $ escapeInput=ts escapeKind=html escapeOutput=flask.escape(..)
flask.Markup.escape(ts), # $ escapeInput=ts escapeKind=html escapeOutput=flask.Markup.escape(..)
)
test()