mirror of
https://github.com/github/codeql.git
synced 2026-04-26 17:25:19 +02:00
Merge pull request #6092 from RasmusWL/markupsafe-modeling
Python: Add `MarkupSafe` model
This commit is contained in:
@@ -178,3 +178,4 @@ Python built-in support
|
||||
pycryptodome, Cryptography library
|
||||
pycryptodomex, Cryptography library
|
||||
rsa, Cryptography library
|
||||
MarkupSafe, Escaping Library
|
||||
|
||||
@@ -0,0 +1,2 @@
|
||||
lgtm,codescanning
|
||||
* Added modeling of the PyPI package `MarkupSafe`.
|
||||
@@ -293,6 +293,78 @@ module SqlExecution {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A data-flow node that escapes meta-characters, which could be used to prevent
|
||||
* injection attacks.
|
||||
*
|
||||
* Extend this class to refine existing API models. If you want to model new APIs,
|
||||
* extend `Escaping::Range` instead.
|
||||
*/
|
||||
class Escaping extends DataFlow::Node {
|
||||
Escaping::Range range;
|
||||
|
||||
Escaping() {
|
||||
this = range and
|
||||
// escapes that don't have _both_ input/output defined are not valid
|
||||
exists(range.getAnInput()) and
|
||||
exists(range.getOutput())
|
||||
}
|
||||
|
||||
/** Gets an input that will be escaped. */
|
||||
DataFlow::Node getAnInput() { result = range.getAnInput() }
|
||||
|
||||
/** Gets the output that contains the escaped data. */
|
||||
DataFlow::Node getOutput() { result = range.getOutput() }
|
||||
|
||||
/**
|
||||
* Gets the context that this function escapes for, such as `html`, or `url`.
|
||||
*/
|
||||
string getKind() { result = range.getKind() }
|
||||
}
|
||||
|
||||
/** Provides a class for modeling new escaping APIs. */
|
||||
module Escaping {
|
||||
/**
|
||||
* A data-flow node that escapes meta-characters, which could be used to prevent
|
||||
* injection attacks.
|
||||
*
|
||||
* Extend this class to model new APIs. If you want to refine existing API models,
|
||||
* extend `Escaping` instead.
|
||||
*/
|
||||
abstract class Range extends DataFlow::Node {
|
||||
/** Gets an input that will be escaped. */
|
||||
abstract DataFlow::Node getAnInput();
|
||||
|
||||
/** Gets the output that contains the escaped data. */
|
||||
abstract DataFlow::Node getOutput();
|
||||
|
||||
/**
|
||||
* Gets the context that this function escapes for.
|
||||
*
|
||||
* While kinds are represented as strings, this should not be relied upon. Use the
|
||||
* predicates in the `Escaping` module, such as `getHtmlKind`.
|
||||
*/
|
||||
abstract string getKind();
|
||||
}
|
||||
|
||||
/** Gets the escape-kind for escaping a string so it can safely be included in HTML. */
|
||||
string getHtmlKind() { result = "html" }
|
||||
// TODO: If adding an XML kind, update the modeling of the `MarkupSafe` PyPI package.
|
||||
//
|
||||
// Technically it claims to escape for both HTML and XML, but for now we don't have
|
||||
// anything that relies on XML escaping, so I'm going to defer deciding whether they
|
||||
// should be the same kind, or whether they deserve to be treated differently.
|
||||
}
|
||||
|
||||
/**
|
||||
* An escape of a string so it can be safely included in
|
||||
* the body of an HTML element, for example, replacing `{}` in
|
||||
* `<p>{}</p>`.
|
||||
*/
|
||||
class HtmlEscaping extends Escaping {
|
||||
HtmlEscaping() { range.getKind() = Escaping::getHtmlKind() }
|
||||
}
|
||||
|
||||
/** Provides classes for modeling HTTP-related APIs. */
|
||||
module HTTP {
|
||||
import semmle.python.web.HttpConstants
|
||||
|
||||
@@ -16,6 +16,7 @@ private import semmle.python.frameworks.Flask
|
||||
private import semmle.python.frameworks.Idna
|
||||
private import semmle.python.frameworks.Invoke
|
||||
private import semmle.python.frameworks.Jmespath
|
||||
private import semmle.python.frameworks.MarkupSafe
|
||||
private import semmle.python.frameworks.Multidict
|
||||
private import semmle.python.frameworks.Mysql
|
||||
private import semmle.python.frameworks.MySQLdb
|
||||
|
||||
151
python/ql/src/semmle/python/frameworks/MarkupSafe.qll
Normal file
151
python/ql/src/semmle/python/frameworks/MarkupSafe.qll
Normal file
@@ -0,0 +1,151 @@
|
||||
/**
|
||||
* Provides classes modeling security-relevant aspects of the `MarkupSafe` PyPI package.
|
||||
* See https://markupsafe.palletsprojects.com/en/2.0.x/.
|
||||
*/
|
||||
|
||||
private import python
|
||||
private import semmle.python.dataflow.new.DataFlow
|
||||
private import semmle.python.dataflow.new.TaintTracking
|
||||
private import semmle.python.Concepts
|
||||
private import semmle.python.ApiGraphs
|
||||
|
||||
/**
|
||||
* Provides models for the `MarkupSafe` PyPI package.
|
||||
* See https://markupsafe.palletsprojects.com/en/2.0.x/.
|
||||
*/
|
||||
private module MarkupSafeModel {
|
||||
/**
|
||||
* Provides models for the `markupsafe.Markup` class
|
||||
*
|
||||
* See https://markupsafe.palletsprojects.com/en/2.0.x/escaping/#markupsafe.Markup.
|
||||
*/
|
||||
module Markup {
|
||||
/** Gets a reference to the `markupsafe.Markup` class. */
|
||||
API::Node classRef() {
|
||||
result = API::moduleImport("markupsafe").getMember("Markup")
|
||||
or
|
||||
result = API::moduleImport("flask").getMember("Markup")
|
||||
}
|
||||
|
||||
/**
|
||||
* A source of instances of `markupsafe.Markup`, extend this class to model new instances.
|
||||
*
|
||||
* This can include instantiations of the class, return values from function
|
||||
* calls, or a special parameter that will be set when functions are called by an external
|
||||
* library.
|
||||
*
|
||||
* Use the predicate `Markup::instance()` to get references to instances of `markupsafe.Markup`.
|
||||
*/
|
||||
abstract class InstanceSource extends DataFlow::LocalSourceNode { }
|
||||
|
||||
/** A direct instantiation of `markupsafe.Markup`. */
|
||||
private class ClassInstantiation extends InstanceSource, DataFlow::CallCfgNode {
|
||||
override CallNode node;
|
||||
|
||||
ClassInstantiation() { this = classRef().getACall() }
|
||||
}
|
||||
|
||||
/** Gets a reference to an instance of `markupsafe.Markup`. */
|
||||
private DataFlow::LocalSourceNode instance(DataFlow::TypeTracker t) {
|
||||
t.start() and
|
||||
result instanceof InstanceSource
|
||||
or
|
||||
exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t))
|
||||
}
|
||||
|
||||
/** Gets a reference to an instance of `markupsafe.Markup`. */
|
||||
DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }
|
||||
|
||||
/** A string concatenation with a `markupsafe.Markup` involved. */
|
||||
class StringConcat extends Markup::InstanceSource, DataFlow::CfgNode {
|
||||
override BinaryExprNode node;
|
||||
|
||||
StringConcat() {
|
||||
node.getOp() instanceof Add and
|
||||
instance().asCfgNode() in [node.getLeft(), node.getRight()]
|
||||
}
|
||||
}
|
||||
|
||||
/** A string format with `markupsafe.Markup` as the format string. */
|
||||
class StringFormat extends Markup::InstanceSource, DataFlow::MethodCallNode {
|
||||
StringFormat() { this.calls(instance(), "format") }
|
||||
}
|
||||
|
||||
/** A %-style string format with `markupsafe.Markup` as the format string. */
|
||||
class PercentStringFormat extends Markup::InstanceSource, DataFlow::CfgNode {
|
||||
override BinaryExprNode node;
|
||||
|
||||
PercentStringFormat() {
|
||||
node.getOp() instanceof Mod and
|
||||
instance().asCfgNode() = node.getLeft()
|
||||
}
|
||||
}
|
||||
|
||||
/** Taint propagation for `markupsafe.Markup`. */
|
||||
class AddtionalTaintSteps extends TaintTracking::AdditionalTaintStep {
|
||||
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
|
||||
nodeTo.(ClassInstantiation).getArg(0) = nodeFrom
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** Any escaping performed via the `markupsafe` package. */
|
||||
abstract private class MarkupSafeEscape extends Escaping::Range {
|
||||
override string getKind() {
|
||||
// TODO: this package claims to escape for both HTML and XML, but for now we don't
|
||||
// model XML.
|
||||
result = Escaping::getHtmlKind()
|
||||
}
|
||||
}
|
||||
|
||||
/** A call to any of the escaping functions in `markupsafe` */
|
||||
private class MarkupSafeEscapeCall extends Markup::InstanceSource, MarkupSafeEscape,
|
||||
DataFlow::CallCfgNode {
|
||||
MarkupSafeEscapeCall() {
|
||||
this = API::moduleImport("markupsafe").getMember(["escape", "escape_silent"]).getACall()
|
||||
or
|
||||
this = Markup::classRef().getMember("escape").getACall()
|
||||
or
|
||||
this = API::moduleImport("flask").getMember("escape").getACall()
|
||||
}
|
||||
|
||||
override DataFlow::Node getAnInput() { result = this.getArg(0) }
|
||||
|
||||
override DataFlow::Node getOutput() { result = this }
|
||||
}
|
||||
|
||||
/**
|
||||
* An escape from string concatenation with a `markupsafe.Markup` involved.
|
||||
*
|
||||
* Only things that are not already a `markupsafe.Markup` instances will be escaped.
|
||||
*/
|
||||
private class MarkupEscapeFromStringConcat extends MarkupSafeEscape, Markup::StringConcat {
|
||||
override DataFlow::Node getAnInput() {
|
||||
result.asCfgNode() in [node.getLeft(), node.getRight()] and
|
||||
not result = Markup::instance()
|
||||
}
|
||||
|
||||
override DataFlow::Node getOutput() { result = this }
|
||||
}
|
||||
|
||||
/** A escape from string format with `markupsafe.Markup` as the format string. */
|
||||
private class MarkupEscapeFromStringFormat extends MarkupSafeEscape, Markup::StringFormat {
|
||||
override DataFlow::Node getAnInput() {
|
||||
result in [this.getArg(_), this.getArgByName(_)] and
|
||||
not result = Markup::instance()
|
||||
}
|
||||
|
||||
override DataFlow::Node getOutput() { result = this }
|
||||
}
|
||||
|
||||
/** A escape from %-style string format with `markupsafe.Markup` as the format string. */
|
||||
private class MarkupEscapeFromPercentStringFormat extends MarkupSafeEscape,
|
||||
Markup::PercentStringFormat {
|
||||
override DataFlow::Node getAnInput() {
|
||||
result.asCfgNode() = node.getRight() and
|
||||
not result = Markup::instance()
|
||||
}
|
||||
|
||||
override DataFlow::Node getOutput() { result = this }
|
||||
}
|
||||
}
|
||||
@@ -29,4 +29,13 @@ class ReflectedXssConfiguration extends TaintTracking::Configuration {
|
||||
override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
|
||||
guard instanceof StringConstCompare
|
||||
}
|
||||
|
||||
// TODO: For now, since there is not an `isSanitizingStep` member-predicate part of a
|
||||
// `TaintTracking::Configuration`, we use treat the output is a taint-sanitizer. This
|
||||
// is slightly imprecise, which you can see in the `m_unsafe + SAFE` test-case in
|
||||
// python/ql/test/library-tests/frameworks/markupsafe/taint_test.py
|
||||
//
|
||||
// However, it is better than `getAnInput()`. Due to use-use flow, that would remove
|
||||
// the taint-flow to `SINK()` in `some_escape(tainted); SINK(tainted)`.
|
||||
override predicate isSanitizer(DataFlow::Node node) { node = any(HtmlEscaping esc).getOutput() }
|
||||
}
|
||||
|
||||
@@ -129,6 +129,38 @@ class SqlExecutionTest extends InlineExpectationsTest {
|
||||
}
|
||||
}
|
||||
|
||||
class EscapingTest extends InlineExpectationsTest {
|
||||
EscapingTest() { this = "EscapingTest" }
|
||||
|
||||
override string getARelevantTag() { result in ["escapeInput", "escapeOutput", "escapeKind"] }
|
||||
|
||||
override predicate hasActualResult(Location location, string element, string tag, string value) {
|
||||
exists(location.getFile().getRelativePath()) and
|
||||
exists(Escaping esc |
|
||||
exists(DataFlow::Node data |
|
||||
location = data.getLocation() and
|
||||
element = data.toString() and
|
||||
value = prettyNodeForInlineTest(data) and
|
||||
(
|
||||
data = esc.getAnInput() and
|
||||
tag = "escapeInput"
|
||||
or
|
||||
data = esc.getOutput() and
|
||||
tag = "escapeOutput"
|
||||
)
|
||||
)
|
||||
or
|
||||
exists(string format |
|
||||
location = esc.getLocation() and
|
||||
element = format and
|
||||
value = format and
|
||||
format = esc.getKind() and
|
||||
tag = "escapeKind"
|
||||
)
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
class HttpServerRouteSetupTest extends InlineExpectationsTest {
|
||||
HttpServerRouteSetupTest() { this = "HttpServerRouteSetupTest" }
|
||||
|
||||
|
||||
@@ -0,0 +1,2 @@
|
||||
import python
|
||||
import experimental.meta.ConceptsTest
|
||||
@@ -0,0 +1,3 @@
|
||||
argumentToEnsureNotTaintedNotMarkedAsSpurious
|
||||
untaintedArgumentToEnsureTaintedNotMarkedAsMissing
|
||||
failures
|
||||
@@ -0,0 +1,13 @@
|
||||
import experimental.meta.InlineTaintTest
|
||||
import semmle.python.Concepts
|
||||
|
||||
class HtmlSpecialization extends TestTaintTrackingConfiguration {
|
||||
// TODO: For now, since there is not an `isSanitizingStep` member-predicate part of a
|
||||
// `TaintTracking::Configuration`, we use treat the output is a taint-sanitizer. This
|
||||
// is slightly imprecise, which you can see in the `m_unsafe + SAFE` test-case in
|
||||
// python/ql/test/library-tests/frameworks/markupsafe/taint_test.py
|
||||
//
|
||||
// However, it is better than `getAnInput()`. Due to use-use flow, that would remove
|
||||
// the taint-flow to `SINK()` in `some_escape(tainted); SINK(tainted)`.
|
||||
override predicate isSanitizer(DataFlow::Node node) { node = any(HtmlEscaping esc).getOutput() }
|
||||
}
|
||||
@@ -0,0 +1,81 @@
|
||||
from markupsafe import escape, escape_silent, Markup
|
||||
|
||||
def ensure_tainted(*args):
|
||||
print("ensure_tainted")
|
||||
for x in args: print(" ", x)
|
||||
|
||||
def ensure_not_tainted(*args):
|
||||
print("ensure_not_tainted")
|
||||
for x in args: print(" ", x)
|
||||
|
||||
# these contain `{}` so we can use .format, and `%s` so we can use %-style formatting
|
||||
TAINTED_STRING = '<"TAINTED_STRING" {} %s>'
|
||||
SAFE = "SAFE {} %s"
|
||||
|
||||
def test():
|
||||
ts = TAINTED_STRING
|
||||
|
||||
# class `Markup` can be used for things that are already safe.
|
||||
# if used with any text in a string operation, that other text will be escaped.
|
||||
#
|
||||
# see https://markupsafe.palletsprojects.com/en/2.0.x/
|
||||
m_unsafe = Markup(TAINTED_STRING)
|
||||
m_safe = Markup(SAFE)
|
||||
|
||||
|
||||
# this 3 tests might look strange, but the purpose is to check we still treat `ts`
|
||||
# as tainted even after it has been escaped in some place. This _might_ not be the
|
||||
# case since data-flow library has taint-steps from adjacent uses...
|
||||
ensure_tainted(ts) # $ tainted
|
||||
ensure_not_tainted(escape(ts)) # $ escapeInput=ts escapeKind=html escapeOutput=escape(..)
|
||||
ensure_tainted(ts) # $ tainted
|
||||
|
||||
ensure_tainted(
|
||||
ts, # $ tainted
|
||||
m_unsafe, # $ tainted
|
||||
m_unsafe + SAFE, # $ escapeInput=SAFE escapeKind=html escapeOutput=BinaryExpr MISSING: tainted
|
||||
SAFE + m_unsafe, # $ escapeInput=SAFE escapeKind=html escapeOutput=BinaryExpr MISSING: tainted
|
||||
m_unsafe.format(SAFE), # $ escapeInput=SAFE escapeKind=html escapeOutput=m_unsafe.format(..) MISSING: tainted
|
||||
m_unsafe % SAFE, # $ escapeInput=SAFE escapeKind=html escapeOutput=BinaryExpr MISSING: tainted
|
||||
m_unsafe + ts, # $ escapeInput=ts escapeKind=html escapeOutput=BinaryExpr MISSING: tainted
|
||||
|
||||
m_safe.format(m_unsafe), # $ tainted
|
||||
m_safe % m_unsafe, # $ tainted
|
||||
|
||||
escape(ts).unescape(), # $ escapeInput=ts escapeKind=html escapeOutput=escape(..) MISSING: tainted
|
||||
escape_silent(ts).unescape(), # $ escapeInput=ts escapeKind=html escapeOutput=escape_silent(..) MISSING: tainted
|
||||
)
|
||||
|
||||
ensure_not_tainted(
|
||||
escape(ts), # $ escapeInput=ts escapeKind=html escapeOutput=escape(..)
|
||||
escape_silent(ts), # $ escapeInput=ts escapeKind=html escapeOutput=escape_silent(..)
|
||||
|
||||
Markup.escape(ts), # $ escapeInput=ts escapeKind=html escapeOutput=Markup.escape(..)
|
||||
|
||||
m_safe,
|
||||
m_safe + ts, # $ escapeInput=ts escapeKind=html escapeOutput=BinaryExpr
|
||||
ts + m_safe, # $ escapeInput=ts escapeKind=html escapeOutput=BinaryExpr
|
||||
m_safe.format(ts), # $ escapeInput=ts escapeKind=html escapeOutput=m_safe.format(..)
|
||||
m_safe % ts, # $ escapeInput=ts escapeKind=html escapeOutput=BinaryExpr
|
||||
|
||||
escape(ts) + ts, # $ escapeInput=ts escapeKind=html escapeOutput=BinaryExpr escapeOutput=escape(..)
|
||||
escape_silent(ts) + ts, # $ escapeInput=ts escapeKind=html escapeOutput=BinaryExpr escapeOutput=escape_silent(..)
|
||||
Markup.escape(ts) + ts, # $ escapeInput=ts escapeKind=html escapeOutput=BinaryExpr escapeOutput=Markup.escape(..)
|
||||
)
|
||||
|
||||
# flask re-exports these, as:
|
||||
# flask.escape = markupsafe.escape
|
||||
# flask.Markup = markupsafe.Markup
|
||||
import flask
|
||||
|
||||
ensure_tainted(
|
||||
flask.Markup(ts), # $ tainted
|
||||
)
|
||||
|
||||
ensure_not_tainted(
|
||||
flask.escape(ts), # $ escapeInput=ts escapeKind=html escapeOutput=flask.escape(..)
|
||||
flask.Markup.escape(ts), # $ escapeInput=ts escapeKind=html escapeOutput=flask.Markup.escape(..)
|
||||
)
|
||||
|
||||
|
||||
test()
|
||||
Reference in New Issue
Block a user