Merge pull request #13670 from jorgectf/seclab/xxe-sanitizer

Python: Add `markupsafe` as XXE sanitizer
This commit is contained in:
Rasmus Wriedt Larsen
2023-07-07 12:30:26 +02:00
committed by GitHub
6 changed files with 66 additions and 44 deletions

View File

@@ -680,6 +680,9 @@ module Escaping {
/** Gets the escape-kind for escaping a string so it can safely be included in HTML. */
string getHtmlKind() { result = "html" }
/** Gets the escape-kind for escaping a string so it can safely be included in XML. */
string getXmlKind() { result = "xml" }
/** Gets the escape-kind for escaping a string so it can safely be included in a regular expression. */
string getRegexKind() { result = "regex" }
@@ -710,6 +713,15 @@ class HtmlEscaping extends Escaping {
HtmlEscaping() { super.getKind() = Escaping::getHtmlKind() }
}
/**
* An escape of a string so it can be safely included in
* the body of an XML element, for example, replacing `&` and `<>` in
* `<foo>&xxe;</foo>`.
*
* Any `Escaping` whose kind is `Escaping::getXmlKind()` belongs to this class.
*/
class XmlEscaping extends Escaping {
XmlEscaping() { super.getKind() = Escaping::getXmlKind() }
}
/**
* An escape of a string so it can be safely included in
* the body of a regex.

View File

@@ -83,7 +83,7 @@ private module MarkupSafeModel {
}
/** Taint propagation for `markupsafe.Markup`. */
private class AddtionalTaintStep extends TaintTracking::AdditionalTaintStep {
private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
nodeTo.(ClassInstantiation).getArg(0) = nodeFrom
}
@@ -92,11 +92,7 @@ private module MarkupSafeModel {
/** Any escaping performed via the `markupsafe` package. */
abstract private class MarkupSafeEscape extends Escaping::Range {
override string getKind() {
// TODO: this package claims to escape for both HTML and XML, but for now we don't
// model XML.
result = Escaping::getHtmlKind()
}
override string getKind() { result in [Escaping::getHtmlKind(), Escaping::getXmlKind()] }
}
/** A call to any of the escaping functions in `markupsafe` */

View File

@@ -44,4 +44,11 @@ module Xxe {
)
}
}
/**
* An XML escaping, considered as a sanitizer.
*
* The sanitizer node is the output (`getOutput()`) of any `XmlEscaping`.
*/
class XmlEscapingAsSanitizer extends Sanitizer {
XmlEscapingAsSanitizer() { this = any(XmlEscaping esc).getOutput() }
}
}

View File

@@ -27,40 +27,40 @@ def test():
# as tainted even after it has been escaped in some place. This _might_ not be the
# case since the data-flow library has taint-steps from adjacent uses...
ensure_tainted(ts) # $ tainted
ensure_not_tainted(escape(ts)) # $ escapeInput=ts escapeKind=html escapeOutput=escape(..)
ensure_not_tainted(escape(ts)) # $ escapeInput=ts escapeKind=html escapeKind=xml escapeOutput=escape(..)
ensure_tainted(ts) # $ tainted
ensure_tainted(
ts, # $ tainted
m_unsafe, # $ tainted
m_unsafe + SAFE, # $ escapeInput=SAFE escapeKind=html escapeOutput=BinaryExpr MISSING: tainted
SAFE + m_unsafe, # $ escapeInput=SAFE escapeKind=html escapeOutput=BinaryExpr MISSING: tainted
m_unsafe.format(SAFE), # $ escapeInput=SAFE escapeKind=html escapeOutput=m_unsafe.format(..) MISSING: tainted
m_unsafe % SAFE, # $ escapeInput=SAFE escapeKind=html escapeOutput=BinaryExpr MISSING: tainted
m_unsafe + ts, # $ escapeInput=ts escapeKind=html escapeOutput=BinaryExpr MISSING: tainted
m_unsafe + SAFE, # $ escapeInput=SAFE escapeKind=html escapeKind=xml escapeOutput=BinaryExpr MISSING: tainted
SAFE + m_unsafe, # $ escapeInput=SAFE escapeKind=html escapeKind=xml escapeOutput=BinaryExpr MISSING: tainted
m_unsafe.format(SAFE), # $ escapeInput=SAFE escapeKind=html escapeKind=xml escapeOutput=m_unsafe.format(..) MISSING: tainted
m_unsafe % SAFE, # $ escapeInput=SAFE escapeKind=html escapeKind=xml escapeOutput=BinaryExpr MISSING: tainted
m_unsafe + ts, # $ escapeInput=ts escapeKind=html escapeKind=xml escapeOutput=BinaryExpr MISSING: tainted
m_safe.format(m_unsafe), # $ tainted
m_safe % m_unsafe, # $ tainted
escape(ts).unescape(), # $ escapeInput=ts escapeKind=html escapeOutput=escape(..) MISSING: tainted
escape_silent(ts).unescape(), # $ escapeInput=ts escapeKind=html escapeOutput=escape_silent(..) MISSING: tainted
escape(ts).unescape(), # $ escapeInput=ts escapeKind=html escapeKind=xml escapeOutput=escape(..) MISSING: tainted
escape_silent(ts).unescape(), # $ escapeInput=ts escapeKind=html escapeKind=xml escapeOutput=escape_silent(..) MISSING: tainted
)
ensure_not_tainted(
escape(ts), # $ escapeInput=ts escapeKind=html escapeOutput=escape(..)
escape_silent(ts), # $ escapeInput=ts escapeKind=html escapeOutput=escape_silent(..)
escape(ts), # $ escapeInput=ts escapeKind=html escapeKind=xml escapeOutput=escape(..)
escape_silent(ts), # $ escapeInput=ts escapeKind=html escapeKind=xml escapeOutput=escape_silent(..)
Markup.escape(ts), # $ escapeInput=ts escapeKind=html escapeOutput=Markup.escape(..)
Markup.escape(ts), # $ escapeInput=ts escapeKind=html escapeKind=xml escapeOutput=Markup.escape(..)
m_safe,
m_safe + ts, # $ escapeInput=ts escapeKind=html escapeOutput=BinaryExpr
ts + m_safe, # $ escapeInput=ts escapeKind=html escapeOutput=BinaryExpr
m_safe.format(ts), # $ escapeInput=ts escapeKind=html escapeOutput=m_safe.format(..)
m_safe % ts, # $ escapeInput=ts escapeKind=html escapeOutput=BinaryExpr
m_safe + ts, # $ escapeInput=ts escapeKind=html escapeKind=xml escapeOutput=BinaryExpr
ts + m_safe, # $ escapeInput=ts escapeKind=html escapeKind=xml escapeOutput=BinaryExpr
m_safe.format(ts), # $ escapeInput=ts escapeKind=html escapeKind=xml escapeOutput=m_safe.format(..)
m_safe % ts, # $ escapeInput=ts escapeKind=html escapeKind=xml escapeOutput=BinaryExpr
escape(ts) + ts, # $ escapeInput=ts escapeKind=html escapeOutput=BinaryExpr escapeOutput=escape(..)
escape_silent(ts) + ts, # $ escapeInput=ts escapeKind=html escapeOutput=BinaryExpr escapeOutput=escape_silent(..)
Markup.escape(ts) + ts, # $ escapeInput=ts escapeKind=html escapeOutput=BinaryExpr escapeOutput=Markup.escape(..)
escape(ts) + ts, # $ escapeInput=ts escapeKind=html escapeKind=xml escapeOutput=BinaryExpr escapeOutput=escape(..)
escape_silent(ts) + ts, # $ escapeInput=ts escapeKind=html escapeKind=xml escapeOutput=BinaryExpr escapeOutput=escape_silent(..)
Markup.escape(ts) + ts, # $ escapeInput=ts escapeKind=html escapeKind=xml escapeOutput=BinaryExpr escapeOutput=Markup.escape(..)
)
# flask re-exports these, as:
@@ -73,8 +73,8 @@ def test():
)
ensure_not_tainted(
flask.escape(ts), # $ escapeInput=ts escapeKind=html escapeOutput=flask.escape(..)
flask.Markup.escape(ts), # $ escapeInput=ts escapeKind=html escapeOutput=flask.Markup.escape(..)
flask.escape(ts), # $ escapeInput=ts escapeKind=html escapeKind=xml escapeOutput=flask.escape(..)
flask.Markup.escape(ts), # $ escapeInput=ts escapeKind=html escapeKind=xml escapeOutput=flask.Markup.escape(..)
)

View File

@@ -1,25 +1,25 @@
edges
| test.py:1:26:1:32 | ControlFlowNode for ImportMember | test.py:1:26:1:32 | GSSA Variable request |
| test.py:1:26:1:32 | GSSA Variable request | test.py:8:19:8:25 | ControlFlowNode for request |
| test.py:1:26:1:32 | GSSA Variable request | test.py:19:19:19:25 | ControlFlowNode for request |
| test.py:8:19:8:25 | ControlFlowNode for request | test.py:8:19:8:30 | ControlFlowNode for Attribute |
| test.py:8:19:8:30 | ControlFlowNode for Attribute | test.py:8:19:8:45 | ControlFlowNode for Subscript |
| test.py:8:19:8:45 | ControlFlowNode for Subscript | test.py:9:34:9:44 | ControlFlowNode for xml_content |
| test.py:19:19:19:25 | ControlFlowNode for request | test.py:19:19:19:30 | ControlFlowNode for Attribute |
| test.py:19:19:19:30 | ControlFlowNode for Attribute | test.py:19:19:19:45 | ControlFlowNode for Subscript |
| test.py:19:19:19:45 | ControlFlowNode for Subscript | test.py:30:34:30:44 | ControlFlowNode for xml_content |
| test.py:1:26:1:32 | GSSA Variable request | test.py:9:19:9:25 | ControlFlowNode for request |
| test.py:1:26:1:32 | GSSA Variable request | test.py:20:19:20:25 | ControlFlowNode for request |
| test.py:9:19:9:25 | ControlFlowNode for request | test.py:9:19:9:30 | ControlFlowNode for Attribute |
| test.py:9:19:9:30 | ControlFlowNode for Attribute | test.py:9:19:9:45 | ControlFlowNode for Subscript |
| test.py:9:19:9:45 | ControlFlowNode for Subscript | test.py:10:34:10:44 | ControlFlowNode for xml_content |
| test.py:20:19:20:25 | ControlFlowNode for request | test.py:20:19:20:30 | ControlFlowNode for Attribute |
| test.py:20:19:20:30 | ControlFlowNode for Attribute | test.py:20:19:20:45 | ControlFlowNode for Subscript |
| test.py:20:19:20:45 | ControlFlowNode for Subscript | test.py:31:34:31:44 | ControlFlowNode for xml_content |
nodes
| test.py:1:26:1:32 | ControlFlowNode for ImportMember | semmle.label | ControlFlowNode for ImportMember |
| test.py:1:26:1:32 | GSSA Variable request | semmle.label | GSSA Variable request |
| test.py:8:19:8:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
| test.py:8:19:8:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| test.py:8:19:8:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
| test.py:9:34:9:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content |
| test.py:19:19:19:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
| test.py:19:19:19:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| test.py:19:19:19:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
| test.py:30:34:30:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content |
| test.py:9:19:9:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
| test.py:9:19:9:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| test.py:9:19:9:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
| test.py:10:34:10:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content |
| test.py:20:19:20:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
| test.py:20:19:20:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| test.py:20:19:20:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
| test.py:31:34:31:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content |
subpaths
#select
| test.py:9:34:9:44 | ControlFlowNode for xml_content | test.py:1:26:1:32 | ControlFlowNode for ImportMember | test.py:9:34:9:44 | ControlFlowNode for xml_content | XML parsing depends on a $@ without guarding against external entity expansion. | test.py:1:26:1:32 | ControlFlowNode for ImportMember | user-provided value |
| test.py:30:34:30:44 | ControlFlowNode for xml_content | test.py:1:26:1:32 | ControlFlowNode for ImportMember | test.py:30:34:30:44 | ControlFlowNode for xml_content | XML parsing depends on a $@ without guarding against external entity expansion. | test.py:1:26:1:32 | ControlFlowNode for ImportMember | user-provided value |
| test.py:10:34:10:44 | ControlFlowNode for xml_content | test.py:1:26:1:32 | ControlFlowNode for ImportMember | test.py:10:34:10:44 | ControlFlowNode for xml_content | XML parsing depends on a $@ without guarding against external entity expansion. | test.py:1:26:1:32 | ControlFlowNode for ImportMember | user-provided value |
| test.py:31:34:31:44 | ControlFlowNode for xml_content | test.py:1:26:1:32 | ControlFlowNode for ImportMember | test.py:31:34:31:44 | ControlFlowNode for xml_content | XML parsing depends on a $@ without guarding against external entity expansion. | test.py:1:26:1:32 | ControlFlowNode for ImportMember | user-provided value |

View File

@@ -1,5 +1,6 @@
from flask import Flask, request
import lxml.etree
import markupsafe
app = Flask(__name__)
@@ -28,3 +29,9 @@ def super_vuln_handler():
huge_tree=True,
)
return lxml.etree.fromstring(xml_content, parser=parser).text
@app.route("/sanitized-handler")
def sanitized_handler():
xml_content = request.args['xml_content']
xml_content = markupsafe.escape(xml_content)
return lxml.etree.fromstring(xml_content).text