mirror of
https://github.com/github/codeql.git
synced 2025-12-22 03:36:30 +01:00
Merge pull request #13670 from jorgectf/seclab/xxe-sanitizer
Python: Add `markupsafe` as XXE sanitizer
This commit is contained in:
@@ -680,6 +680,9 @@ module Escaping {
|
||||
/** Gets the escape-kind for escaping a string so it can safely be included in HTML. */
|
||||
string getHtmlKind() { result = "html" }
|
||||
|
||||
/** Gets the escape-kind for escaping a string so it can safely be included in XML. */
|
||||
string getXmlKind() { result = "xml" }
|
||||
|
||||
/** Gets the escape-kind for escaping a string so it can safely be included in a regular expression. */
|
||||
string getRegexKind() { result = "regex" }
|
||||
|
||||
@@ -710,6 +713,15 @@ class HtmlEscaping extends Escaping {
|
||||
HtmlEscaping() { super.getKind() = Escaping::getHtmlKind() }
|
||||
}
|
||||
|
||||
/**
|
||||
* An escape of a string so it can be safely included in
|
||||
* the body of an XML element, for example, replacing `&` and `<>` in
|
||||
* `<foo>&xxe;<foo>`.
|
||||
*/
|
||||
class XmlEscaping extends Escaping {
|
||||
XmlEscaping() { super.getKind() = Escaping::getXmlKind() }
|
||||
}
|
||||
|
||||
/**
|
||||
* An escape of a string so it can be safely included in
|
||||
* the body of a regex.
|
||||
|
||||
@@ -83,7 +83,7 @@ private module MarkupSafeModel {
|
||||
}
|
||||
|
||||
/** Taint propagation for `markupsafe.Markup`. */
|
||||
private class AddtionalTaintStep extends TaintTracking::AdditionalTaintStep {
|
||||
private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
|
||||
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
|
||||
nodeTo.(ClassInstantiation).getArg(0) = nodeFrom
|
||||
}
|
||||
@@ -92,11 +92,7 @@ private module MarkupSafeModel {
|
||||
|
||||
/** Any escaping performed via the `markupsafe` package. */
|
||||
abstract private class MarkupSafeEscape extends Escaping::Range {
|
||||
override string getKind() {
|
||||
// TODO: this package claims to escape for both HTML and XML, but for now we don't
|
||||
// model XML.
|
||||
result = Escaping::getHtmlKind()
|
||||
}
|
||||
override string getKind() { result in [Escaping::getHtmlKind(), Escaping::getXmlKind()] }
|
||||
}
|
||||
|
||||
/** A call to any of the escaping functions in `markupsafe` */
|
||||
|
||||
@@ -44,4 +44,11 @@ module Xxe {
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* An XML escaping, considered as a sanitizer.
|
||||
*/
|
||||
class XmlEscapingAsSanitizer extends Sanitizer {
|
||||
XmlEscapingAsSanitizer() { this = any(XmlEscaping esc).getOutput() }
|
||||
}
|
||||
}
|
||||
|
||||
@@ -27,40 +27,40 @@ def test():
|
||||
# as tainted even after it has been escaped in some place. This _might_ not be the
|
||||
# case since data-flow library has taint-steps from adjacent uses...
|
||||
ensure_tainted(ts) # $ tainted
|
||||
ensure_not_tainted(escape(ts)) # $ escapeInput=ts escapeKind=html escapeOutput=escape(..)
|
||||
ensure_not_tainted(escape(ts)) # $ escapeInput=ts escapeKind=html escapeKind=xml escapeOutput=escape(..)
|
||||
ensure_tainted(ts) # $ tainted
|
||||
|
||||
ensure_tainted(
|
||||
ts, # $ tainted
|
||||
m_unsafe, # $ tainted
|
||||
m_unsafe + SAFE, # $ escapeInput=SAFE escapeKind=html escapeOutput=BinaryExpr MISSING: tainted
|
||||
SAFE + m_unsafe, # $ escapeInput=SAFE escapeKind=html escapeOutput=BinaryExpr MISSING: tainted
|
||||
m_unsafe.format(SAFE), # $ escapeInput=SAFE escapeKind=html escapeOutput=m_unsafe.format(..) MISSING: tainted
|
||||
m_unsafe % SAFE, # $ escapeInput=SAFE escapeKind=html escapeOutput=BinaryExpr MISSING: tainted
|
||||
m_unsafe + ts, # $ escapeInput=ts escapeKind=html escapeOutput=BinaryExpr MISSING: tainted
|
||||
m_unsafe + SAFE, # $ escapeInput=SAFE escapeKind=html escapeKind=xml escapeOutput=BinaryExpr MISSING: tainted
|
||||
SAFE + m_unsafe, # $ escapeInput=SAFE escapeKind=html escapeKind=xml escapeOutput=BinaryExpr MISSING: tainted
|
||||
m_unsafe.format(SAFE), # $ escapeInput=SAFE escapeKind=html escapeKind=xml escapeOutput=m_unsafe.format(..) MISSING: tainted
|
||||
m_unsafe % SAFE, # $ escapeInput=SAFE escapeKind=html escapeKind=xml escapeOutput=BinaryExpr MISSING: tainted
|
||||
m_unsafe + ts, # $ escapeInput=ts escapeKind=html escapeKind=xml escapeOutput=BinaryExpr MISSING: tainted
|
||||
|
||||
m_safe.format(m_unsafe), # $ tainted
|
||||
m_safe % m_unsafe, # $ tainted
|
||||
|
||||
escape(ts).unescape(), # $ escapeInput=ts escapeKind=html escapeOutput=escape(..) MISSING: tainted
|
||||
escape_silent(ts).unescape(), # $ escapeInput=ts escapeKind=html escapeOutput=escape_silent(..) MISSING: tainted
|
||||
escape(ts).unescape(), # $ escapeInput=ts escapeKind=html escapeKind=xml escapeOutput=escape(..) MISSING: tainted
|
||||
escape_silent(ts).unescape(), # $ escapeInput=ts escapeKind=html escapeKind=xml escapeOutput=escape_silent(..) MISSING: tainted
|
||||
)
|
||||
|
||||
ensure_not_tainted(
|
||||
escape(ts), # $ escapeInput=ts escapeKind=html escapeOutput=escape(..)
|
||||
escape_silent(ts), # $ escapeInput=ts escapeKind=html escapeOutput=escape_silent(..)
|
||||
escape(ts), # $ escapeInput=ts escapeKind=html escapeKind=xml escapeOutput=escape(..)
|
||||
escape_silent(ts), # $ escapeInput=ts escapeKind=html escapeKind=xml escapeOutput=escape_silent(..)
|
||||
|
||||
Markup.escape(ts), # $ escapeInput=ts escapeKind=html escapeOutput=Markup.escape(..)
|
||||
Markup.escape(ts), # $ escapeInput=ts escapeKind=html escapeKind=xml escapeOutput=Markup.escape(..)
|
||||
|
||||
m_safe,
|
||||
m_safe + ts, # $ escapeInput=ts escapeKind=html escapeOutput=BinaryExpr
|
||||
ts + m_safe, # $ escapeInput=ts escapeKind=html escapeOutput=BinaryExpr
|
||||
m_safe.format(ts), # $ escapeInput=ts escapeKind=html escapeOutput=m_safe.format(..)
|
||||
m_safe % ts, # $ escapeInput=ts escapeKind=html escapeOutput=BinaryExpr
|
||||
m_safe + ts, # $ escapeInput=ts escapeKind=html escapeKind=xml escapeOutput=BinaryExpr
|
||||
ts + m_safe, # $ escapeInput=ts escapeKind=html escapeKind=xml escapeOutput=BinaryExpr
|
||||
m_safe.format(ts), # $ escapeInput=ts escapeKind=html escapeKind=xml escapeOutput=m_safe.format(..)
|
||||
m_safe % ts, # $ escapeInput=ts escapeKind=html escapeKind=xml escapeOutput=BinaryExpr
|
||||
|
||||
escape(ts) + ts, # $ escapeInput=ts escapeKind=html escapeOutput=BinaryExpr escapeOutput=escape(..)
|
||||
escape_silent(ts) + ts, # $ escapeInput=ts escapeKind=html escapeOutput=BinaryExpr escapeOutput=escape_silent(..)
|
||||
Markup.escape(ts) + ts, # $ escapeInput=ts escapeKind=html escapeOutput=BinaryExpr escapeOutput=Markup.escape(..)
|
||||
escape(ts) + ts, # $ escapeInput=ts escapeKind=html escapeKind=xml escapeOutput=BinaryExpr escapeOutput=escape(..)
|
||||
escape_silent(ts) + ts, # $ escapeInput=ts escapeKind=html escapeKind=xml escapeOutput=BinaryExpr escapeOutput=escape_silent(..)
|
||||
Markup.escape(ts) + ts, # $ escapeInput=ts escapeKind=html escapeKind=xml escapeOutput=BinaryExpr escapeOutput=Markup.escape(..)
|
||||
)
|
||||
|
||||
# flask re-exports these, as:
|
||||
@@ -73,8 +73,8 @@ def test():
|
||||
)
|
||||
|
||||
ensure_not_tainted(
|
||||
flask.escape(ts), # $ escapeInput=ts escapeKind=html escapeOutput=flask.escape(..)
|
||||
flask.Markup.escape(ts), # $ escapeInput=ts escapeKind=html escapeOutput=flask.Markup.escape(..)
|
||||
flask.escape(ts), # $ escapeInput=ts escapeKind=html escapeKind=xml escapeOutput=flask.escape(..)
|
||||
flask.Markup.escape(ts), # $ escapeInput=ts escapeKind=html escapeKind=xml escapeOutput=flask.Markup.escape(..)
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -1,25 +1,25 @@
|
||||
edges
|
||||
| test.py:1:26:1:32 | ControlFlowNode for ImportMember | test.py:1:26:1:32 | GSSA Variable request |
|
||||
| test.py:1:26:1:32 | GSSA Variable request | test.py:8:19:8:25 | ControlFlowNode for request |
|
||||
| test.py:1:26:1:32 | GSSA Variable request | test.py:19:19:19:25 | ControlFlowNode for request |
|
||||
| test.py:8:19:8:25 | ControlFlowNode for request | test.py:8:19:8:30 | ControlFlowNode for Attribute |
|
||||
| test.py:8:19:8:30 | ControlFlowNode for Attribute | test.py:8:19:8:45 | ControlFlowNode for Subscript |
|
||||
| test.py:8:19:8:45 | ControlFlowNode for Subscript | test.py:9:34:9:44 | ControlFlowNode for xml_content |
|
||||
| test.py:19:19:19:25 | ControlFlowNode for request | test.py:19:19:19:30 | ControlFlowNode for Attribute |
|
||||
| test.py:19:19:19:30 | ControlFlowNode for Attribute | test.py:19:19:19:45 | ControlFlowNode for Subscript |
|
||||
| test.py:19:19:19:45 | ControlFlowNode for Subscript | test.py:30:34:30:44 | ControlFlowNode for xml_content |
|
||||
| test.py:1:26:1:32 | GSSA Variable request | test.py:9:19:9:25 | ControlFlowNode for request |
|
||||
| test.py:1:26:1:32 | GSSA Variable request | test.py:20:19:20:25 | ControlFlowNode for request |
|
||||
| test.py:9:19:9:25 | ControlFlowNode for request | test.py:9:19:9:30 | ControlFlowNode for Attribute |
|
||||
| test.py:9:19:9:30 | ControlFlowNode for Attribute | test.py:9:19:9:45 | ControlFlowNode for Subscript |
|
||||
| test.py:9:19:9:45 | ControlFlowNode for Subscript | test.py:10:34:10:44 | ControlFlowNode for xml_content |
|
||||
| test.py:20:19:20:25 | ControlFlowNode for request | test.py:20:19:20:30 | ControlFlowNode for Attribute |
|
||||
| test.py:20:19:20:30 | ControlFlowNode for Attribute | test.py:20:19:20:45 | ControlFlowNode for Subscript |
|
||||
| test.py:20:19:20:45 | ControlFlowNode for Subscript | test.py:31:34:31:44 | ControlFlowNode for xml_content |
|
||||
nodes
|
||||
| test.py:1:26:1:32 | ControlFlowNode for ImportMember | semmle.label | ControlFlowNode for ImportMember |
|
||||
| test.py:1:26:1:32 | GSSA Variable request | semmle.label | GSSA Variable request |
|
||||
| test.py:8:19:8:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
|
||||
| test.py:8:19:8:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
|
||||
| test.py:8:19:8:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
|
||||
| test.py:9:34:9:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content |
|
||||
| test.py:19:19:19:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
|
||||
| test.py:19:19:19:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
|
||||
| test.py:19:19:19:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
|
||||
| test.py:30:34:30:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content |
|
||||
| test.py:9:19:9:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
|
||||
| test.py:9:19:9:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
|
||||
| test.py:9:19:9:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
|
||||
| test.py:10:34:10:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content |
|
||||
| test.py:20:19:20:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
|
||||
| test.py:20:19:20:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
|
||||
| test.py:20:19:20:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
|
||||
| test.py:31:34:31:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content |
|
||||
subpaths
|
||||
#select
|
||||
| test.py:9:34:9:44 | ControlFlowNode for xml_content | test.py:1:26:1:32 | ControlFlowNode for ImportMember | test.py:9:34:9:44 | ControlFlowNode for xml_content | XML parsing depends on a $@ without guarding against external entity expansion. | test.py:1:26:1:32 | ControlFlowNode for ImportMember | user-provided value |
|
||||
| test.py:30:34:30:44 | ControlFlowNode for xml_content | test.py:1:26:1:32 | ControlFlowNode for ImportMember | test.py:30:34:30:44 | ControlFlowNode for xml_content | XML parsing depends on a $@ without guarding against external entity expansion. | test.py:1:26:1:32 | ControlFlowNode for ImportMember | user-provided value |
|
||||
| test.py:10:34:10:44 | ControlFlowNode for xml_content | test.py:1:26:1:32 | ControlFlowNode for ImportMember | test.py:10:34:10:44 | ControlFlowNode for xml_content | XML parsing depends on a $@ without guarding against external entity expansion. | test.py:1:26:1:32 | ControlFlowNode for ImportMember | user-provided value |
|
||||
| test.py:31:34:31:44 | ControlFlowNode for xml_content | test.py:1:26:1:32 | ControlFlowNode for ImportMember | test.py:31:34:31:44 | ControlFlowNode for xml_content | XML parsing depends on a $@ without guarding against external entity expansion. | test.py:1:26:1:32 | ControlFlowNode for ImportMember | user-provided value |
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
from flask import Flask, request
|
||||
import lxml.etree
|
||||
import markupsafe
|
||||
|
||||
app = Flask(__name__)
|
||||
|
||||
@@ -28,3 +29,9 @@ def super_vuln_handler():
|
||||
huge_tree=True,
|
||||
)
|
||||
return lxml.etree.fromstring(xml_content, parser=parser).text
|
||||
|
||||
@app.route("/sanitized-handler")
|
||||
def sanitized_handler():
|
||||
xml_content = request.args['xml_content']
|
||||
xml_content = markupsafe.escape(xml_content)
|
||||
return lxml.etree.fromstring(xml_content).text
|
||||
Reference in New Issue
Block a user