Python : Add support for detecting XSLT Injection

This PR adds support for detecting XSLT injection in Python.
I have included the ql files as well as the tests with this.
This commit is contained in:
Porcupiney Hairs
2020-05-20 02:03:38 +05:30
parent d4e1ee8aa7
commit 1ceb963d4c
14 changed files with 437 additions and 0 deletions

View File

@@ -0,0 +1,16 @@
<!DOCTYPE qhelp SYSTEM "qhelp.dtd">
<qhelp>
<overview>
Processing an unvalidated XSL stylesheet can allow an attacker to change the structure and contents of the resultant XML, include arbitrary files from the file system, or execute arbitrary code.
</overview>
<recommendation>
<p>
This vulnerability can be prevented by not allowing untrusted user input to be passed as a XSL stylesheet.
If the application logic necessiates processing untrusted XSL stylesheets, the input should be properly filtered and sanitized before use.
</p>
<example>
<p>In the example below, the XSL stylesheet is controlled by the user and hence leads to a vulnerability.</p>
<sample src="xslt.py" />
</example>
</recommendation>
</qhelp>

View File

@@ -0,0 +1,35 @@
/**
* @name Xslt query built from user-controlled sources
* @description Building a XSLT query from user-controlled sources is vulnerable to insertion of
* malicious XSLT code by the user.
* @kind path-problem
* @problem.severity error
* @precision high
* @id py/xslt-injection
* @tags security
* external/cwe/cwe-643
*/
import python
import semmle.python.security.Paths
/* Sources */
import semmle.python.web.HttpRequest
/* Sinks */
import experimental.semmle.python.security.injection.XSLT
class XSLTInjectionConfiguration extends TaintTracking::Configuration {
XSLTInjectionConfiguration() { this = "XSLT injection configuration" }
override predicate isSource(TaintTracking::Source source) {
source instanceof HttpRequestTaintSource
}
override predicate isSink(TaintTracking::Sink sink) {
sink instanceof XSLTInjection::XSLTInjectionSink
}
}
from XSLTInjectionConfiguration config, TaintedPathSource src, TaintedPathSink sink
where config.hasFlowPath(src, sink)
select sink.getSink(), src, sink, "This XSLT query depends on $@.", src.getSource(),
"a user-provided value"

View File

@@ -0,0 +1,14 @@
from lxml import etree
from io import StringIO
from flask import Flask, request
app = Flask(__name__)
@app.route("/xslt")
def bad():
xsltQuery = request.args.get('xml', '')
xslt_root = etree.XML(xsltQuery)
f = StringIO('<foo><bar></bar></foo>')
tree = etree.parse(f)
result_tree = tree.xslt(xslt_root) # Not OK

View File

@@ -0,0 +1,119 @@
/**
* Provides class and predicates to track external data that
* may represent malicious XSLT query objects.
*
* This module is intended to be imported into a taint-tracking query
* to extend `TaintKind` and `TaintSink`.
*/
import python
import semmle.python.security.TaintTracking
import semmle.python.web.HttpRequest
/** Models XSLT Injection related classes and functions */
module XSLTInjection {
/** Returns a class value which refers to `lxml.etree` */
Value etree() { result = Value::named("lxml.etree") }
/** A generic taint sink that is vulnerable to XSLT injection. */
abstract class XSLTInjectionSink extends TaintSink { }
/**
* A kind of "taint", representing an untrusted XML string
*/
private class ExternalXmlStringKind extends ExternalStringKind {
ExternalXmlStringKind() { this = "etree.XML string" }
override TaintKind getTaintForFlowStep(ControlFlowNode fromnode, ControlFlowNode tonode) {
etreeXML(fromnode, tonode) and result instanceof ExternalXmlKind
or
etreeFromStringList(fromnode, tonode) and result instanceof ExternalXmlKind
or
etreeFromString(fromnode, tonode) and result instanceof ExternalXmlKind
}
}
/**
* A kind of "taint", representing a XML encoded string
*/
class ExternalXmlKind extends TaintKind {
ExternalXmlKind() { this = "lxml etree xml" }
}
private predicate etreeXML(ControlFlowNode fromnode, CallNode tonode) {
exists(CallNode call, AttrNode atr |
atr = etree().getAReference().getASuccessor() and
// XML(text, parser=None, base_url=None)
atr.getName() = "XML" and
atr = call.getFunction()
|
call.getArg(0) = fromnode and
call = tonode
)
}
private predicate etreeFromString(ControlFlowNode fromnode, CallNode tonode) {
// fromstring(text, parser=None)
exists(CallNode call | call.getFunction().(AttrNode).getObject("fromstring").pointsTo(etree()) |
call.getArg(0) = fromnode and
call = tonode
)
}
private predicate etreeFromStringList(ControlFlowNode fromnode, CallNode tonode) {
// fromstringlist(strings, parser=None)
exists(CallNode call |
call.getFunction().(AttrNode).getObject("fromstringlist").pointsTo(etree())
|
call.getArg(0) = fromnode and
call = tonode
)
}
/**
* A Sink representing an argument to the `etree.XSLT` call.
*
* from lxml import etree
* root = etree.XML("<xmlContent>")
* find_text = etree.XSLT("`sink`")
*/
private class EtreeXSLTArgument extends XSLTInjectionSink {
override string toString() { result = "lxml.etree.XSLT" }
EtreeXSLTArgument() {
exists(CallNode call | call.getFunction().(AttrNode).getObject("XSLT").pointsTo(etree()) |
call.getArg(0) = this
)
}
override predicate sinks(TaintKind kind) { kind instanceof ExternalXmlKind }
}
/**
* A Sink representing an argument to the `XSLT` call to a parsed xml document.
*
* from lxml import etree
* from io import StringIO
* `sink` = etree.XML(xsltQuery)
* tree = etree.parse(f)
* result_tree = tree.xslt(`sink`)
*/
private class ParseXSLTArgument extends XSLTInjectionSink {
override string toString() { result = "lxml.etree.parse.xslt" }
ParseXSLTArgument() {
exists(
CallNode parseCall, CallNode xsltCall, ControlFlowNode obj, Variable var, AssignStmt assign
|
parseCall.getFunction().(AttrNode).getObject("parse").pointsTo(etree()) and
assign.getValue().(Call).getAFlowNode() = parseCall and
xsltCall.getFunction().(AttrNode).getObject("xslt") = obj and
var.getAUse() = obj and
assign.getATarget() = var.getAStore() and
xsltCall.getArg(0) = this
)
}
override predicate sinks(TaintKind kind) { kind instanceof ExternalXmlKind }
}
}

View File

@@ -0,0 +1,47 @@
edges
| xslt.py:10:17:10:28 | dict of etree.XML string | xslt.py:10:17:10:43 | etree.XML string |
| xslt.py:10:17:10:28 | dict of etree.XML string | xslt.py:10:17:10:43 | etree.XML string |
| xslt.py:10:17:10:43 | etree.XML string | xslt.py:11:27:11:35 | etree.XML string |
| xslt.py:10:17:10:43 | etree.XML string | xslt.py:11:27:11:35 | etree.XML string |
| xslt.py:11:17:11:36 | lxml etree xml | xslt.py:14:29:14:37 | lxml etree xml |
| xslt.py:11:17:11:36 | lxml etree xml | xslt.py:14:29:14:37 | lxml etree xml |
| xslt.py:11:27:11:35 | etree.XML string | xslt.py:11:17:11:36 | lxml etree xml |
| xslt.py:11:27:11:35 | etree.XML string | xslt.py:11:17:11:36 | lxml etree xml |
| xsltInjection.py:10:17:10:28 | dict of etree.XML string | xsltInjection.py:10:17:10:43 | etree.XML string |
| xsltInjection.py:10:17:10:28 | dict of etree.XML string | xsltInjection.py:10:17:10:43 | etree.XML string |
| xsltInjection.py:10:17:10:43 | etree.XML string | xsltInjection.py:11:27:11:35 | etree.XML string |
| xsltInjection.py:10:17:10:43 | etree.XML string | xsltInjection.py:11:27:11:35 | etree.XML string |
| xsltInjection.py:11:17:11:36 | lxml etree xml | xsltInjection.py:12:28:12:36 | lxml etree xml |
| xsltInjection.py:11:17:11:36 | lxml etree xml | xsltInjection.py:12:28:12:36 | lxml etree xml |
| xsltInjection.py:11:27:11:35 | etree.XML string | xsltInjection.py:11:17:11:36 | lxml etree xml |
| xsltInjection.py:11:27:11:35 | etree.XML string | xsltInjection.py:11:17:11:36 | lxml etree xml |
| xsltInjection.py:17:17:17:28 | dict of etree.XML string | xsltInjection.py:17:17:17:43 | etree.XML string |
| xsltInjection.py:17:17:17:28 | dict of etree.XML string | xsltInjection.py:17:17:17:43 | etree.XML string |
| xsltInjection.py:17:17:17:43 | etree.XML string | xsltInjection.py:18:27:18:35 | etree.XML string |
| xsltInjection.py:17:17:17:43 | etree.XML string | xsltInjection.py:18:27:18:35 | etree.XML string |
| xsltInjection.py:18:17:18:36 | lxml etree xml | xsltInjection.py:21:29:21:37 | lxml etree xml |
| xsltInjection.py:18:17:18:36 | lxml etree xml | xsltInjection.py:21:29:21:37 | lxml etree xml |
| xsltInjection.py:18:27:18:35 | etree.XML string | xsltInjection.py:18:17:18:36 | lxml etree xml |
| xsltInjection.py:18:27:18:35 | etree.XML string | xsltInjection.py:18:17:18:36 | lxml etree xml |
| xsltInjection.py:26:17:26:28 | dict of etree.XML string | xsltInjection.py:26:17:26:43 | etree.XML string |
| xsltInjection.py:26:17:26:28 | dict of etree.XML string | xsltInjection.py:26:17:26:43 | etree.XML string |
| xsltInjection.py:26:17:26:43 | etree.XML string | xsltInjection.py:27:27:27:35 | etree.XML string |
| xsltInjection.py:26:17:26:43 | etree.XML string | xsltInjection.py:27:27:27:35 | etree.XML string |
| xsltInjection.py:27:17:27:36 | lxml etree xml | xsltInjection.py:31:24:31:32 | lxml etree xml |
| xsltInjection.py:27:17:27:36 | lxml etree xml | xsltInjection.py:31:24:31:32 | lxml etree xml |
| xsltInjection.py:27:27:27:35 | etree.XML string | xsltInjection.py:27:17:27:36 | lxml etree xml |
| xsltInjection.py:27:27:27:35 | etree.XML string | xsltInjection.py:27:17:27:36 | lxml etree xml |
| xsltInjection.py:35:17:35:28 | dict of etree.XML string | xsltInjection.py:35:17:35:43 | etree.XML string |
| xsltInjection.py:35:17:35:28 | dict of etree.XML string | xsltInjection.py:35:17:35:43 | etree.XML string |
| xsltInjection.py:35:17:35:43 | etree.XML string | xsltInjection.py:36:34:36:42 | etree.XML string |
| xsltInjection.py:35:17:35:43 | etree.XML string | xsltInjection.py:36:34:36:42 | etree.XML string |
| xsltInjection.py:36:17:36:43 | lxml etree xml | xsltInjection.py:40:24:40:32 | lxml etree xml |
| xsltInjection.py:36:17:36:43 | lxml etree xml | xsltInjection.py:40:24:40:32 | lxml etree xml |
| xsltInjection.py:36:34:36:42 | etree.XML string | xsltInjection.py:36:17:36:43 | lxml etree xml |
| xsltInjection.py:36:34:36:42 | etree.XML string | xsltInjection.py:36:17:36:43 | lxml etree xml |
#select
| xslt.py:14:29:14:37 | xslt_root | xslt.py:10:17:10:28 | dict of etree.XML string | xslt.py:14:29:14:37 | lxml etree xml | This XSLT query depends on $@. | xslt.py:10:17:10:28 | Attribute | a user-provided value |
| xsltInjection.py:12:28:12:36 | xslt_root | xsltInjection.py:10:17:10:28 | dict of etree.XML string | xsltInjection.py:12:28:12:36 | lxml etree xml | This XSLT query depends on $@. | xsltInjection.py:10:17:10:28 | Attribute | a user-provided value |
| xsltInjection.py:21:29:21:37 | xslt_root | xsltInjection.py:17:17:17:28 | dict of etree.XML string | xsltInjection.py:21:29:21:37 | lxml etree xml | This XSLT query depends on $@. | xsltInjection.py:17:17:17:28 | Attribute | a user-provided value |
| xsltInjection.py:31:24:31:32 | xslt_root | xsltInjection.py:26:17:26:28 | dict of etree.XML string | xsltInjection.py:31:24:31:32 | lxml etree xml | This XSLT query depends on $@. | xsltInjection.py:26:17:26:28 | Attribute | a user-provided value |
| xsltInjection.py:40:24:40:32 | xslt_root | xsltInjection.py:35:17:35:28 | dict of etree.XML string | xsltInjection.py:40:24:40:32 | lxml etree xml | This XSLT query depends on $@. | xsltInjection.py:35:17:35:28 | Attribute | a user-provided value |

View File

@@ -0,0 +1 @@
experimental/CWE-643/Xslt.ql

View File

@@ -0,0 +1,12 @@
| xslt.py:14:29:14:37 | lxml.etree.parse.xslt | lxml etree xml |
| xsltInjection.py:12:28:12:36 | lxml.etree.XSLT | lxml etree xml |
| xsltInjection.py:21:29:21:37 | lxml.etree.parse.xslt | lxml etree xml |
| xsltInjection.py:31:24:31:32 | lxml.etree.parse.xslt | lxml etree xml |
| xsltInjection.py:40:24:40:32 | lxml.etree.parse.xslt | lxml etree xml |
| xsltInjection.py:50:24:50:32 | lxml.etree.parse.xslt | lxml etree xml |
| xsltInjection.py:60:24:60:32 | lxml.etree.parse.xslt | lxml etree xml |
| xsltInjection.py:69:24:69:32 | lxml.etree.parse.xslt | lxml etree xml |
| xsltInjection.py:79:24:79:32 | lxml.etree.parse.xslt | lxml etree xml |
| xsltSinks.py:17:28:17:36 | lxml.etree.XSLT | lxml etree xml |
| xsltSinks.py:30:29:30:37 | lxml.etree.parse.xslt | lxml etree xml |
| xsltSinks.py:44:24:44:32 | lxml.etree.parse.xslt | lxml etree xml |

View File

@@ -0,0 +1,6 @@
import python
import experimental.semmle.python.security.injection.XSLT
from XSLTInjection::XSLTInjectionSink sink, TaintKind kind
where sink.sinks(kind)
select sink, kind

View File

@@ -0,0 +1 @@
semmle-extractor-options: -p ../../query-tests/Security/lib/ --max-import-depth=3

View File

@@ -0,0 +1,14 @@
from lxml import etree
from io import StringIO
from flask import Flask, request
app = Flask(__name__)
@app.route("/xslt")
def bad():
xsltQuery = request.args.get('xml', '')
xslt_root = etree.XML(xsltQuery)
f = StringIO('<foo><bar></bar></foo>')
tree = etree.parse(f)
result_tree = tree.xslt(xslt_root) # Not OK

View File

@@ -0,0 +1,79 @@
from lxml import etree
from io import StringIO
from flask import Flask, request
app = Flask(__name__)
@app.route("/xslt1")
def a():
xsltQuery = request.args.get('xml', '')
xslt_root = etree.XML(xsltQuery)
transform = etree.XSLT(xslt_root) # Not OK
@app.route("/xslt2")
def b():
xsltQuery = request.args.get('xml', '')
xslt_root = etree.XML(xsltQuery)
f = StringIO('<foo><bar></bar></foo>')
tree = etree.parse(f)
result_tree = tree.xslt(xslt_root) # Not OK
@app.route("/xslt3")
def c():
xsltQuery = request.args.get('xml', '')
xslt_root = etree.XML(xsltQuery)
f = StringIO('<foo><bar></bar></foo>')
tree = etree.parse(f)
result = tree.xslt(xslt_root, a="'A'") # Not OK
@app.route("/xslt4")
def d():
xsltQuery = request.args.get('xml', '')
xslt_root = etree.fromstring(xsltQuery)
f = StringIO('<foo><bar></bar></foo>')
tree = etree.parse(f)
result = tree.xslt(xslt_root, a="'A'") # Not OK
@app.route("/xslt5")
def e():
xsltQuery = request.args.get('xml', '')
xsltStrings = [xsltQuery,"asd","random"]
xslt_root = etree.fromstringlist(xsltStrings)
f = StringIO('<foo><bar></bar></foo>')
tree = etree.parse(f)
result = tree.xslt(xslt_root, a="'A'") # Not OK
@app.route("/xslt6")
def f():
xsltQuery = '<non><remote><query></query></remote></non>'
xslt_root = etree.XML(xsltQuery)
f = StringIO('<foo><bar></bar></foo>')
tree = etree.parse(f)
result = tree.xslt(xslt_root, a="'A'") # OK
@app.route("/xslt7")
def g():
xsltQuery = '<non><remote><query></query></remote></non>'
xslt_root = etree.fromstring(xsltQuery)
f = StringIO('<foo><bar></bar></foo>')
tree = etree.parse(f)
result = tree.xslt(xslt_root, a="'A'") # OK
@app.route("/xslt8")
def h():
xsltQuery = '<non><remote><query></query></remote></non>'
xsltStrings = [xsltQuery,"asd","random"]
xslt_root = etree.fromstringlist(xsltStrings)
f = StringIO('<foo><bar></bar></foo>')
tree = etree.parse(f)
result = tree.xslt(xslt_root, a="'A'") # OK

View File

@@ -0,0 +1,56 @@
from lxml import etree
from io import StringIO
from django.urls import path
from django.http import HttpResponse
from django.template import Template, Context, Engine, engines
def a(request):
xslt_root = etree.XML('''\
<xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:template match="/">
<foo><xsl:value-of select="/a/b/text()" /></foo>
</xsl:template>
</xsl:stylesheet>''')
transform = etree.XSLT(xslt_root)
def b(request):
xslt_root = etree.XML('''\
<xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:template match="/">
<foo><xsl:value-of select="/a/b/text()" /></foo>
</xsl:template>
</xsl:stylesheet>''')
f = StringIO('<foo><bar></bar></foo>')
tree = etree.parse(f)
result_tree = tree.xslt(xslt_root)
def c(request):
xslt_root = etree.XML('''\
<xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:template match="/">
<foo><xsl:value-of select="/a/b/text()" /></foo>
</xsl:template>
</xsl:stylesheet>''')
f = StringIO('<foo><bar></bar></foo>')
tree = etree.parse(f)
result = tree.xslt(xslt_root, a="'A'")
urlpatterns = [
path('a', a),
path('b', b),
path('c', c)
]
if __name__ == "__main__":
a(None)
b(None)
c(None)

View File

@@ -0,0 +1,37 @@
class _ElementTree(object):
def xpath(self, _path, namespaces=None, extensions=None, smart_strings=True, **_variables):
pass
def xslt(self, _xslt, extensions=None, access_control=None, **_kw):
pass
class ETXPath(object):
def __init__(self, path, extensions=None, regexp=True, smart_strings=True):
pass
class Xpath(object):
def __init__(self, path, namespaces=None, extensions=None, regexp=True, smart_strings=True):
pass
class XSLT(object):
def __init__(self, xslt_input, extensions=None, regexp=True, access_control=None):
pass
def parse(self, parser=None, base_url=None):
return _ElementTree()
def fromstring(self, text, parser=None, base_url=None):
pass
def fromstringlist(self, strings, parser=None):
pass
def XML(self, text, parser=None, base_url=None):
pass