mirror of
https://github.com/github/codeql.git
synced 2026-05-01 19:55:15 +02:00
Python : Add Xpath injection query
This PR adds support for detecting XPath injection in Python. It includes the QL files as well as the accompanying tests.
This commit is contained in:
18
python/ql/src/experimental/CWE-643/xpath.py
Normal file
18
python/ql/src/experimental/CWE-643/xpath.py
Normal file
@@ -0,0 +1,18 @@
|
||||
from lxml import etree
|
||||
from io import StringIO
|
||||
|
||||
from django.urls import path
|
||||
from django.http import HttpResponse
|
||||
from django.template import Template, Context, Engine, engines
|
||||
|
||||
|
||||
def a(request):
    """Example Django view that is vulnerable to XPath injection.

    The ``xpath`` GET parameter is evaluated directly as an XPath
    expression against a fixed XML document.
    """
    # User-controlled value taken straight from the query string.
    xpathQuery = request.GET['xpath']
    f = StringIO('<foo><bar></bar></foo>')
    tree = etree.parse(f)
    # BAD: the untrusted value is used as the XPath expression itself.
    r = tree.xpath(xpathQuery)
    # A Django view has to return an HttpResponse; falling off the end
    # (returning None) makes Django raise a ValueError for every request.
    return HttpResponse(repr(r), content_type="text/plain")
|
||||
|
||||
|
||||
# Routing table: the URL path 'a' is handled by the view function `a`.
urlpatterns = [path('a', a)]
|
||||
32
python/ql/src/experimental/CWE-643/xpath.qhelp
Normal file
32
python/ql/src/experimental/CWE-643/xpath.qhelp
Normal file
@@ -0,0 +1,32 @@
|
||||
<!DOCTYPE qhelp SYSTEM "qhelp.dtd">
|
||||
<qhelp>
|
||||
<overview>
|
||||
<p>
Using user-supplied information to construct an XPath query for XML data can
result in an XPath injection flaw. By sending intentionally malformed information,
an attacker can access data that they may not normally have access to.
They may even be able to elevate their privileges on the web site if the XML data
is being used for authentication (such as an XML-based user file).
</p>
|
||||
</overview>
|
||||
<recommendation>
|
||||
<p>
|
||||
XPath injection can be prevented by using a parameterized XPath interface or by escaping the user input to make it safe to include in a dynamically constructed query.
|
||||
If you are using quotes to terminate untrusted input in a dynamically constructed XPath query, then you need to escape that quote in the untrusted input to ensure that the untrusted data cannot break out of the quoted context.
|
||||
</p>
|
||||
<p>
|
||||
Another, better mitigation option is to use a precompiled XPath query. Precompiled XPath queries are fixed before the program executes, rather than created on the fly after the user's input has been added to the string. This is a better route because you don't have to worry about missing a character that should have been escaped.
|
||||
</p>
|
||||
</recommendation>
<example>
<p>In the example below, the XPath query is controlled by the user and hence leads to a vulnerability.</p>
<sample src="xpath.py" />
</example>
<references>
<li>OWASP: <a href="https://owasp.org/www-community/attacks/XPATH_Injection">XPath Injection</a>.</li>
</references>
|
||||
|
||||
|
||||
</qhelp>
|
||||
35
python/ql/src/experimental/CWE-643/xpath.ql
Normal file
35
python/ql/src/experimental/CWE-643/xpath.ql
Normal file
@@ -0,0 +1,35 @@
|
||||
/**
|
||||
* @name XPath query built from user-controlled sources
|
||||
* @description Building an XPath query from user-controlled sources is vulnerable to insertion of
|
||||
* malicious XPath code by the user.
|
||||
* @kind path-problem
|
||||
* @problem.severity error
|
||||
* @precision high
|
||||
* @id py/xpath-injection
|
||||
* @tags security
|
||||
* external/cwe/cwe-643
|
||||
*/
|
||||
|
||||
import python
|
||||
import semmle.python.security.Paths
|
||||
/* Sources */
|
||||
import semmle.python.web.HttpRequest
|
||||
/* Sinks */
|
||||
import experimental.semmle.python.security.injection.Xpath
|
||||
|
||||
/**
 * A taint-tracking configuration for this query: data flows from HTTP request
 * taint sources to XPath injection sinks.
 */
class XpathInjectionConfiguration extends TaintTracking::Configuration {
|
||||
XpathInjectionConfiguration() { this = "Xpath injection configuration" }
|
||||
|
||||
// Sources: every `HttpRequestTaintSource`.
override predicate isSource(TaintTracking::Source source) {
|
||||
source instanceof HttpRequestTaintSource
|
||||
}
|
||||
|
||||
// Sinks: every `XpathInjection::XpathInjectionSink`.
override predicate isSink(TaintTracking::Sink sink) {
|
||||
sink instanceof XpathInjection::XpathInjectionSink
|
||||
}
|
||||
}
|
||||
|
||||
// Report every sink for which the configuration finds a flow path from a source;
// the `$@` placeholder in the message is filled with the source and the trailing label.
from XpathInjectionConfiguration config, TaintedPathSource src, TaintedPathSink sink
|
||||
where config.hasFlowPath(src, sink)
|
||||
select sink.getSink(), src, sink, "This Xpath query depends on $@.", src.getSource(),
|
||||
"a user-provided value"
|
||||
@@ -0,0 +1,96 @@
|
||||
/**
|
||||
* Provides classes and predicates to track external data that
|
||||
* may represent malicious xpath query objects.
|
||||
*
|
||||
* This module is intended to be imported into a taint-tracking query
|
||||
* to extend `TaintKind` and `TaintSink`.
|
||||
*/
|
||||
|
||||
import python
|
||||
import semmle.python.security.TaintTracking
|
||||
import semmle.python.web.HttpRequest
|
||||
|
||||
/** Models classes and functions related to XPath injection. */
|
||||
module XpathInjection {
  /** Gets the value that refers to the `lxml.etree` module. */
  Value etree() { result = Value::named("lxml.etree") }

  /** A generic taint sink that is vulnerable to XPath injection. */
  abstract class XpathInjectionSink extends TaintSink { }

  /**
   * A sink representing the first argument of a call to `etree.XPath`.
   *
   *    from lxml import etree
   *    root = etree.XML("<xmlContent>")
   *    find_text = etree.XPath("`sink`")
   */
  private class EtreeXpathArgument extends XpathInjectionSink {
    // The label text is part of the test output, so it is kept verbatim.
    override string toString() { result = "lxml.etree.Xpath" }

    EtreeXpathArgument() {
      exists(CallNode call, AttrNode atr |
        atr = etree().getAReference().getASuccessor() and
        atr.getName() = "XPath" and
        atr = call.getFunction()
      |
        call.getArg(0) = this
      )
    }

    override predicate sinks(TaintKind kind) { kind instanceof ExternalStringKind }
  }

  /**
   * A sink representing the first argument of a call to `etree.ETXPath`.
   *
   *    from lxml import etree
   *    root = etree.XML("<xmlContent>")
   *    find_text = etree.ETXPath("`sink`")
   */
  private class EtreeETXpathArgument extends XpathInjectionSink {
    // The label text is part of the test output, so it is kept verbatim.
    override string toString() { result = "lxml.etree.ETXpath" }

    EtreeETXpathArgument() {
      exists(CallNode call, AttrNode atr |
        atr = etree().getAReference().getASuccessor() and
        atr.getName() = "ETXPath" and
        atr = call.getFunction()
      |
        call.getArg(0) = this
      )
    }

    override predicate sinks(TaintKind kind) { kind instanceof ExternalStringKind }
  }

  /**
   * A sink representing the first argument of an `xpath` call that follows
   * a call to `etree.parse`.
   *
   *    from lxml import etree
   *    from io import StringIO
   *    f = StringIO('<foo><bar></bar></foo>')
   *    tree = etree.parse(f)
   *    r = tree.xpath('`sink`')
   */
  private class ParseXpathArgument extends XpathInjectionSink {
    // The label text is part of the test output, so it is kept verbatim.
    override string toString() { result = "lxml.etree.parse.xpath" }

    ParseXpathArgument() {
      exists(CallNode parseCall, AttrNode parse |
        parse = etree().getAReference().getASuccessor() and
        parse.getName() = "parse" and
        parse = parseCall.getFunction() and
        // NOTE(review): the link between the two calls is `getASuccessor*()` on the
        // `parse` call, not a check on the receiver of `.xpath` - presumably any
        // `.xpath(...)` call reachable afterwards matches; confirm this is intended.
        exists(CallNode xpathCall, AttrNode xpath |
          xpath = parseCall.getASuccessor*() and
          xpath.getName() = "xpath" and
          xpath = xpathCall.getFunction() and
          this = xpathCall.getArg(0)
        )
      )
    }

    override predicate sinks(TaintKind kind) { kind instanceof ExternalStringKind }
  }
}
|
||||
@@ -0,0 +1 @@
|
||||
semmle-extractor-options: --max-import-depth=3 -p ../../../query-tests/Security/lib/
|
||||
33
python/ql/test/experimental/CWE-643/XpathLibTests/xpath.py
Normal file
33
python/ql/test/experimental/CWE-643/XpathLibTests/xpath.py
Normal file
@@ -0,0 +1,33 @@
|
||||
from lxml import etree
|
||||
from io import StringIO
|
||||
|
||||
|
||||
def a():  # fixture: `xpath` called on the result of `etree.parse`; keep line/column layout, the expected sink table refers to positions in this file
|
||||
f = StringIO('<foo><bar></bar></foo>')
|
||||
tree = etree.parse(f)
|
||||
r = tree.xpath('/foo/bar')  # the string argument is the sink labelled `lxml.etree.parse.xpath` in the expected table
|
||||
|
||||
|
||||
def b():  # fixture: `etree.XPath` built with a single positional argument
|
||||
root = etree.XML("<root><a>TEXT</a></root>")
|
||||
find_text = etree.XPath("//text()")  # the string argument is the sink labelled `lxml.etree.Xpath` in the expected table
|
||||
text = find_text(root)[0]
|
||||
|
||||
|
||||
def c():  # fixture: `etree.XPath` built with an extra keyword argument
|
||||
root = etree.XML("<root><a>TEXT</a></root>")
|
||||
find_text = etree.XPath("//text()", smart_strings=False)  # only the first argument is listed as a sink in the expected table
|
||||
text = find_text(root)[0]
|
||||
|
||||
|
||||
def d():  # fixture: `etree.ETXPath` built with a single positional argument
|
||||
root = etree.XML("<root><a>TEXT</a></root>")
|
||||
find_text = find = etree.ETXPath("//{ns}b")  # `find` is never read in this function; the chained assignment fixes the column of the sink argument recorded in the expected table
|
||||
text = find_text(root)[0]
|
||||
|
||||
|
||||
if __name__ == "__main__":  # when run as a script, call each fixture function once, in order
|
||||
a()
|
||||
b()
|
||||
c()
|
||||
d()
|
||||
@@ -0,0 +1,4 @@
|
||||
| xpath.py:8:20:8:29 | lxml.etree.parse.xpath | externally controlled string |
|
||||
| xpath.py:13:29:13:38 | lxml.etree.Xpath | externally controlled string |
|
||||
| xpath.py:19:29:19:38 | lxml.etree.Xpath | externally controlled string |
|
||||
| xpath.py:25:38:25:46 | lxml.etree.ETXpath | externally controlled string |
|
||||
@@ -0,0 +1,6 @@
|
||||
import python
|
||||
import experimental.semmle.python.security.injection.Xpath
|
||||
|
||||
// Library test: list every XPath injection sink together with each taint kind it accepts.
from XpathInjection::XpathInjectionSink sink, TaintKind kind
|
||||
where sink.sinks(kind)
|
||||
select sink, kind
|
||||
1
python/ql/test/experimental/CWE-643/options
Normal file
1
python/ql/test/experimental/CWE-643/options
Normal file
@@ -0,0 +1 @@
|
||||
semmle-extractor-options: --max-import-depth=3 -p ../../query-tests/Security/lib/
|
||||
22
python/ql/test/experimental/CWE-643/xpath.expected
Normal file
22
python/ql/test/experimental/CWE-643/xpath.expected
Normal file
@@ -0,0 +1,22 @@
|
||||
edges
|
||||
| xpathFlow.py:10:18:10:29 | dict of externally controlled string | xpathFlow.py:10:18:10:44 | externally controlled string |
|
||||
| xpathFlow.py:10:18:10:29 | dict of externally controlled string | xpathFlow.py:10:18:10:44 | externally controlled string |
|
||||
| xpathFlow.py:10:18:10:44 | externally controlled string | xpathFlow.py:13:20:13:29 | externally controlled string |
|
||||
| xpathFlow.py:10:18:10:44 | externally controlled string | xpathFlow.py:13:20:13:29 | externally controlled string |
|
||||
| xpathFlow.py:18:18:18:29 | dict of externally controlled string | xpathFlow.py:18:18:18:44 | externally controlled string |
|
||||
| xpathFlow.py:18:18:18:29 | dict of externally controlled string | xpathFlow.py:18:18:18:44 | externally controlled string |
|
||||
| xpathFlow.py:18:18:18:44 | externally controlled string | xpathFlow.py:21:29:21:38 | externally controlled string |
|
||||
| xpathFlow.py:18:18:18:44 | externally controlled string | xpathFlow.py:21:29:21:38 | externally controlled string |
|
||||
| xpathFlow.py:27:18:27:29 | dict of externally controlled string | xpathFlow.py:27:18:27:44 | externally controlled string |
|
||||
| xpathFlow.py:27:18:27:29 | dict of externally controlled string | xpathFlow.py:27:18:27:44 | externally controlled string |
|
||||
| xpathFlow.py:27:18:27:44 | externally controlled string | xpathFlow.py:29:29:29:38 | externally controlled string |
|
||||
| xpathFlow.py:27:18:27:44 | externally controlled string | xpathFlow.py:29:29:29:38 | externally controlled string |
|
||||
| xpathFlow.py:35:18:35:29 | dict of externally controlled string | xpathFlow.py:35:18:35:44 | externally controlled string |
|
||||
| xpathFlow.py:35:18:35:29 | dict of externally controlled string | xpathFlow.py:35:18:35:44 | externally controlled string |
|
||||
| xpathFlow.py:35:18:35:44 | externally controlled string | xpathFlow.py:37:38:37:47 | externally controlled string |
|
||||
| xpathFlow.py:35:18:35:44 | externally controlled string | xpathFlow.py:37:38:37:47 | externally controlled string |
|
||||
#select
|
||||
| xpathFlow.py:13:20:13:29 | xpathQuery | xpathFlow.py:10:18:10:29 | dict of externally controlled string | xpathFlow.py:13:20:13:29 | externally controlled string | This Xpath query depends on $@. | xpathFlow.py:10:18:10:29 | Attribute | a user-provided value |
|
||||
| xpathFlow.py:21:29:21:38 | xpathQuery | xpathFlow.py:18:18:18:29 | dict of externally controlled string | xpathFlow.py:21:29:21:38 | externally controlled string | This Xpath query depends on $@. | xpathFlow.py:18:18:18:29 | Attribute | a user-provided value |
|
||||
| xpathFlow.py:29:29:29:38 | xpathQuery | xpathFlow.py:27:18:27:29 | dict of externally controlled string | xpathFlow.py:29:29:29:38 | externally controlled string | This Xpath query depends on $@. | xpathFlow.py:27:18:27:29 | Attribute | a user-provided value |
|
||||
| xpathFlow.py:37:38:37:47 | xpathQuery | xpathFlow.py:35:18:35:29 | dict of externally controlled string | xpathFlow.py:37:38:37:47 | externally controlled string | This Xpath query depends on $@. | xpathFlow.py:35:18:35:29 | Attribute | a user-provided value |
|
||||
1
python/ql/test/experimental/CWE-643/xpath.qlref
Normal file
1
python/ql/test/experimental/CWE-643/xpath.qlref
Normal file
@@ -0,0 +1 @@
|
||||
experimental/CWE-643/xpath.ql
|
||||
38
python/ql/test/experimental/CWE-643/xpathFlow.py
Normal file
38
python/ql/test/experimental/CWE-643/xpathFlow.py
Normal file
@@ -0,0 +1,38 @@
|
||||
from lxml import etree
|
||||
from io import StringIO
|
||||
from flask import Flask, request
|
||||
|
||||
app = Flask(__name__)  # Flask application whose routes are the fixture views below
|
||||
|
||||
|
||||
@app.route("/xpath1")
|
||||
def a():  # fixture: request value flows into `xpath` on a parsed tree; keep line/column layout, the expected results refer to positions in this file
|
||||
xpathQuery = request.args.get('xml', '')  # taint source: `request.args` (listed as "dict of externally controlled string" in the expected edges)
|
||||
f = StringIO('<foo><bar></bar></foo>')
|
||||
tree = etree.parse(f)
|
||||
r = tree.xpath(xpathQuery)  # tainted value reaches the sink argument
|
||||
|
||||
|
||||
@app.route("/xpath2")
|
||||
def b():  # fixture: request value flows into `etree.XPath`
|
||||
xpathQuery = request.args.get('xml', '')  # taint source: `request.args`
|
||||
|
||||
root = etree.XML("<root><a>TEXT</a></root>")
|
||||
find_text = etree.XPath(xpathQuery)  # tainted value reaches the sink argument
|
||||
text = find_text(root)[0]
|
||||
|
||||
|
||||
@app.route("/xpath3")
|
||||
def c():  # fixture: request value flows into `etree.XPath` called with an extra keyword argument
|
||||
xpathQuery = request.args.get('xml', '')  # taint source: `request.args`
|
||||
root = etree.XML("<root><a>TEXT</a></root>")
|
||||
find_text = etree.XPath(xpathQuery, smart_strings=False)  # tainted value reaches the first (sink) argument
|
||||
text = find_text(root)[0]
|
||||
|
||||
|
||||
@app.route("/xpath4")
|
||||
def d():  # fixture: request value flows into `etree.ETXPath`
|
||||
xpathQuery = request.args.get('xml', '')  # taint source: `request.args`
|
||||
root = etree.XML("<root><a>TEXT</a></root>")
|
||||
find_text = find = etree.ETXPath(xpathQuery)  # tainted value reaches the sink argument; `find` is never read in this function
|
||||
text = find_text(root)[0]
|
||||
@@ -0,0 +1,37 @@
|
||||
class _ElementTree(object):
|
||||
def xpath(self, _path, namespaces=None, extensions=None, smart_strings=True, **_variables):
|
||||
pass
|
||||
|
||||
def xslt(self, _xslt, extensions=None, access_control=None, **_kw):
|
||||
pass
|
||||
|
||||
|
||||
class ETXPath(object):
    """Stub for ``ETXPath``; only the constructor signature is modelled."""

    def __init__(self, path, extensions=None, regexp=True, smart_strings=True):
        """Accept the constructor arguments and store nothing."""
|
||||
|
||||
|
||||
class XPath(object):
    """Stub for ``XPath``; only the constructor signature is modelled."""

    def __init__(self, path, namespaces=None, extensions=None, regexp=True, smart_strings=True):
        """Accept the constructor arguments and store nothing."""
|
||||
|
||||
|
||||
class XSLT(object):
    """Stub for ``XSLT``; only the constructor signature is modelled."""

    def __init__(self, xslt_input, extensions=None, regexp=True, access_control=None):
        """Accept the constructor arguments and store nothing."""
|
||||
|
||||
|
||||
def parse(self, parser=None, base_url=None):
    """Stub for ``parse``: ignore every argument and return a new ``_ElementTree``.

    NOTE(review): the first parameter is named ``self`` although the test code
    calls this as ``etree.parse(f)``; it presumably stands for the input
    source. The name is left unchanged so the signature stays the same.
    """
    tree = _ElementTree()
    return tree
|
||||
|
||||
|
||||
def fromstring(self, text, parser=None, base_url=None):
    """Stub for ``fromstring``: accept the arguments and return None."""
    return None
|
||||
|
||||
|
||||
def fromstringlist(self, strings, parser=None):
    """Stub for ``fromstringlist``: accept the arguments and return None."""
    return None
|
||||
|
||||
|
||||
def XML(self, text, parser=None, base_url=None):
    """Stub for ``XML``: accept the arguments and return None."""
    return None
|
||||
Reference in New Issue
Block a user