Merge pull request #8634 from RasmusWL/promote-xxe

Python: Promote XXE and XML-bomb queries
This commit is contained in:
yoff
2022-05-09 21:54:55 +02:00
committed by GitHub
64 changed files with 1701 additions and 1091 deletions

View File

@@ -0,0 +1,4 @@
---
category: minorAnalysis
---
* Added taint propagation for `io.StringIO` and `io.BytesIO`. This addition was originally [submitted as part of an experimental query by @jorgectf](https://github.com/github/codeql/pull/6112).

View File

@@ -498,6 +498,65 @@ module XML {
abstract string getName();
}
}
/**
* A kind of XML vulnerability.
*
* See overview of kinds at https://pypi.org/project/defusedxml/#python-xml-libraries
*
* See PoC at `python/PoCs/XmlParsing/PoC.py` for some tests of vulnerable XML parsing.
*/
class XmlParsingVulnerabilityKind extends string {
XmlParsingVulnerabilityKind() { this in ["XML bomb", "XXE", "DTD retrieval"] }
/**
* Holds for XML bomb vulnerability kind, such as 'Billion Laughs' and 'Quadratic
* Blowup'.
*
* While a parser could technically be vulnerable to one and not the other, from our
* point of view the interesting part is that it IS vulnerable to these types of
* attacks, and not so much which one specifically works. In practice I haven't seen
* a parser that is vulnerable to one and not the other.
*/
predicate isXmlBomb() { this = "XML bomb" }
/** Holds for XXE vulnerability kind. */
predicate isXxe() { this = "XXE" }
/** Holds for DTD retrieval vulnerability kind. */
predicate isDtdRetrieval() { this = "DTD retrieval" }
}
/**
* A data-flow node that parses XML.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `XmlParsing` instead.
*/
class XmlParsing extends Decoding instanceof XmlParsing::Range {
/**
* Holds if this XML parsing is vulnerable to `kind`.
*/
predicate vulnerableTo(XmlParsingVulnerabilityKind kind) { super.vulnerableTo(kind) }
}
/** Provides classes for modeling XML parsing APIs. */
module XmlParsing {
/**
* A data-flow node that parses XML.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `XmlParsing` instead.
*/
abstract class Range extends Decoding::Range {
/**
* Holds if this XML parsing is vulnerable to `kind`.
*/
abstract predicate vulnerableTo(XmlParsingVulnerabilityKind kind);
override string getFormat() { result = "XML" }
}
}
}
/** Provides classes for modeling LDAP-related APIs. */

View File

@@ -52,3 +52,4 @@ private import semmle.python.frameworks.Ujson
private import semmle.python.frameworks.Urllib3
private import semmle.python.frameworks.Yaml
private import semmle.python.frameworks.Yarl
private import semmle.python.frameworks.Xmltodict

View File

@@ -19,6 +19,9 @@ private import semmle.python.ApiGraphs
* - https://lxml.de/tutorial.html
*/
private module Lxml {
// ---------------------------------------------------------------------------
// XPath
// ---------------------------------------------------------------------------
/**
* A class constructor compiling an XPath expression.
*
@@ -57,13 +60,25 @@ private module Lxml {
*/
class XPathCall extends XML::XPathExecution::Range, DataFlow::CallCfgNode {
XPathCall() {
this =
API::moduleImport("lxml")
.getMember("etree")
.getMember(["parse", "fromstring", "fromstringlist", "HTML", "XML"])
.getReturn()
.getMember("xpath")
.getACall()
exists(API::Node parseResult |
parseResult =
API::moduleImport("lxml")
.getMember("etree")
.getMember(["parse", "fromstring", "fromstringlist", "HTML", "XML"])
.getReturn()
or
// TODO: lxml.etree.parseid(<text>)[0] will contain the root element from parsing <text>
// but we don't really have a way to model that nicely.
parseResult =
API::moduleImport("lxml")
.getMember("etree")
.getMember("XMLParser")
.getReturn()
.getMember("close")
.getReturn()
|
this = parseResult.getMember("xpath").getACall()
)
}
override DataFlow::Node getXPath() { result in [this.getArg(0), this.getArgByName("_path")] }
@@ -85,4 +100,235 @@ private module Lxml {
override string getName() { result = "lxml.etree" }
}
// ---------------------------------------------------------------------------
// Parsing
// ---------------------------------------------------------------------------
/**
* Provides models for `lxml.etree` parsers.
*
* See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser
*/
module XmlParser {
/**
* A source of instances of `lxml.etree` parsers, extend this class to model new instances.
*
* This can include instantiations of the class, return values from function
* calls, or a special parameter that will be set when functions are called by an external
* library.
*
* Use the predicate `XmlParser::instance()` to get references to instances of `lxml.etree` parsers.
*/
abstract class InstanceSource extends DataFlow::LocalSourceNode {
/** Holds if this instance is vulnerable to `kind`. */
abstract predicate vulnerableTo(XML::XmlParsingVulnerabilityKind kind);
}
/**
* A call to `lxml.etree.XMLParser`.
*
* See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser
*/
private class LxmlParser extends InstanceSource, API::CallNode {
LxmlParser() {
this = API::moduleImport("lxml").getMember("etree").getMember("XMLParser").getACall()
}
// NOTE: it's not possible to change settings of a parser after constructing it
override predicate vulnerableTo(XML::XmlParsingVulnerabilityKind kind) {
kind.isXxe() and
(
// resolve_entities has default True
not exists(this.getArgByName("resolve_entities"))
or
this.getKeywordParameter("resolve_entities").getAValueReachingRhs().asExpr() = any(True t)
)
or
kind.isXmlBomb() and
this.getKeywordParameter("huge_tree").getAValueReachingRhs().asExpr() = any(True t) and
not this.getKeywordParameter("resolve_entities").getAValueReachingRhs().asExpr() =
any(False t)
or
kind.isDtdRetrieval() and
this.getKeywordParameter("load_dtd").getAValueReachingRhs().asExpr() = any(True t) and
this.getKeywordParameter("no_network").getAValueReachingRhs().asExpr() = any(False t)
}
}
/**
* A call to `lxml.etree.get_default_parser`.
*
* See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.get_default_parser
*/
private class LxmlDefaultParser extends InstanceSource, DataFlow::CallCfgNode {
LxmlDefaultParser() {
this =
API::moduleImport("lxml").getMember("etree").getMember("get_default_parser").getACall()
}
override predicate vulnerableTo(XML::XmlParsingVulnerabilityKind kind) {
// as highlighted by
// https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser
// by default XXE is allow. so as long as the default parser has not been
// overridden, the result is also vuln to XXE.
kind.isXxe()
// TODO: take into account that you can override the default parser with `lxml.etree.set_default_parser`.
}
}
/** Gets a reference to an `lxml.etree` parsers instance, with origin in `origin` */
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t, InstanceSource origin) {
t.start() and
result = origin
or
exists(DataFlow::TypeTracker t2 | result = instance(t2, origin).track(t2, t))
}
/** Gets a reference to an `lxml.etree` parsers instance, with origin in `origin` */
DataFlow::Node instance(InstanceSource origin) {
instance(DataFlow::TypeTracker::end(), origin).flowsTo(result)
}
/** Gets a reference to an `lxml.etree` parser instance, that is vulnerable to `kind`. */
DataFlow::Node instanceVulnerableTo(XML::XmlParsingVulnerabilityKind kind) {
exists(InstanceSource origin | result = instance(origin) and origin.vulnerableTo(kind))
}
/**
* A call to the `feed` method of an `lxml` parser.
*/
private class LxmlParserFeedCall extends DataFlow::MethodCallNode, XML::XmlParsing::Range {
LxmlParserFeedCall() { this.calls(instance(_), "feed") }
override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("data")] }
override predicate vulnerableTo(XML::XmlParsingVulnerabilityKind kind) {
this.calls(instanceVulnerableTo(kind), "feed")
}
override predicate mayExecuteInput() { none() }
override DataFlow::Node getOutput() {
exists(DataFlow::Node objRef |
DataFlow::localFlow(this.getObject(), objRef) and
result.(DataFlow::MethodCallNode).calls(objRef, "close")
)
}
}
}
/**
* A call to either of:
* - `lxml.etree.fromstring`
* - `lxml.etree.fromstringlist`
* - `lxml.etree.XML`
* - `lxml.etree.XMLID`
* - `lxml.etree.parse`
* - `lxml.etree.parseid`
*
* See
* - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.fromstring
* - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.fromstringlist
* - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.XML
* - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.XMLID
* - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.parse
* - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.parseid
*/
private class LxmlParsing extends DataFlow::CallCfgNode, XML::XmlParsing::Range {
string functionName;
LxmlParsing() {
functionName in ["fromstring", "fromstringlist", "XML", "XMLID", "parse", "parseid"] and
this = API::moduleImport("lxml").getMember("etree").getMember(functionName).getACall()
}
override DataFlow::Node getAnInput() {
result in [
this.getArg(0),
// fromstring / XML / XMLID
this.getArgByName("text"),
// fromstringlist
this.getArgByName("strings"),
// parse / parseid
this.getArgByName("source"),
]
}
DataFlow::Node getParserArg() { result in [this.getArg(1), this.getArgByName("parser")] }
override predicate vulnerableTo(XML::XmlParsingVulnerabilityKind kind) {
this.getParserArg() = XmlParser::instanceVulnerableTo(kind)
or
kind.isXxe() and
not exists(this.getParserArg())
}
override predicate mayExecuteInput() { none() }
override DataFlow::Node getOutput() {
// Note: for `parseid`/XMLID the result of the call is a tuple with `(root, dict)`, so
// maybe we should not just say that the entire tuple is the decoding output... my
// gut feeling is that THIS instance doesn't matter too much, but that it would be
// nice to be able to do this in general. (this is a problem for both `lxml.etree`
// and `xml.etree`)
result = this
}
}
/**
* A call to `lxml.etree.ElementTree.parse` or `lxml.etree.ElementTree.parseid`, which
* takes either a filename or a file-like object as argument. To capture the filename
* for path-injection, we have this subclass.
*
* See
* - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.parse
* - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.parseid
*/
private class FileAccessFromLxmlParsing extends LxmlParsing, FileSystemAccess::Range {
FileAccessFromLxmlParsing() {
functionName in ["parse", "parseid"]
// I considered whether we should try to reduce FPs from people passing file-like
// objects, which will not be a file system access (and couldn't cause a
// path-injection).
//
// I suppose that once we have proper flow-summary support for file-like objects,
// we can make the XXE/XML-bomb sinks allow an access-path, while the
// path-injection sink wouldn't, and then we will not end up with such FPs.
}
override DataFlow::Node getAPathArgument() { result = this.getAnInput() }
}
/**
* A call to `lxml.etree.iterparse`
*
* See
* - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.iterparse
*/
private class LxmlIterparseCall extends API::CallNode, XML::XmlParsing::Range,
FileSystemAccess::Range {
LxmlIterparseCall() {
this = API::moduleImport("lxml").getMember("etree").getMember("iterparse").getACall()
}
override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("source")] }
override predicate vulnerableTo(XML::XmlParsingVulnerabilityKind kind) {
// note that there is no `resolve_entities` argument, so it's not possible to turn off XXE :O
kind.isXxe()
or
kind.isXmlBomb() and
this.getKeywordParameter("huge_tree").getAValueReachingRhs().asExpr() = any(True t)
or
kind.isDtdRetrieval() and
this.getKeywordParameter("load_dtd").getAValueReachingRhs().asExpr() = any(True t) and
this.getKeywordParameter("no_network").getAValueReachingRhs().asExpr() = any(False t)
}
override predicate mayExecuteInput() { none() }
override DataFlow::Node getOutput() { result = this }
override DataFlow::Node getAPathArgument() { result = this.getAnInput() }
}
}

View File

@@ -2890,70 +2890,6 @@ private module StdlibPrivate {
override string getKind() { result = Escaping::getRegexKind() }
}
// ---------------------------------------------------------------------------
// xml.etree.ElementTree
// ---------------------------------------------------------------------------
/**
* An instance of `xml.etree.ElementTree.ElementTree`.
*
* See https://docs.python.org/3.10/library/xml.etree.elementtree.html#xml.etree.ElementTree.ElementTree
*/
private API::Node elementTreeInstance() {
//parse to a tree
result =
API::moduleImport("xml")
.getMember("etree")
.getMember("ElementTree")
.getMember("parse")
.getReturn()
or
// construct a tree without parsing
result =
API::moduleImport("xml")
.getMember("etree")
.getMember("ElementTree")
.getMember("ElementTree")
.getReturn()
}
/**
* An instance of `xml.etree.ElementTree.Element`.
*
* See https://docs.python.org/3.10/library/xml.etree.elementtree.html#xml.etree.ElementTree.Element
*/
private API::Node elementInstance() {
// parse or go to the root of a tree
result = elementTreeInstance().getMember(["parse", "getroot"]).getReturn()
or
// parse directly to an element
result =
API::moduleImport("xml")
.getMember("etree")
.getMember("ElementTree")
.getMember(["fromstring", "fromstringlist", "XML"])
.getReturn()
}
/**
* A call to a find method on a tree or an element will execute an XPath expression.
*/
private class ElementTreeFindCall extends XML::XPathExecution::Range, DataFlow::CallCfgNode {
string methodName;
ElementTreeFindCall() {
methodName in ["find", "findall", "findtext"] and
(
this = elementTreeInstance().getMember(methodName).getACall()
or
this = elementInstance().getMember(methodName).getACall()
)
}
override DataFlow::Node getXPath() { result in [this.getArg(0), this.getArgByName("match")] }
override string getName() { result = "xml.etree" }
}
// ---------------------------------------------------------------------------
// urllib
// ---------------------------------------------------------------------------
@@ -3171,6 +3107,547 @@ private module StdlibPrivate {
result in [this.getArg(0), this.getArgByName("path")]
}
}
// ---------------------------------------------------------------------------
// io
// ---------------------------------------------------------------------------
/**
* Provides models for the `io.StringIO`/`io.BytesIO` classes
*
* See https://docs.python.org/3.10/library/io.html#io.StringIO.
*/
module StringIO {
/** Gets a reference to the `io.StringIO` class. */
private API::Node classRef() {
result = API::moduleImport("io").getMember(["StringIO", "BytesIO"])
}
/**
* A source of instances of `io.StringIO`/`io.BytesIO`, extend this class to model new instances.
*
* This can include instantiations of the class, return values from function
* calls, or a special parameter that will be set when functions are called by an external
* library.
*
* Use the predicate `StringIO::instance()` to get references to instances of `io.StringIO`.
*/
abstract class InstanceSource extends Stdlib::FileLikeObject::InstanceSource { }
/** A direct instantiation of `io.StringIO`/`io.BytesIO`. */
private class ClassInstantiation extends InstanceSource, DataFlow::CallCfgNode {
ClassInstantiation() { this = classRef().getACall() }
DataFlow::Node getInitialValue() {
result = this.getArg(0)
or
// `initial_value` for StringIO, `initial_bytes` for BytesIO
result = this.getArgByName(["initial_value", "initial_bytes"])
}
}
/** Gets a reference to an instance of `io.StringIO`/`io.BytesIO`. */
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
t.start() and
result instanceof InstanceSource
or
exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t))
}
/** Gets a reference to an instance of `io.StringIO`/`io.BytesIO`. */
DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }
/**
* Extra taint propagation for `io.StringIO`/`io.BytesIO`.
*/
private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
nodeTo.(ClassInstantiation).getInitialValue() = nodeFrom
}
}
}
// ---------------------------------------------------------------------------
// xml.etree.ElementTree
// ---------------------------------------------------------------------------
/**
* An instance of `xml.etree.ElementTree.ElementTree`.
*
* See https://docs.python.org/3.10/library/xml.etree.elementtree.html#xml.etree.ElementTree.ElementTree
*/
private API::Node elementTreeInstance() {
//parse to a tree
result =
API::moduleImport("xml")
.getMember("etree")
.getMember("ElementTree")
.getMember("parse")
.getReturn()
or
// construct a tree without parsing
result =
API::moduleImport("xml")
.getMember("etree")
.getMember("ElementTree")
.getMember("ElementTree")
.getReturn()
}
/**
* An instance of `xml.etree.ElementTree.Element`.
*
* See https://docs.python.org/3.10/library/xml.etree.elementtree.html#xml.etree.ElementTree.Element
*/
private API::Node elementInstance() {
// parse or go to the root of a tree
result = elementTreeInstance().getMember(["parse", "getroot"]).getReturn()
or
// parse directly to an element
result =
API::moduleImport("xml")
.getMember("etree")
.getMember("ElementTree")
.getMember(["fromstring", "fromstringlist", "XML"])
.getReturn()
or
result =
API::moduleImport("xml")
.getMember("etree")
.getMember("ElementTree")
.getMember("XMLParser")
.getReturn()
.getMember("close")
.getReturn()
}
/**
* A call to a find method on a tree or an element will execute an XPath expression.
*/
private class ElementTreeFindCall extends XML::XPathExecution::Range, DataFlow::CallCfgNode {
string methodName;
ElementTreeFindCall() {
methodName in ["find", "findall", "findtext"] and
(
this = elementTreeInstance().getMember(methodName).getACall()
or
this = elementInstance().getMember(methodName).getACall()
)
}
override DataFlow::Node getXPath() { result in [this.getArg(0), this.getArgByName("match")] }
override string getName() { result = "xml.etree" }
}
/**
* Provides models for `xml.etree` parsers
*
* See
* - https://docs.python.org/3.10/library/xml.etree.elementtree.html#xml.etree.ElementTree.XMLParser
* - https://docs.python.org/3.10/library/xml.etree.elementtree.html#xml.etree.ElementTree.XMLPullParser
*/
module XmlParser {
/**
* A source of instances of `xml.etree` parsers, extend this class to model new instances.
*
* This can include instantiations of the class, return values from function
* calls, or a special parameter that will be set when functions are called by an external
* library.
*
* Use the predicate `XmlParser::instance()` to get references to instances of `xml.etree` parsers.
*/
abstract class InstanceSource extends DataFlow::LocalSourceNode { }
/** A direct instantiation of `xml.etree` parsers. */
private class ClassInstantiation extends InstanceSource, DataFlow::CallCfgNode {
ClassInstantiation() {
this =
API::moduleImport("xml")
.getMember("etree")
.getMember("ElementTree")
.getMember(["XMLParser", "XMLPullParser"])
.getACall()
}
}
/** Gets a reference to an `xml.etree` parser instance. */
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
t.start() and
result instanceof InstanceSource
or
exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t))
}
/** Gets a reference to an `xml.etree` parser instance. */
DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }
/**
* A call to the `feed` method of an `xml.etree` parser.
*/
private class XmlEtreeParserFeedCall extends DataFlow::MethodCallNode, XML::XmlParsing::Range {
XmlEtreeParserFeedCall() { this.calls(instance(), "feed") }
override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("data")] }
override predicate vulnerableTo(XML::XmlParsingVulnerabilityKind kind) { kind.isXmlBomb() }
override predicate mayExecuteInput() { none() }
override DataFlow::Node getOutput() {
exists(DataFlow::Node objRef |
DataFlow::localFlow(this.getObject(), objRef) and
result.(DataFlow::MethodCallNode).calls(objRef, "close")
)
}
}
}
/**
* A call to either of:
* - `xml.etree.ElementTree.fromstring`
* - `xml.etree.ElementTree.fromstringlist`
* - `xml.etree.ElementTree.XML`
* - `xml.etree.ElementTree.XMLID`
* - `xml.etree.ElementTree.parse`
* - `xml.etree.ElementTree.iterparse`
* - `parse` method on an `xml.etree.ElementTree.ElementTree` instance
*
* See
* - https://docs.python.org/3/library/xml.etree.elementtree.html#xml.etree.ElementTree.fromstring
* - https://docs.python.org/3/library/xml.etree.elementtree.html#xml.etree.ElementTree.fromstringlist
* - https://docs.python.org/3/library/xml.etree.elementtree.html#xml.etree.ElementTree.XML
* - https://docs.python.org/3/library/xml.etree.elementtree.html#xml.etree.ElementTree.XMLID
* - https://docs.python.org/3/library/xml.etree.elementtree.html#xml.etree.ElementTree.parse
* - https://docs.python.org/3/library/xml.etree.elementtree.html#xml.etree.ElementTree.iterparse
*/
private class XmlEtreeParsing extends DataFlow::CallCfgNode, XML::XmlParsing::Range {
XmlEtreeParsing() {
this =
API::moduleImport("xml")
.getMember("etree")
.getMember("ElementTree")
.getMember(["fromstring", "fromstringlist", "XML", "XMLID", "parse", "iterparse"])
.getACall()
or
this = elementTreeInstance().getMember("parse").getACall()
}
override DataFlow::Node getAnInput() {
result in [
this.getArg(0),
// fromstring / XML / XMLID
this.getArgByName("text"),
// fromstringlist
this.getArgByName("sequence"),
// parse / iterparse
this.getArgByName("source"),
]
}
override predicate vulnerableTo(XML::XmlParsingVulnerabilityKind kind) {
// note: it does not matter what `xml.etree` parser you are using, you cannot
// change the security features anyway :|
kind.isXmlBomb()
}
override predicate mayExecuteInput() { none() }
override DataFlow::Node getOutput() {
// Note: for `XMLID` the result of the call is a tuple with `(root, dict)`, so
// maybe we should not just say that the entire tuple is the decoding output... my
// gut feeling is that THIS instance doesn't matter too much, but that it would be
// nice to be able to do this in general. (this is a problem for both `lxml.etree`
// and `xml.etree`)
result = this
}
}
/**
* A call to `xml.etree.ElementTree.parse` or `xml.etree.ElementTree.iterparse`, which
* takes either a filename or a file-like object as argument. To capture the filename
* for path-injection, we have this subclass.
*
* See
* - https://docs.python.org/3/library/xml.etree.elementtree.html#xml.etree.ElementTree.parse
* - https://docs.python.org/3/library/xml.etree.elementtree.html#xml.etree.ElementTree.iterparse
*/
private class FileAccessFromXmlEtreeParsing extends XmlEtreeParsing, FileSystemAccess::Range {
FileAccessFromXmlEtreeParsing() {
this =
API::moduleImport("xml")
.getMember("etree")
.getMember("ElementTree")
.getMember(["parse", "iterparse"])
.getACall()
or
this = elementTreeInstance().getMember("parse").getACall()
// I considered whether we should try to reduce FPs from people passing file-like
// objects, which will not be a file system access (and couldn't cause a
// path-injection).
//
// I suppose that once we have proper flow-summary support for file-like objects,
// we can make the XXE/XML-bomb sinks allow an access-path, while the
// path-injection sink wouldn't, and then we will not end up with such FPs.
}
override DataFlow::Node getAPathArgument() { result = this.getAnInput() }
}
// ---------------------------------------------------------------------------
// xml.sax
// ---------------------------------------------------------------------------
/**
* A call to the `setFeature` method on a XML sax parser.
*
* See https://docs.python.org/3.10/library/xml.sax.reader.html#xml.sax.xmlreader.XMLReader.setFeature
*/
private class SaxParserSetFeatureCall extends API::CallNode, DataFlow::MethodCallNode {
SaxParserSetFeatureCall() {
this =
API::moduleImport("xml")
.getMember("sax")
.getMember("make_parser")
.getReturn()
.getMember("setFeature")
.getACall()
}
// The keyword argument names does not match documentation. I checked (with Python
// 3.9.5) that the names used here actually works.
API::Node getFeatureArg() { result = this.getParameter(0, "name") }
API::Node getStateArg() { result = this.getParameter(1, "state") }
}
/**
* Gets a reference to a XML sax parser that has `feature_external_ges` turned on.
*
* See https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges
*/
private DataFlow::Node saxParserWithFeatureExternalGesTurnedOn(DataFlow::TypeTracker t) {
t.start() and
exists(SaxParserSetFeatureCall call |
call.getFeatureArg().getARhs() =
API::moduleImport("xml")
.getMember("sax")
.getMember("handler")
.getMember("feature_external_ges")
.getAUse() and
call.getStateArg().getAValueReachingRhs().asExpr().(BooleanLiteral).booleanValue() = true and
result = call.getObject()
)
or
exists(DataFlow::TypeTracker t2 |
t = t2.smallstep(saxParserWithFeatureExternalGesTurnedOn(t2), result)
) and
// take account of that we can set the feature to False, which makes the parser safe again
not exists(SaxParserSetFeatureCall call |
call.getObject() = result and
call.getFeatureArg().getARhs() =
API::moduleImport("xml")
.getMember("sax")
.getMember("handler")
.getMember("feature_external_ges")
.getAUse() and
call.getStateArg().getAValueReachingRhs().asExpr().(BooleanLiteral).booleanValue() = false
)
}
/**
* Gets a reference to a XML sax parser that has `feature_external_ges` turned on.
*
* See https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges
*/
DataFlow::Node saxParserWithFeatureExternalGesTurnedOn() {
result = saxParserWithFeatureExternalGesTurnedOn(DataFlow::TypeTracker::end())
}
/**
* A call to the `parse` method on a SAX XML parser.
*
* See https://docs.python.org/3/library/xml.sax.reader.html#xml.sax.xmlreader.XMLReader.parse
*/
private class XmlSaxInstanceParsing extends DataFlow::MethodCallNode, XML::XmlParsing::Range,
FileSystemAccess::Range {
XmlSaxInstanceParsing() {
this =
API::moduleImport("xml")
.getMember("sax")
.getMember("make_parser")
.getReturn()
.getMember("parse")
.getACall()
}
override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("source")] }
override predicate vulnerableTo(XML::XmlParsingVulnerabilityKind kind) {
// always vuln to these
kind.isXmlBomb()
or
// can be vuln to other things if features has been turned on
this.getObject() = saxParserWithFeatureExternalGesTurnedOn() and
(kind.isXxe() or kind.isDtdRetrieval())
}
override predicate mayExecuteInput() { none() }
override DataFlow::Node getOutput() {
// note: the output of parsing with SAX is that the content handler gets the
// data... but we don't currently model this (it's not trivial to do, and won't
// really give us any value, at least not as of right now).
none()
}
override DataFlow::Node getAPathArgument() {
// I considered whether we should try to reduce FPs from people passing file-like
// objects, which will not be a file system access (and couldn't cause a
// path-injection).
//
// I suppose that once we have proper flow-summary support for file-like objects,
// we can make the XXE/XML-bomb sinks allow an access-path, while the
// path-injection sink wouldn't, and then we will not end up with such FPs.
result = this.getAnInput()
}
}
/**
* A call to either `parse` or `parseString` from `xml.sax` module.
*
* See:
* - https://docs.python.org/3.10/library/xml.sax.html#xml.sax.parse
* - https://docs.python.org/3.10/library/xml.sax.html#xml.sax.parseString
*/
private class XmlSaxParsing extends DataFlow::CallCfgNode, XML::XmlParsing::Range {
XmlSaxParsing() {
this =
API::moduleImport("xml").getMember("sax").getMember(["parse", "parseString"]).getACall()
}
override DataFlow::Node getAnInput() {
result in [
this.getArg(0),
// parseString
this.getArgByName("string"),
// parse
this.getArgByName("source"),
]
}
override predicate vulnerableTo(XML::XmlParsingVulnerabilityKind kind) {
// always vuln to these
kind.isXmlBomb()
}
override predicate mayExecuteInput() { none() }
override DataFlow::Node getOutput() {
// note: the output of parsing with SAX is that the content handler gets the
// data... but we don't currently model this (it's not trivial to do, and won't
// really give us any value, at least not as of right now).
none()
}
}
/**
* A call to `xml.sax.parse`, which takes either a filename or a file-like object as
* argument. To capture the filename for path-injection, we have this subclass.
*
* See
* - https://docs.python.org/3/library/xml.etree.elementtree.html#xml.etree.ElementTree.parse
* - https://docs.python.org/3/library/xml.etree.elementtree.html#xml.etree.ElementTree.iterparse
*/
private class FileAccessFromXmlSaxParsing extends XmlSaxParsing, FileSystemAccess::Range {
FileAccessFromXmlSaxParsing() {
this = API::moduleImport("xml").getMember("sax").getMember("parse").getACall()
// I considered whether we should try to reduce FPs from people passing file-like
// objects, which will not be a file system access (and couldn't cause a
// path-injection).
//
// I suppose that once we have proper flow-summary support for file-like objects,
// we can make the XXE/XML-bomb sinks allow an access-path, while the
// path-injection sink wouldn't, and then we will not end up with such FPs.
}
override DataFlow::Node getAPathArgument() { result = this.getAnInput() }
}
// ---------------------------------------------------------------------------
// xml.dom.*
// ---------------------------------------------------------------------------
/**
* A call to the `parse` or `parseString` methods from `xml.dom.minidom` or `xml.dom.pulldom`.
*
* Both of these modules are based on SAX parsers.
*
* See
* - https://docs.python.org/3/library/xml.dom.minidom.html#xml.dom.minidom.parse
* - https://docs.python.org/3/library/xml.dom.pulldom.html#xml.dom.pulldom.parse
*/
private class XmlDomParsing extends DataFlow::CallCfgNode, XML::XmlParsing::Range {
XmlDomParsing() {
this =
API::moduleImport("xml")
.getMember("dom")
.getMember(["minidom", "pulldom"])
.getMember(["parse", "parseString"])
.getACall()
}
override DataFlow::Node getAnInput() {
result in [
this.getArg(0),
// parseString
this.getArgByName("string"),
// minidom.parse
this.getArgByName("file"),
// pulldom.parse
this.getArgByName("stream_or_string"),
]
}
DataFlow::Node getParserArg() { result in [this.getArg(1), this.getArgByName("parser")] }
override predicate vulnerableTo(XML::XmlParsingVulnerabilityKind kind) {
this.getParserArg() = saxParserWithFeatureExternalGesTurnedOn() and
(kind.isXxe() or kind.isDtdRetrieval())
or
kind.isXmlBomb()
}
override predicate mayExecuteInput() { none() }
override DataFlow::Node getOutput() { result = this }
}
/**
* A call to the `parse` or `parseString` methods from `xml.dom.minidom` or
* `xml.dom.pulldom`, which takes either a filename or a file-like object as argument.
* To capture the filename for path-injection, we have this subclass.
*
* See
* - https://docs.python.org/3/library/xml.dom.minidom.html#xml.dom.minidom.parse
* - https://docs.python.org/3/library/xml.dom.pulldom.html#xml.dom.pulldom.parse
*/
private class FileAccessFromXmlDomParsing extends XmlDomParsing, FileSystemAccess::Range {
FileAccessFromXmlDomParsing() {
this =
API::moduleImport("xml")
.getMember("dom")
.getMember(["minidom", "pulldom"])
.getMember("parse")
.getACall()
// I considered whether we should try to reduce FPs from people passing file-like
// objects, which will not be a file system access (and couldn't cause a
// path-injection).
//
// I suppose that once we have proper flow-summary support for file-like objects,
// we can make the XXE/XML-bomb sinks allow an access-path, while the
// path-injection sink wouldn't, and then we will not end up with such FPs.
}
override DataFlow::Node getAPathArgument() { result = this.getAnInput() }
}
}
// ---------------------------------------------------------------------------

View File

@@ -0,0 +1,39 @@
/**
* Provides classes modeling security-relevant aspects of the `xmltodict` PyPI package.
*
* See
* - https://pypi.org/project/xmltodict/
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
/**
* Provides classes modeling security-relevant aspects of the `xmltodict` PyPI package
*
* See
* - https://pypi.org/project/xmltodict/
*/
private module Xmltodict {
/**
* A call to `xmltodict.parse`.
*/
private class XMLtoDictParsing extends API::CallNode, XML::XmlParsing::Range {
XMLtoDictParsing() { this = API::moduleImport("xmltodict").getMember("parse").getACall() }
override DataFlow::Node getAnInput() {
result in [this.getArg(0), this.getArgByName("xml_input")]
}
override predicate vulnerableTo(XML::XmlParsingVulnerabilityKind kind) {
kind.isXmlBomb() and
this.getKeywordParameter("disable_entities").getAValueReachingRhs().asExpr() = any(False f)
}
override predicate mayExecuteInput() { none() }
override DataFlow::Node getOutput() { result = this }
}
}

View File

@@ -0,0 +1,49 @@
/**
* Provides default sources, sinks and sanitizers for detecting
* "XML bomb"
* vulnerabilities, as well as extension points for adding your own.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import semmle.python.dataflow.new.RemoteFlowSources
/**
* Provides default sources, sinks and sanitizers for detecting "XML bomb"
* vulnerabilities, as well as extension points for adding your own.
*/
module XmlBomb {
/**
* A data flow source for XML-bomb vulnerabilities.
*/
abstract class Source extends DataFlow::Node { }
/**
* A data flow sink for XML-bomb vulnerabilities.
*/
abstract class Sink extends DataFlow::Node { }
/**
* A sanitizer for XML-bomb vulnerabilities.
*/
abstract class Sanitizer extends DataFlow::Node { }
/** A source of remote user input, considered as a flow source for XML bomb vulnerabilities. */
class RemoteFlowSourceAsSource extends Source {
RemoteFlowSourceAsSource() { this instanceof RemoteFlowSource }
}
/**
* A call to an XML parser that is vulnerable to XML bombs.
*/
class XmlParsingVulnerableToXmlBomb extends Sink {
XmlParsingVulnerableToXmlBomb() {
exists(XML::XmlParsing parsing, XML::XmlParsingVulnerabilityKind kind |
kind.isXmlBomb() and
parsing.vulnerableTo(kind) and
this = parsing.getAnInput()
)
}
}
}

View File

@@ -0,0 +1,28 @@
/**
* Provides a taint-tracking configuration for detecting "XML bomb" vulnerabilities.
*
* Note, for performance reasons: only import this file if
* `Configuration` is needed, otherwise
* `XmlBombCustomizations` should be imported instead.
*/
import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import XmlBombCustomizations::XmlBomb
/**
* A taint-tracking configuration for detecting "XML bomb" vulnerabilities.
*/
class Configuration extends TaintTracking::Configuration {
Configuration() { this = "XmlBomb" }
override predicate isSource(DataFlow::Node source) { source instanceof Source }
override predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
override predicate isSanitizer(DataFlow::Node node) {
super.isSanitizer(node) or
node instanceof Sanitizer
}
}

View File

@@ -0,0 +1,49 @@
/**
* Provides default sources, sinks and sanitizers for detecting
* "XML External Entity (XXE)"
* vulnerabilities, as well as extension points for adding your own.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import semmle.python.dataflow.new.RemoteFlowSources
/**
* Provides default sources, sinks and sanitizers for detecting "XML External Entity (XXE)"
* vulnerabilities, as well as extension points for adding your own.
*/
module Xxe {
/**
* A data flow source for XXE vulnerabilities.
*/
abstract class Source extends DataFlow::Node { }
/**
* A data flow sink for XXE vulnerabilities.
*/
abstract class Sink extends DataFlow::Node { }
/**
* A sanitizer for XXE vulnerabilities.
*/
abstract class Sanitizer extends DataFlow::Node { }
/** A source of remote user input, considered as a flow source for XXE vulnerabilities. */
class RemoteFlowSourceAsSource extends Source {
RemoteFlowSourceAsSource() { this instanceof RemoteFlowSource }
}
/**
* A call to an XML parser that is vulnerable to XXE.
*/
class XmlParsingVulnerableToXxe extends Sink {
XmlParsingVulnerableToXxe() {
exists(XML::XmlParsing parsing, XML::XmlParsingVulnerabilityKind kind |
kind.isXxe() and
parsing.vulnerableTo(kind) and
this = parsing.getAnInput()
)
}
}
}

View File

@@ -0,0 +1,28 @@
/**
* Provides a taint-tracking configuration for detecting "XML External Entity (XXE)" vulnerabilities.
*
* Note, for performance reasons: only import this file if
* `Configuration` is needed, otherwise
* `XxeCustomizations` should be imported instead.
*/
import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import XxeCustomizations::Xxe
/**
* A taint-tracking configuration for detecting "XML External Entity (XXE)" vulnerabilities.
*/
class Configuration extends TaintTracking::Configuration {
Configuration() { this = "Xxe" }
override predicate isSource(DataFlow::Node source) { source instanceof Source }
override predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
override predicate isSanitizer(DataFlow::Node node) {
super.isSanitizer(node) or
node instanceof Sanitizer
}
}

View File

@@ -0,0 +1,74 @@
<!DOCTYPE qhelp PUBLIC "-//Semmle//qhelp//EN" "qhelp.dtd">
<qhelp>
<overview>
<p>
Parsing untrusted XML files with a weakly configured XML parser may lead to an
XML External Entity (XXE) attack. This type of attack uses external entity references
to access arbitrary files on a system, carry out denial-of-service (DoS) attacks, or server-side
request forgery. Even when the result of parsing is not returned to the user, DoS attacks are still possible
and out-of-band data retrieval techniques may allow attackers to steal sensitive data.
</p>
</overview>
<recommendation>
<p>
The easiest way to prevent XXE attacks is to disable external entity handling when
parsing untrusted data. How this is done depends on the library being used. Note that some
libraries, such as recent versions of the XML libraries in the standard library of Python 3,
disable entity expansion by default,
so unless you have explicitly enabled entity expansion, no further action needs to be taken.
</p>
<p>
We recommend using the <a href="https://pypi.org/project/defusedxml/">defusedxml</a>
PyPI package, which has been created to prevent XML attacks (both XXE and XML bombs).
</p>
</recommendation>
<example>
<p>
The following example uses the <code>lxml</code> XML parser to parse a string
<code>xml_src</code>. That string is from an untrusted source, so this code is
vulnerable to an XXE attack, since the <a href="https://lxml.de/apidoc/lxml.etree.html#lxml.etree.XMLParser">
default parser</a> from <code>lxml.etree</code> allows local external entities to be resolved.
</p>
<sample src="examples/XxeBad.py"/>
<p>
To guard against XXE attacks with the <code>lxml</code> library, you should create a
parser with <code>resolve_entities</code> set to <code>false</code>. This means that no
entity expansion is undertaken, althuogh standard predefined entities such as
<code>&amp;gt;</code>, for writing <code>&gt;</code> inside the text of an XML element,
are still allowed.
</p>
<sample src="examples/XxeGood.py"/>
</example>
<references>
<li>
OWASP:
<a href="https://www.owasp.org/index.php/XML_External_Entity_(XXE)_Processing">XML External Entity (XXE) Processing</a>.
</li>
<li>
Timothy Morgen:
<a href="https://research.nccgroup.com/2014/05/19/xml-schema-dtd-and-entity-attacks-a-compendium-of-known-techniques/">XML Schema, DTD, and Entity Attacks</a>.
</li>
<li>
Timur Yunusov, Alexey Osipov:
<a href="https://www.slideshare.net/qqlan/bh-ready-v4">XML Out-Of-Band Data Retrieval</a>.
</li>
<li>
Python 3 standard library:
<a href="https://docs.python.org/3/library/xml.html#xml-vulnerabilities">XML Vulnerabilities</a>.
</li>
<li>
Python 2 standard library:
<a href="https://docs.python.org/2/library/xml.html#xml-vulnerabilities">XML Vulnerabilities</a>.
</li>
<li>
PortSwigger:
<a href="https://portswigger.net/web-security/xxe">XML external entity (XXE) injection</a>.
</li>
</references>
</qhelp>

View File

@@ -0,0 +1,23 @@
/**
* @name XML external entity expansion
* @description Parsing user input as an XML document with external
* entity expansion is vulnerable to XXE attacks.
* @kind path-problem
* @problem.severity error
* @security-severity 9.1
* @precision high
* @id py/xxe
* @tags security
* external/cwe/cwe-611
* external/cwe/cwe-827
*/
import python
import semmle.python.security.dataflow.XxeQuery
import DataFlow::PathGraph
from Configuration cfg, DataFlow::PathNode source, DataFlow::PathNode sink
where cfg.hasFlowPath(source, sink)
select sink.getNode(), source, sink,
"A $@ is parsed as XML without guarding against external entity expansion.", source.getNode(),
"user-provided value"

View File

@@ -0,0 +1,10 @@
from flask import Flask, request
import lxml.etree
app = Flask(__name__)
@app.post("/upload")
def upload():
xml_src = request.get_data()
doc = lxml.etree.fromstring(xml_src)
return lxml.etree.tostring(doc)

View File

@@ -0,0 +1,11 @@
from flask import Flask, request
import lxml.etree
app = Flask(__name__)
@app.post("/upload")
def upload():
xml_src = request.get_data()
parser = lxml.etree.XMLParser(resolve_entities=False)
doc = lxml.etree.fromstring(xml_src, parser=parser)
return lxml.etree.tostring(doc)

View File

@@ -0,0 +1,74 @@
<!DOCTYPE qhelp PUBLIC "-//Semmle//qhelp//EN" "qhelp.dtd">
<qhelp>
<overview>
<p>
Parsing untrusted XML files with a weakly configured XML parser may be vulnerable to
denial-of-service (DoS) attacks exploiting uncontrolled internal entity expansion.
</p>
<p>
In XML, so-called <i>internal entities</i> are a mechanism for introducing an abbreviation
for a piece of text or part of a document. When a parser that has been configured
to expand entities encounters a reference to an internal entity, it replaces the entity
by the data it represents. The replacement text may itself contain other entity references,
which are expanded recursively. This means that entity expansion can increase document size
dramatically.
</p>
<p>
If untrusted XML is parsed with entity expansion enabled, a malicious attacker could
submit a document that contains very deeply nested entity definitions, causing the parser
to take a very long time or use large amounts of memory. This is sometimes called an
<i>XML bomb</i> attack.
</p>
</overview>
<recommendation>
<p>
The safest way to prevent XML bomb attacks is to disable entity expansion when parsing untrusted
data. Whether this can be done depends on the library being used. Note that some libraries, such as
<code>lxml</code>, have measures enabled by default to prevent such DoS XML attacks, so
unless you have explicitly set <code>huge_tree</code> to <code>True</code>, no further action is needed.
</p>
<p>
We recommend using the <a href="https://pypi.org/project/defusedxml/">defusedxml</a>
PyPI package, which has been created to prevent XML attacks (both XXE and XML bombs).
</p>
</recommendation>
<example>
<p>
The following example uses the <code>xml.etree</code> XML parser provided by the Python standard library to
parse a string <code>xml_src</code>. That string is from an untrusted source, so this code is
vulnerable to a DoS attack, since the <code>xml.etree</code> XML parser expands internal entities by default:
</p>
<sample src="examples/XmlBombBad.py"/>
<p>
It is not possible to guard against internal entity expansion with
<code>xml.etree</code>, so to guard against these attacks, the following example uses
the <a href="https://pypi.org/project/defusedxml/">defusedxml</a>
PyPI package instead, which is not exposed to such internal entity expansion attacks.
</p>
<sample src="examples/XmlBombGood.py"/>
</example>
<references>
<li>
Wikipedia:
<a href="https://en.wikipedia.org/wiki/Billion_laughs">Billion Laughs</a>.
</li>
<li>
Bryan Sullivan:
<a href="https://msdn.microsoft.com/en-us/magazine/ee335713.aspx">Security Briefs - XML Denial of Service Attacks and Defenses</a>.
</li>
<li>
Python 3 standard library:
<a href="https://docs.python.org/3/library/xml.html#xml-vulnerabilities">XML Vulnerabilities</a>.
</li>
<li>
Python 2 standard library:
<a href="https://docs.python.org/2/library/xml.html#xml-vulnerabilities">XML Vulnerabilities</a>.
</li>
</references>
</qhelp>

View File

@@ -0,0 +1,23 @@
/**
* @name XML internal entity expansion
* @description Parsing user input as an XML document with arbitrary internal
* entity expansion is vulnerable to denial-of-service attacks.
* @kind path-problem
* @problem.severity warning
* @security-severity 7.5
* @precision high
* @id py/xml-bomb
* @tags security
* external/cwe/cwe-776
* external/cwe/cwe-400
*/
import python
import semmle.python.security.dataflow.XmlBombQuery
import DataFlow::PathGraph
from Configuration cfg, DataFlow::PathNode source, DataFlow::PathNode sink
where cfg.hasFlowPath(source, sink)
select sink.getNode(), source, sink,
"A $@ is parsed as XML without guarding against uncontrolled entity expansion.", source.getNode(),
"user-provided value"

View File

@@ -0,0 +1,10 @@
from flask import Flask, request
import xml.etree.ElementTree as ET
app = Flask(__name__)
@app.post("/upload")
def upload():
xml_src = request.get_data()
doc = ET.fromstring(xml_src)
return ET.tostring(doc)

View File

@@ -0,0 +1,10 @@
from flask import Flask, request
import defusedxml.ElementTree as ET
app = Flask(__name__)
@app.post("/upload")
def upload():
xml_src = request.get_data()
doc = ET.fromstring(xml_src)
return ET.tostring(doc)

View File

@@ -0,0 +1,5 @@
---
category: newQuery
---
* "XML external entity expansion" (`py/xxe`). Results will appear by default. This query was based on [an experimental query by @jorgectf](https://github.com/github/codeql/pull/6112).
* "XML internal entity expansion" (`py/xml-bomb`). Results will appear by default. This query was based on [an experimental query by @jorgectf](https://github.com/github/codeql/pull/6112).

View File

@@ -10,16 +10,10 @@
*/
private import python
private import experimental.semmle.python.Concepts
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
from DataFlow::CallCfgNode call, string kinds
from DataFlow::CallCfgNode call
where
call = API::moduleImport("xmlrpc").getMember("server").getMember("SimpleXMLRPCServer").getACall() and
kinds =
strictconcat(ExperimentalXML::XMLVulnerabilityKind kind |
kind.isBillionLaughs() or kind.isQuadraticBlowup()
|
kind, ", "
)
select call, "SimpleXMLRPCServer is vulnerable to: " + kinds + "."
call = API::moduleImport("xmlrpc").getMember("server").getMember("SimpleXMLRPCServer").getACall()
select call, "SimpleXMLRPCServer is vulnerable to XML bombs"

View File

@@ -1,4 +0,0 @@
<?xml version="1.0"?>
<!DOCTYPE dt [
<!ENTITY xxe SYSTEM "file:///etc/passwd">]>
<test>&xxe;</test>

View File

@@ -1,25 +0,0 @@
from flask import request, Flask
import lxml.etree
import xml.etree.ElementTree
app = Flask(__name__)
# BAD
@app.route("/bad")
def bad():
xml_content = request.args['xml_content']
parser = lxml.etree.XMLParser()
parsed_xml = xml.etree.ElementTree.fromstring(xml_content, parser=parser)
return parsed_xml.text
# GOOD
@app.route("/good")
def good():
xml_content = request.args['xml_content']
parser = lxml.etree.XMLParser(resolve_entities=False)
parsed_xml = xml.etree.ElementTree.fromstring(xml_content, parser=parser)
return parsed_xml.text

View File

@@ -1,48 +0,0 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>
Parsing untrusted XML files with a weakly configured XML parser may lead to attacks such as XML External Entity (XXE),
Billion Laughs, Quadratic Blowup and DTD retrieval.
This type of attack uses external entity references to access arbitrary files on a system, carry out denial of
service, or server side request forgery. Even when the result of parsing is not returned to the user, out-of-band
data retrieval techniques may allow attackers to steal sensitive data. Denial of services can also be carried out
in this situation.
</p>
</overview>
<recommendation>
<p>
Use <a href="https://pypi.org/project/defusedxml/">defusedxml</a>, a Python package aimed
to prevent any potentially malicious operation.
</p>
</recommendation>
<example>
<p>
The following example calls <code>xml.etree.ElementTree.fromstring</code> using a parser (<code>lxml.etree.XMLParser</code>)
that is not safely configured on untrusted data, and is therefore inherently unsafe.
</p>
<sample src="XmlEntityInjection.py"/>
<p>
Providing an input (<code>xml_content</code>) like the following XML content against /bad, the request response would contain the contents of
<code>/etc/passwd</code>.
</p>
<sample src="XXE.xml"/>
</example>
<references>
<li>Python 3 <a href="https://docs.python.org/3/library/xml.html#xml-vulnerabilities">XML Vulnerabilities</a>.</li>
<li>Python 2 <a href="https://docs.python.org/2/library/xml.html#xml-vulnerabilities">XML Vulnerabilities</a>.</li>
<li>Python <a href="https://www.edureka.co/blog/python-xml-parser-tutorial/">XML Parsing</a>.</li>
<li>OWASP vulnerability description: <a href="https://www.owasp.org/index.php/XML_External_Entity_(XXE)_Processing">XML External Entity (XXE) Processing</a>.</li>
<li>OWASP guidance on parsing xml files: <a href="https://cheatsheetseries.owasp.org/cheatsheets/XML_External_Entity_Prevention_Cheat_Sheet.html#python">XXE Prevention Cheat Sheet</a>.</li>
<li>Paper by Timothy Morgen: <a href="https://research.nccgroup.com/2014/05/19/xml-schema-dtd-and-entity-attacks-a-compendium-of-known-techniques/">XML Schema, DTD, and Entity Attacks</a></li>
<li>Out-of-band data retrieval: Timur Yunusov &amp; Alexey Osipov, Black hat EU 2013: <a href="https://www.slideshare.net/qqlan/bh-ready-v4">XML Out-Of-Band Data Retrieval</a>.</li>
<li>Denial of service attack (Billion laughs): <a href="https://en.wikipedia.org/wiki/Billion_laughs">Billion Laughs.</a></li>
</references>
</qhelp>

View File

@@ -1,31 +0,0 @@
/**
* @name XML Entity injection
* @description User input should not be parsed allowing the injection of entities.
* @kind path-problem
* @problem.severity error
* @id py/xml-entity-injection
* @tags security
* external/cwe/cwe-611
* external/cwe/cwe-776
* external/cwe/cwe-827
*/
// determine precision above
import python
import experimental.semmle.python.security.dataflow.XmlEntityInjection
import DataFlow::PathGraph
from
XmlEntityInjection::XmlEntityInjectionConfiguration config, DataFlow::PathNode source,
DataFlow::PathNode sink, string kinds
where
config.hasFlowPath(source, sink) and
kinds =
strictconcat(string kind |
kind = sink.getNode().(XmlEntityInjection::Sink).getVulnerableKind()
|
kind, ", "
)
select sink.getNode(), source, sink,
"$@ XML input is constructed from a $@ and is vulnerable to: " + kinds + ".", sink.getNode(),
"This", source.getNode(), "user-provided value"

View File

@@ -81,74 +81,6 @@ class LogOutput extends DataFlow::Node {
DataFlow::Node getAnInput() { result = range.getAnInput() }
}
/**
* Since there is both XML module in normal and experimental Concepts,
* we have to rename the experimental module as this.
*/
module ExperimentalXML {
/**
* A kind of XML vulnerability.
*
* See https://pypi.org/project/defusedxml/#python-xml-libraries
*/
class XMLVulnerabilityKind extends string {
XMLVulnerabilityKind() {
this in ["Billion Laughs", "Quadratic Blowup", "XXE", "DTD retrieval"]
}
/** Holds for Billion Laughs vulnerability kind. */
predicate isBillionLaughs() { this = "Billion Laughs" }
/** Holds for Quadratic Blowup vulnerability kind. */
predicate isQuadraticBlowup() { this = "Quadratic Blowup" }
/** Holds for XXE vulnerability kind. */
predicate isXxe() { this = "XXE" }
/** Holds for DTD retrieval vulnerability kind. */
predicate isDtdRetrieval() { this = "DTD retrieval" }
}
/**
* A data-flow node that parses XML.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `XMLParsing` instead.
*/
class XMLParsing extends DataFlow::Node instanceof XMLParsing::Range {
/**
* Gets the argument containing the content to parse.
*/
DataFlow::Node getAnInput() { result = super.getAnInput() }
/**
* Holds if this XML parsing is vulnerable to `kind`.
*/
predicate vulnerableTo(XMLVulnerabilityKind kind) { super.vulnerableTo(kind) }
}
/** Provides classes for modeling XML parsing APIs. */
module XMLParsing {
/**
* A data-flow node that parses XML.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `XMLParsing` instead.
*/
abstract class Range extends DataFlow::Node {
/**
* Gets the argument containing the content to parse.
*/
abstract DataFlow::Node getAnInput();
/**
* Holds if this XML parsing is vulnerable to `kind`.
*/
abstract predicate vulnerableTo(XMLVulnerabilityKind kind);
}
}
}
/** Provides classes for modeling LDAP query execution-related APIs. */
module LdapQuery {
/**

View File

@@ -3,7 +3,6 @@
*/
private import experimental.semmle.python.frameworks.Stdlib
private import experimental.semmle.python.frameworks.Xml
private import experimental.semmle.python.frameworks.Flask
private import experimental.semmle.python.frameworks.Django
private import experimental.semmle.python.frameworks.Werkzeug

View File

@@ -1,442 +0,0 @@
/**
* Provides class and predicates to track external data that
* may represent malicious XML objects.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import experimental.semmle.python.Concepts
private import semmle.python.ApiGraphs
module XML = ExperimentalXML;
private module XmlEtree {
/**
* Provides models for `xml.etree` parsers
*
* See
* - https://docs.python.org/3.10/library/xml.etree.elementtree.html#xml.etree.ElementTree.XMLParser
* - https://docs.python.org/3.10/library/xml.etree.elementtree.html#xml.etree.ElementTree.XMLPullParser
*/
module XMLParser {
/**
* A source of instances of `xml.etree` parsers, extend this class to model new instances.
*
* This can include instantiations of the class, return values from function
* calls, or a special parameter that will be set when functions are called by an external
* library.
*
* Use the predicate `XMLParser::instance()` to get references to instances of `xml.etree` parsers.
*/
abstract class InstanceSource extends DataFlow::LocalSourceNode { }
/** A direct instantiation of `xml.etree` parsers. */
private class ClassInstantiation extends InstanceSource, DataFlow::CallCfgNode {
ClassInstantiation() {
this =
API::moduleImport("xml")
.getMember("etree")
.getMember("ElementTree")
.getMember("XMLParser")
.getACall()
or
this =
API::moduleImport("xml")
.getMember("etree")
.getMember("ElementTree")
.getMember("XMLPullParser")
.getACall()
}
}
/** Gets a reference to an `xml.etree` parser instance. */
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
t.start() and
result instanceof InstanceSource
or
exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t))
}
/** Gets a reference to an `xml.etree` parser instance. */
DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }
/**
* A call to the `feed` method of an `xml.etree` parser.
*/
private class XMLEtreeParserFeedCall extends DataFlow::MethodCallNode, XML::XMLParsing::Range {
XMLEtreeParserFeedCall() { this.calls(instance(), "feed") }
override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("data")] }
override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) {
kind.isBillionLaughs() or kind.isQuadraticBlowup()
}
}
}
/**
* A call to either of:
* - `xml.etree.ElementTree.fromstring`
* - `xml.etree.ElementTree.fromstringlist`
* - `xml.etree.ElementTree.XML`
* - `xml.etree.ElementTree.XMLID`
* - `xml.etree.ElementTree.parse`
* - `xml.etree.ElementTree.iterparse`
*/
private class XMLEtreeParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range {
XMLEtreeParsing() {
this =
API::moduleImport("xml")
.getMember("etree")
.getMember("ElementTree")
.getMember(["fromstring", "fromstringlist", "XML", "XMLID", "parse", "iterparse"])
.getACall()
}
override DataFlow::Node getAnInput() {
result in [
this.getArg(0),
// fromstring / XML / XMLID
this.getArgByName("text"),
// fromstringlist
this.getArgByName("sequence"),
// parse / iterparse
this.getArgByName("source"),
]
}
override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) {
// note: it does not matter what `xml.etree` parser you are using, you cannot
// change the security features anyway :|
kind.isBillionLaughs() or kind.isQuadraticBlowup()
}
}
}
private module SaxBasedParsing {
/**
* A call to the `setFeature` method on a XML sax parser.
*
* See https://docs.python.org/3.10/library/xml.sax.reader.html#xml.sax.xmlreader.XMLReader.setFeature
*/
class SaxParserSetFeatureCall extends API::CallNode, DataFlow::MethodCallNode {
SaxParserSetFeatureCall() {
this =
API::moduleImport("xml")
.getMember("sax")
.getMember("make_parser")
.getReturn()
.getMember("setFeature")
.getACall()
}
// The keyword argument names does not match documentation. I checked (with Python
// 3.9.5) that the names used here actually works.
API::Node getFeatureArg() { result = this.getParameter(0, "name") }
API::Node getStateArg() { result = this.getParameter(1, "state") }
}
/**
* Gets a reference to a XML sax parser that has `feature_external_ges` turned on.
*
* See https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges
*/
private DataFlow::Node saxParserWithFeatureExternalGesTurnedOn(DataFlow::TypeTracker t) {
t.start() and
exists(SaxParserSetFeatureCall call |
call.getFeatureArg().getARhs() =
API::moduleImport("xml")
.getMember("sax")
.getMember("handler")
.getMember("feature_external_ges")
.getAUse() and
call.getStateArg().getAValueReachingRhs().asExpr().(BooleanLiteral).booleanValue() = true and
result = call.getObject()
)
or
exists(DataFlow::TypeTracker t2 |
t = t2.smallstep(saxParserWithFeatureExternalGesTurnedOn(t2), result)
) and
// take account of that we can set the feature to False, which makes the parser safe again
not exists(SaxParserSetFeatureCall call |
call.getObject() = result and
call.getFeatureArg().getARhs() =
API::moduleImport("xml")
.getMember("sax")
.getMember("handler")
.getMember("feature_external_ges")
.getAUse() and
call.getStateArg().getAValueReachingRhs().asExpr().(BooleanLiteral).booleanValue() = false
)
}
/**
* Gets a reference to a XML sax parser that has `feature_external_ges` turned on.
*
* See https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges
*/
DataFlow::Node saxParserWithFeatureExternalGesTurnedOn() {
result = saxParserWithFeatureExternalGesTurnedOn(DataFlow::TypeTracker::end())
}
/**
* A call to the `parse` method on a SAX XML parser.
*/
private class XMLSaxInstanceParsing extends DataFlow::MethodCallNode, XML::XMLParsing::Range {
XMLSaxInstanceParsing() {
this =
API::moduleImport("xml")
.getMember("sax")
.getMember("make_parser")
.getReturn()
.getMember("parse")
.getACall()
}
override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("source")] }
override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) {
// always vuln to these
(kind.isBillionLaughs() or kind.isQuadraticBlowup())
or
// can be vuln to other things if features has been turned on
this.getObject() = saxParserWithFeatureExternalGesTurnedOn() and
(kind.isXxe() or kind.isDtdRetrieval())
}
}
/**
* A call to either `parse` or `parseString` from `xml.sax` module.
*
* See:
* - https://docs.python.org/3.10/library/xml.sax.html#xml.sax.parse
* - https://docs.python.org/3.10/library/xml.sax.html#xml.sax.parseString
*/
private class XMLSaxParsing extends DataFlow::MethodCallNode, XML::XMLParsing::Range {
XMLSaxParsing() {
this =
API::moduleImport("xml").getMember("sax").getMember(["parse", "parseString"]).getACall()
}
override DataFlow::Node getAnInput() {
result in [
this.getArg(0),
// parseString
this.getArgByName("string"),
// parse
this.getArgByName("source"),
]
}
override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) {
// always vuln to these
(kind.isBillionLaughs() or kind.isQuadraticBlowup())
or
// can be vuln to other things if features has been turned on
this.getObject() = saxParserWithFeatureExternalGesTurnedOn() and
(kind.isXxe() or kind.isDtdRetrieval())
}
}
/**
* A call to the `parse` or `parseString` methods from `xml.dom.minidom` or `xml.dom.pulldom`.
*
* Both of these modules are based on SAX parsers.
*/
private class XMLDomParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range {
XMLDomParsing() {
this =
API::moduleImport("xml")
.getMember("dom")
.getMember(["minidom", "pulldom"])
.getMember(["parse", "parseString"])
.getACall()
}
override DataFlow::Node getAnInput() {
result in [
this.getArg(0),
// parseString
this.getArgByName("string"),
// minidom.parse
this.getArgByName("file"),
// pulldom.parse
this.getArgByName("stream_or_string"),
]
}
DataFlow::Node getParserArg() { result in [this.getArg(1), this.getArgByName("parser")] }
override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) {
this.getParserArg() = saxParserWithFeatureExternalGesTurnedOn() and
(kind.isXxe() or kind.isDtdRetrieval())
or
(kind.isBillionLaughs() or kind.isQuadraticBlowup())
}
}
}
private module Lxml {
/**
* Provides models for `lxml.etree` parsers.
*
* See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser
*/
module XMLParser {
/**
* A source of instances of `lxml.etree` parsers, extend this class to model new instances.
*
* This can include instantiations of the class, return values from function
* calls, or a special parameter that will be set when functions are called by an external
* library.
*
* Use the predicate `XMLParser::instance()` to get references to instances of `lxml.etree` parsers.
*/
abstract class InstanceSource extends DataFlow::LocalSourceNode {
/** Holds if this instance is vulnerable to `kind`. */
abstract predicate vulnerableTo(XML::XMLVulnerabilityKind kind);
}
/**
* A call to `lxml.etree.XMLParser`.
*
* See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser
*/
private class LXMLParser extends InstanceSource, DataFlow::CallCfgNode {
LXMLParser() {
this = API::moduleImport("lxml").getMember("etree").getMember("XMLParser").getACall()
}
// NOTE: it's not possible to change settings of a parser after constructing it
override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) {
kind.isXxe() and
(
// resolve_entities has default True
not exists(this.getArgByName("resolve_entities"))
or
this.getArgByName("resolve_entities").getALocalSource().asExpr() = any(True t)
)
or
(kind.isBillionLaughs() or kind.isQuadraticBlowup()) and
this.getArgByName("huge_tree").getALocalSource().asExpr() = any(True t) and
not this.getArgByName("resolve_entities").getALocalSource().asExpr() = any(False t)
or
kind.isDtdRetrieval() and
this.getArgByName("load_dtd").getALocalSource().asExpr() = any(True t) and
this.getArgByName("no_network").getALocalSource().asExpr() = any(False t)
}
}
/**
* A call to `lxml.etree.get_default_parser`.
*
* See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.get_default_parser
*/
private class LXMLDefaultParser extends InstanceSource, DataFlow::CallCfgNode {
LXMLDefaultParser() {
this =
API::moduleImport("lxml").getMember("etree").getMember("get_default_parser").getACall()
}
override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) {
// as highlighted by
// https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser
// by default XXE is allow. so as long as the default parser has not been
// overridden, the result is also vuln to XXE.
kind.isXxe()
// TODO: take into account that you can override the default parser with `lxml.etree.set_default_parser`.
}
}
/** Gets a reference to an `lxml.etree` parsers instance, with origin in `origin` */
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t, InstanceSource origin) {
t.start() and
result = origin
or
exists(DataFlow::TypeTracker t2 | result = instance(t2, origin).track(t2, t))
}
/** Gets a reference to an `lxml.etree` parsers instance, with origin in `origin` */
DataFlow::Node instance(InstanceSource origin) {
instance(DataFlow::TypeTracker::end(), origin).flowsTo(result)
}
/** Gets a reference to an `lxml.etree` parser instance, that is vulnerable to `kind`. */
DataFlow::Node instanceVulnerableTo(XML::XMLVulnerabilityKind kind) {
exists(InstanceSource origin | result = instance(origin) and origin.vulnerableTo(kind))
}
/**
* A call to the `feed` method of an `lxml` parser.
*/
private class LXMLParserFeedCall extends DataFlow::MethodCallNode, XML::XMLParsing::Range {
LXMLParserFeedCall() { this.calls(instance(_), "feed") }
override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("data")] }
override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) {
this.calls(instanceVulnerableTo(kind), "feed")
}
}
}
/**
* A call to either of:
* - `lxml.etree.fromstring`
* - `lxml.etree.fromstringlist`
* - `lxml.etree.XML`
* - `lxml.etree.parse`
* - `lxml.etree.parseid`
*
* See https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.fromstring
*/
private class LXMLParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range {
LXMLParsing() {
this =
API::moduleImport("lxml")
.getMember("etree")
.getMember(["fromstring", "fromstringlist", "XML", "parse", "parseid"])
.getACall()
}
override DataFlow::Node getAnInput() {
result in [
this.getArg(0),
// fromstring / XML
this.getArgByName("text"),
// fromstringlist
this.getArgByName("strings"),
// parse / parseid
this.getArgByName("source"),
]
}
DataFlow::Node getParserArg() { result in [this.getArg(1), this.getArgByName("parser")] }
override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) {
this.getParserArg() = XMLParser::instanceVulnerableTo(kind)
or
kind.isXxe() and
not exists(this.getParserArg())
}
}
}
private module Xmltodict {
/**
* A call to `xmltodict.parse`.
*/
private class XMLtoDictParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range {
XMLtoDictParsing() { this = API::moduleImport("xmltodict").getMember("parse").getACall() }
override DataFlow::Node getAnInput() {
result in [this.getArg(0), this.getArgByName("xml_input")]
}
override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) {
(kind.isBillionLaughs() or kind.isQuadraticBlowup()) and
this.getArgByName("disable_entities").getALocalSource().asExpr() = any(False f)
}
}
}

View File

@@ -1,28 +0,0 @@
import python
import experimental.semmle.python.Concepts
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import semmle.python.dataflow.new.RemoteFlowSources
import semmle.python.dataflow.new.BarrierGuards
module XmlEntityInjection {
import XmlEntityInjectionCustomizations::XmlEntityInjection
class XmlEntityInjectionConfiguration extends TaintTracking::Configuration {
XmlEntityInjectionConfiguration() { this = "XmlEntityInjectionConfiguration" }
override predicate isSource(DataFlow::Node source) {
source instanceof RemoteFlowSourceAsSource
}
override predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
guard instanceof SanitizerGuard
}
override predicate isAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
any(AdditionalTaintStep s).step(nodeFrom, nodeTo)
}
}
}

View File

@@ -1,86 +0,0 @@
/**
* Provides default sources, sinks and sanitizers for detecting
* "ldap injection"
* vulnerabilities, as well as extension points for adding your own.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import experimental.semmle.python.Concepts
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.BarrierGuards
private import semmle.python.ApiGraphs
/**
* Provides default sources, sinks and sanitizers for detecting "xml injection"
* vulnerabilities, as well as extension points for adding your own.
*/
module XmlEntityInjection {
/**
* A data flow source for "xml injection" vulnerabilities.
*/
abstract class Source extends DataFlow::Node { }
/**
* A data flow sink for "xml injection" vulnerabilities.
*/
abstract class Sink extends DataFlow::Node {
/** Gets the kind of XML injection that this sink is vulnerable to. */
abstract string getVulnerableKind();
}
/**
* A sanitizer guard for "xml injection" vulnerabilities.
*/
abstract class SanitizerGuard extends DataFlow::BarrierGuard { }
/**
* A unit class for adding additional taint steps.
*
* Extend this class to add additional taint steps that should apply to `XmlEntityInjection`
* taint configuration.
*/
class AdditionalTaintStep extends Unit {
/**
* Holds if the step from `nodeFrom` to `nodeTo` should be considered a taint
* step for `XmlEntityInjection` configuration.
*/
abstract predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo);
}
/**
* An input to a direct XML parsing function, considered as a flow sink.
*
* See `XML::XMLParsing`.
*/
class XMLParsingInputAsSink extends Sink {
ExperimentalXML::XMLParsing xmlParsing;
XMLParsingInputAsSink() { this = xmlParsing.getAnInput() }
override string getVulnerableKind() { xmlParsing.vulnerableTo(result) }
}
/**
* A source of remote user input, considered as a flow source.
*/
class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { }
/**
* A comparison with a constant string, considered as a sanitizer-guard.
*/
class StringConstCompareAsSanitizerGuard extends SanitizerGuard, StringConstCompare { }
/**
* A taint step for `io`'s `StringIO` and `BytesIO` methods.
*/
class IoAdditionalTaintStep extends AdditionalTaintStep {
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
exists(DataFlow::CallCfgNode ioCalls |
ioCalls = API::moduleImport("io").getMember(["StringIO", "BytesIO"]).getACall() and
nodeFrom = ioCalls.getArg(0) and
nodeTo = ioCalls
)
}
}
}

View File

@@ -1,33 +0,0 @@
import python
import experimental.semmle.python.Concepts
import experimental.semmle.python.frameworks.Xml
import semmle.python.dataflow.new.DataFlow
import TestUtilities.InlineExpectationsTest
private import semmle.python.dataflow.new.internal.PrintNode
class XmlParsingTest extends InlineExpectationsTest {
XmlParsingTest() { this = "XmlParsingTest" }
override string getARelevantTag() { result in ["input", "vuln"] }
override predicate hasActualResult(Location location, string element, string tag, string value) {
exists(location.getFile().getRelativePath()) and
exists(XML::XMLParsing parsing |
exists(DataFlow::Node input |
input = parsing.getAnInput() and
location = input.getLocation() and
element = input.toString() and
value = prettyNodeForInlineTest(input) and
tag = "input"
)
or
exists(XML::XMLVulnerabilityKind kind |
parsing.vulnerableTo(kind) and
location = parsing.getLocation() and
element = parsing.toString() and
value = "'" + kind + "'" and
tag = "vuln"
)
)
}
}

View File

@@ -1,54 +0,0 @@
from io import StringIO
import lxml.etree
x = "some xml"
# different parsing methods
lxml.etree.fromstring(x) # $ input=x vuln='XXE'
lxml.etree.fromstring(text=x) # $ input=x vuln='XXE'
lxml.etree.fromstringlist([x]) # $ input=List vuln='XXE'
lxml.etree.fromstringlist(strings=[x]) # $ input=List vuln='XXE'
lxml.etree.XML(x) # $ input=x vuln='XXE'
lxml.etree.XML(text=x) # $ input=x vuln='XXE'
lxml.etree.parse(StringIO(x)) # $ input=StringIO(..) vuln='XXE'
lxml.etree.parse(source=StringIO(x)) # $ input=StringIO(..) vuln='XXE'
lxml.etree.parseid(StringIO(x)) # $ input=StringIO(..) vuln='XXE'
lxml.etree.parseid(source=StringIO(x)) # $ input=StringIO(..) vuln='XXE'
# With default parsers (nothing changed)
parser = lxml.etree.XMLParser()
lxml.etree.fromstring(x, parser=parser) # $ input=x vuln='XXE'
parser = lxml.etree.get_default_parser()
lxml.etree.fromstring(x, parser=parser) # $ input=x vuln='XXE'
# manual use of feed method
parser = lxml.etree.XMLParser()
parser.feed(x) # $ input=x vuln='XXE'
parser.feed(data=x) # $ input=x vuln='XXE'
parser.close()
# XXE-safe
parser = lxml.etree.XMLParser(resolve_entities=False)
lxml.etree.fromstring(x, parser) # $ input=x
lxml.etree.fromstring(x, parser=parser) # $ input=x
# XXE-vuln
parser = lxml.etree.XMLParser(resolve_entities=True)
lxml.etree.fromstring(x, parser=parser) # $ input=x vuln='XXE'
# Billion laughs vuln (also XXE)
parser = lxml.etree.XMLParser(huge_tree=True)
lxml.etree.fromstring(x, parser=parser) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' vuln='XXE'
# Safe for both Billion laughs and XXE
parser = lxml.etree.XMLParser(resolve_entities=False, huge_tree=True)
lxml.etree.fromstring(x, parser=parser) # $ input=x
# DTD retrival vuln (also XXE)
parser = lxml.etree.XMLParser(load_dtd=True, no_network=False)
lxml.etree.fromstring(x, parser=parser) # $ input=x vuln='DTD retrieval' vuln='XXE'

View File

@@ -1,677 +0,0 @@
#!/usr/bin/env python3
# this file doesn't have a .py extension so the extractor doesn't pick it up, so it
# doesn't have to be annotated
# This file shows the ways to make exploit vulnerable XML parsing
# see
# https://pypi.org/project/defusedxml/#python-xml-libraries
# https://docs.python.org/3.10/library/xml.html#xml-vulnerabilities
import pathlib
from flask import Flask
import threading
import multiprocessing
import time
from io import StringIO
import pytest
HOST = "localhost"
PORT = 8080
FLAG_PATH = pathlib.Path(__file__).with_name("flag")
# ==============================================================================
# xml samples
ok_xml = f"""<?xml version="1.0"?>
<test>hello world</test>
"""
local_xxe = f"""<?xml version="1.0"?>
<!DOCTYPE dt [
<!ENTITY xxe SYSTEM "file://{FLAG_PATH}">
]>
<test>&xxe;</test>
"""
remote_xxe = f"""<?xml version="1.0"?>
<!DOCTYPE dt [
<!ENTITY remote_xxe SYSTEM "http://{HOST}:{PORT}/xxe">
]>
<test>&remote_xxe;</test>
"""
billion_laughs = """<?xml version="1.0"?>
<!DOCTYPE lolz [
<!ENTITY lol "lol">
<!ELEMENT lolz (#PCDATA)>
<!ENTITY lol1 "&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;">
<!ENTITY lol2 "&lol1;&lol1;&lol1;&lol1;&lol1;&lol1;&lol1;&lol1;&lol1;&lol1;">
<!ENTITY lol3 "&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;">
<!ENTITY lol4 "&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;">
<!ENTITY lol5 "&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;">
<!ENTITY lol6 "&lol5;&lol5;&lol5;&lol5;&lol5;&lol5;&lol5;&lol5;&lol5;&lol5;">
<!ENTITY lol7 "&lol6;&lol6;&lol6;&lol6;&lol6;&lol6;&lol6;&lol6;&lol6;&lol6;">
<!ENTITY lol8 "&lol7;&lol7;&lol7;&lol7;&lol7;&lol7;&lol7;&lol7;&lol7;&lol7;">
<!ENTITY lol9 "&lol8;&lol8;&lol8;&lol8;&lol8;&lol8;&lol8;&lol8;&lol8;&lol8;">
]>
<lolz>&lol9;</lolz>"""
quadratic_blowup = f"""<?xml version="1.0"?>
<!DOCTYPE wolo [
<!ENTITY oops "{"a" * 100000}">
]>
<foo>{"&oops;"*20000}</foo>"""
dtd_retrieval = f"""<?xml version="1.0"?>
<!DOCTYPE dt PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://{HOST}:{PORT}/test.dtd">
<foo>bar</foo>
"""
# ==============================================================================
# other setup
# we set up local Flask application so we can tests whether loading external resources
# works (such as SSRF from DTD-retrival works)
app = Flask(__name__)
@app.route("/alive")
def alive():
return "ok"
hit_dtd = False
@app.route("/test.dtd")
def test_dtd():
global hit_dtd
hit_dtd = True
return """<?xml version="1.0" encoding="UTF-8"?>"""
hit_xxe = False
@app.route("/xxe")
def test_xxe():
global hit_xxe
hit_xxe = True
return "ok"
def run_app():
app.run(host=HOST, port=PORT)
@pytest.fixture(scope="session", autouse=True)
def flask_app_running():
# run flask in other thread
flask_thread = threading.Thread(target=run_app, daemon=True)
flask_thread.start()
# give flask a bit of time to start
time.sleep(0.1)
# ensure that the server works
import requests
requests.get(f"http://{HOST}:{PORT}/alive")
yield
def expects_timeout(func):
def inner():
proc = multiprocessing.Process(target=func)
proc.start()
time.sleep(0.1)
assert proc.exitcode == None
proc.kill()
proc.join()
return inner
class TestExpectsTimeout:
"test that expects_timeout works as expected"
@staticmethod
@expects_timeout
def test_slow():
time.sleep(1000)
@staticmethod
def test_fast():
@expects_timeout
def fast_func():
return "done!"
with pytest.raises(AssertionError):
fast_func()
# ==============================================================================
import xml.sax
import xml.sax.handler
class SimpleHandler(xml.sax.ContentHandler):
def __init__(self):
self.result = []
def characters(self, data):
self.result.append(data)
class TestSax():
# always vuln to billion laughs, quadratic
@staticmethod
@expects_timeout
def test_billion_laughs_allowed_by_default():
parser = xml.sax.make_parser()
parser.parse(StringIO(billion_laughs))
@staticmethod
@expects_timeout
def test_quardratic_blowup_allowed_by_default():
parser = xml.sax.make_parser()
parser.parse(StringIO(quadratic_blowup))
@staticmethod
def test_ok_xml():
handler = SimpleHandler()
parser = xml.sax.make_parser()
parser.setContentHandler(handler)
parser.parse(StringIO(ok_xml))
assert handler.result == ["hello world"], handler.result
@staticmethod
def test_xxe_disabled_by_default():
handler = SimpleHandler()
parser = xml.sax.make_parser()
parser.setContentHandler(handler)
parser.parse(StringIO(local_xxe))
assert handler.result == [], handler.result
@staticmethod
def test_local_xxe_manually_enabled():
handler = SimpleHandler()
parser = xml.sax.make_parser()
parser.setContentHandler(handler)
parser.setFeature(xml.sax.handler.feature_external_ges, True)
parser.parse(StringIO(local_xxe))
assert handler.result[0] == "SECRET_FLAG", handler.result
@staticmethod
def test_remote_xxe_manually_enabled():
global hit_xxe
hit_xxe = False
handler = SimpleHandler()
parser = xml.sax.make_parser()
parser.setContentHandler(handler)
parser.setFeature(xml.sax.handler.feature_external_ges, True)
parser.parse(StringIO(remote_xxe))
assert handler.result == ["ok"], handler.result
assert hit_xxe == True
@staticmethod
def test_dtd_disabled_by_default():
global hit_dtd
hit_dtd = False
parser = xml.sax.make_parser()
parser.parse(StringIO(dtd_retrieval))
assert hit_dtd == False
@staticmethod
def test_dtd_manually_enabled():
global hit_dtd
hit_dtd = False
parser = xml.sax.make_parser()
parser.setFeature(xml.sax.handler.feature_external_ges, True)
parser.parse(StringIO(dtd_retrieval))
assert hit_dtd == True
# ==============================================================================
import xml.etree.ElementTree
class TestEtree:
# always vuln to billion laughs, quadratic
@staticmethod
@expects_timeout
def test_billion_laughs_allowed_by_default():
parser = xml.etree.ElementTree.XMLParser()
_root = xml.etree.ElementTree.fromstring(billion_laughs, parser=parser)
@staticmethod
@expects_timeout
def test_quardratic_blowup_allowed_by_default():
parser = xml.etree.ElementTree.XMLParser()
_root = xml.etree.ElementTree.fromstring(quadratic_blowup, parser=parser)
@staticmethod
def test_ok_xml():
parser = xml.etree.ElementTree.XMLParser()
root = xml.etree.ElementTree.fromstring(ok_xml, parser=parser)
assert root.tag == "test"
assert root.text == "hello world"
@staticmethod
def test_ok_xml_sax_parser():
# you _can_ pass a SAX parser to xml.etree... but it doesn't give you the output :|
parser = xml.sax.make_parser()
root = xml.etree.ElementTree.fromstring(ok_xml, parser=parser)
assert root == None
@staticmethod
def test_ok_xml_lxml_parser():
# this is technically possible, since parsers follow the same API, and the
# `fromstring` function is just a thin wrapper... seems very unlikely that
# anyone would do this though :|
parser = lxml.etree.XMLParser()
root = xml.etree.ElementTree.fromstring(ok_xml, parser=parser)
assert root.tag == "test"
assert root.text == "hello world"
@staticmethod
def test_xxe_not_possible():
parser = xml.etree.ElementTree.XMLParser()
try:
_root = xml.etree.ElementTree.fromstring(local_xxe, parser=parser)
assert False
except xml.etree.ElementTree.ParseError as e:
assert "undefined entity &xxe" in str(e)
@staticmethod
def test_dtd_not_possible():
global hit_dtd
hit_dtd = False
parser = xml.etree.ElementTree.XMLParser()
_root = xml.etree.ElementTree.fromstring(dtd_retrieval, parser=parser)
assert hit_dtd == False
# ==============================================================================
import lxml.etree
class TestLxml:
# see https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser
@staticmethod
def test_billion_laughs_disabled_by_default():
parser = lxml.etree.XMLParser()
try:
_root = lxml.etree.fromstring(billion_laughs, parser=parser)
assert False
except lxml.etree.XMLSyntaxError as e:
assert "Detected an entity reference loop" in str(e)
@staticmethod
def test_quardratic_blowup_disabled_by_default():
parser = lxml.etree.XMLParser()
try:
_root = lxml.etree.fromstring(quadratic_blowup, parser=parser)
assert False
except lxml.etree.XMLSyntaxError as e:
assert "Detected an entity reference loop" in str(e)
@staticmethod
@expects_timeout
def test_billion_laughs_manually_enabled():
parser = lxml.etree.XMLParser(huge_tree=True)
root = lxml.etree.fromstring(billion_laughs, parser=parser)
@staticmethod
@expects_timeout
def test_quadratic_blowup_manually_enabled():
parser = lxml.etree.XMLParser(huge_tree=True)
root = lxml.etree.fromstring(quadratic_blowup, parser=parser)
@staticmethod
def test_billion_laughs_huge_tree_not_enough():
parser = lxml.etree.XMLParser(huge_tree=True, resolve_entities=False)
root = lxml.etree.fromstring(billion_laughs, parser=parser)
assert root.tag == "lolz"
assert root.text == None
@staticmethod
def test_quadratic_blowup_huge_tree_not_enough():
parser = lxml.etree.XMLParser(huge_tree=True, resolve_entities=False)
root = lxml.etree.fromstring(quadratic_blowup, parser=parser)
assert root.tag == "foo"
assert root.text == None
@staticmethod
def test_ok_xml():
parser = lxml.etree.XMLParser()
root = lxml.etree.fromstring(ok_xml, parser=parser)
assert root.tag == "test"
assert root.text == "hello world"
@staticmethod
def test_local_xxe_enabled_by_default():
parser = lxml.etree.XMLParser()
root = lxml.etree.fromstring(local_xxe, parser=parser)
assert root.tag == "test"
assert root.text == "SECRET_FLAG\n", root.text
@staticmethod
def test_local_xxe_disabled():
parser = lxml.etree.XMLParser(resolve_entities=False)
root = lxml.etree.fromstring(local_xxe, parser=parser)
assert root.tag == "test"
assert root.text == None
@staticmethod
def test_remote_xxe_disabled_by_default():
global hit_xxe
hit_xxe = False
parser = lxml.etree.XMLParser()
try:
root = lxml.etree.fromstring(remote_xxe, parser=parser)
assert False
except lxml.etree.XMLSyntaxError as e:
assert "Failure to process entity remote_xxe" in str(e)
assert hit_xxe == False
@staticmethod
def test_remote_xxe_manually_enabled():
global hit_xxe
hit_xxe = False
parser = lxml.etree.XMLParser(no_network=False)
root = lxml.etree.fromstring(remote_xxe, parser=parser)
assert root.tag == "test"
assert root.text == "ok"
assert hit_xxe == True
@staticmethod
def test_dtd_disabled_by_default():
global hit_dtd
hit_dtd = False
parser = lxml.etree.XMLParser()
root = lxml.etree.fromstring(dtd_retrieval, parser=parser)
assert hit_dtd == False
@staticmethod
def test_dtd_manually_enabled():
global hit_dtd
hit_dtd = False
# Need to set BOTH load_dtd and no_network
parser = lxml.etree.XMLParser(load_dtd=True)
root = lxml.etree.fromstring(dtd_retrieval, parser=parser)
assert hit_dtd == False
parser = lxml.etree.XMLParser(no_network=False)
root = lxml.etree.fromstring(dtd_retrieval, parser=parser)
assert hit_dtd == False
parser = lxml.etree.XMLParser(load_dtd=True, no_network=False)
root = lxml.etree.fromstring(dtd_retrieval, parser=parser)
assert hit_dtd == True
hit_dtd = False
# Setting dtd_validation also does not allow the remote access
parser = lxml.etree.XMLParser(dtd_validation=True, load_dtd=True)
try:
root = lxml.etree.fromstring(dtd_retrieval, parser=parser)
except lxml.etree.XMLSyntaxError:
pass
assert hit_dtd == False
# ==============================================================================
import xmltodict
class TestXmltodict:
@staticmethod
def test_billion_laughs_disabled_by_default():
d = xmltodict.parse(billion_laughs)
assert d == {"lolz": None}, d
@staticmethod
def test_quardratic_blowup_disabled_by_default():
d = xmltodict.parse(quadratic_blowup)
assert d == {"foo": None}, d
@staticmethod
@expects_timeout
def test_billion_laughs_manually_enabled():
xmltodict.parse(billion_laughs, disable_entities=False)
@staticmethod
@expects_timeout
def test_quardratic_blowup_manually_enabled():
xmltodict.parse(quadratic_blowup, disable_entities=False)
@staticmethod
def test_ok_xml():
d = xmltodict.parse(ok_xml)
assert d == {"test": "hello world"}, d
@staticmethod
def test_local_xxe_not_possible():
d = xmltodict.parse(local_xxe)
assert d == {"test": None}
d = xmltodict.parse(local_xxe, disable_entities=False)
assert d == {"test": None}
@staticmethod
def test_remote_xxe_not_possible():
global hit_xxe
hit_xxe = False
d = xmltodict.parse(remote_xxe)
assert d == {"test": None}
assert hit_xxe == False
d = xmltodict.parse(remote_xxe, disable_entities=False)
assert d == {"test": None}
assert hit_xxe == False
@staticmethod
def test_dtd_not_possible():
global hit_dtd
hit_dtd = False
d = xmltodict.parse(dtd_retrieval)
assert hit_dtd == False
# ==============================================================================
import xml.dom.minidom
class TestMinidom:
@staticmethod
@expects_timeout
def test_billion_laughs():
xml.dom.minidom.parseString(billion_laughs)
@staticmethod
@expects_timeout
def test_quardratic_blowup():
xml.dom.minidom.parseString(quadratic_blowup)
@staticmethod
def test_ok_xml():
doc = xml.dom.minidom.parseString(ok_xml)
assert doc.documentElement.tagName == "test"
assert doc.documentElement.childNodes[0].data == "hello world"
@staticmethod
def test_xxe():
# disabled by default
doc = xml.dom.minidom.parseString(local_xxe)
assert doc.documentElement.tagName == "test"
assert doc.documentElement.childNodes == []
# but can be turned on
parser = xml.sax.make_parser()
parser.setFeature(xml.sax.handler.feature_external_ges, True)
doc = xml.dom.minidom.parseString(local_xxe, parser=parser)
assert doc.documentElement.tagName == "test"
assert doc.documentElement.childNodes[0].data == "SECRET_FLAG"
# which also works remotely
global hit_xxe
hit_xxe = False
parser = xml.sax.make_parser()
parser.setFeature(xml.sax.handler.feature_external_ges, True)
_doc = xml.dom.minidom.parseString(remote_xxe, parser=parser)
assert hit_xxe == True
@staticmethod
def test_dtd():
# not possible by default
global hit_dtd
hit_dtd = False
_doc = xml.dom.minidom.parseString(dtd_retrieval)
assert hit_dtd == False
# but can be turned on
parser = xml.sax.make_parser()
parser.setFeature(xml.sax.handler.feature_external_ges, True)
_doc = xml.dom.minidom.parseString(dtd_retrieval, parser=parser)
assert hit_dtd == True
# ==============================================================================
import xml.dom.pulldom
class TestPulldom:
@staticmethod
@expects_timeout
def test_billion_laughs():
doc = xml.dom.pulldom.parseString(billion_laughs)
# you NEED to iterate over the items for it to take long
for event, node in doc:
pass
@staticmethod
@expects_timeout
def test_quardratic_blowup():
doc = xml.dom.pulldom.parseString(quadratic_blowup)
for event, node in doc:
pass
@staticmethod
def test_ok_xml():
doc = xml.dom.pulldom.parseString(ok_xml)
for event, node in doc:
if event == xml.dom.pulldom.START_ELEMENT:
assert node.tagName == "test"
elif event == xml.dom.pulldom.CHARACTERS:
assert node.data == "hello world"
@staticmethod
def test_xxe():
# disabled by default
doc = xml.dom.pulldom.parseString(local_xxe)
found_flag = False
for event, node in doc:
if event == xml.dom.pulldom.START_ELEMENT:
assert node.tagName == "test"
elif event == xml.dom.pulldom.CHARACTERS:
if node.data == "SECRET_FLAG":
found_flag = True
assert found_flag == False
# but can be turned on
parser = xml.sax.make_parser()
parser.setFeature(xml.sax.handler.feature_external_ges, True)
doc = xml.dom.pulldom.parseString(local_xxe, parser=parser)
found_flag = False
for event, node in doc:
if event == xml.dom.pulldom.START_ELEMENT:
assert node.tagName == "test"
elif event == xml.dom.pulldom.CHARACTERS:
if node.data == "SECRET_FLAG":
found_flag = True
assert found_flag == True
# which also works remotely
global hit_xxe
hit_xxe = False
parser = xml.sax.make_parser()
parser.setFeature(xml.sax.handler.feature_external_ges, True)
doc = xml.dom.pulldom.parseString(remote_xxe, parser=parser)
assert hit_xxe == False
for event, node in doc:
pass
assert hit_xxe == True
@staticmethod
def test_dtd():
# not possible by default
global hit_dtd
hit_dtd = False
doc = xml.dom.pulldom.parseString(dtd_retrieval)
for event, node in doc:
pass
assert hit_dtd == False
# but can be turned on
parser = xml.sax.make_parser()
parser.setFeature(xml.sax.handler.feature_external_ges, True)
doc = xml.dom.pulldom.parseString(dtd_retrieval, parser=parser)
for event, node in doc:
pass
assert hit_dtd == True
# ==============================================================================
import xml.parsers.expat
class TestExpat:
# this is the underlying parser implementation used by the rest of the Python
# standard library. But people are probably not using this directly.
@staticmethod
@expects_timeout
def test_billion_laughs():
parser = xml.parsers.expat.ParserCreate()
parser.Parse(billion_laughs, True)
@staticmethod
@expects_timeout
def test_quardratic_blowup():
parser = xml.parsers.expat.ParserCreate()
parser.Parse(quadratic_blowup, True)
@staticmethod
def test_ok_xml():
char_data_recv = []
def char_data_handler(data):
char_data_recv.append(data)
parser = xml.parsers.expat.ParserCreate()
parser.CharacterDataHandler = char_data_handler
parser.Parse(ok_xml, True)
assert char_data_recv == ["hello world"]
@staticmethod
def test_xxe():
# not vuln by default
char_data_recv = []
def char_data_handler(data):
char_data_recv.append(data)
parser = xml.parsers.expat.ParserCreate()
parser.CharacterDataHandler = char_data_handler
parser.Parse(local_xxe, True)
assert char_data_recv == []
# there might be ways to make it vuln, but I did not investigate futher.
@staticmethod
def test_dtd():
# not vuln by default
global hit_dtd
hit_dtd = False
parser = xml.parsers.expat.ParserCreate()
parser.Parse(dtd_retrieval, True)
assert hit_dtd == False
# there might be ways to make it vuln, but I did not investigate futher.

View File

@@ -1,31 +0,0 @@
from io import StringIO
import xml.dom.minidom
import xml.dom.pulldom
import xml.sax
x = "some xml"
# minidom
xml.dom.minidom.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup'
xml.dom.minidom.parse(file=StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup'
xml.dom.minidom.parseString(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
xml.dom.minidom.parseString(string=x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
# pulldom
xml.dom.pulldom.parse(StringIO(x))['START_DOCUMENT'][1] # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup'
xml.dom.pulldom.parse(stream_or_string=StringIO(x))['START_DOCUMENT'][1] # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup'
xml.dom.pulldom.parseString(x)['START_DOCUMENT'][1] # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
xml.dom.pulldom.parseString(string=x)['START_DOCUMENT'][1] # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
# These are based on SAX parses, and you can specify your own, so you can expose yourself to XXE (yay/)
parser = xml.sax.make_parser()
parser.setFeature(xml.sax.handler.feature_external_ges, True)
xml.dom.minidom.parse(StringIO(x), parser) # $ input=StringIO(..) vuln='Billion Laughs' vuln='DTD retrieval' vuln='Quadratic Blowup' vuln='XXE'
xml.dom.minidom.parse(StringIO(x), parser=parser) # $ input=StringIO(..) vuln='Billion Laughs' vuln='DTD retrieval' vuln='Quadratic Blowup' vuln='XXE'
xml.dom.pulldom.parse(StringIO(x), parser) # $ input=StringIO(..) vuln='Billion Laughs' vuln='DTD retrieval' vuln='Quadratic Blowup' vuln='XXE'
xml.dom.pulldom.parse(StringIO(x), parser=parser) # $ input=StringIO(..) vuln='Billion Laughs' vuln='DTD retrieval' vuln='Quadratic Blowup' vuln='XXE'

View File

@@ -1,45 +0,0 @@
from io import StringIO
import xml.etree.ElementTree
x = "some xml"
# Parsing in different ways
xml.etree.ElementTree.fromstring(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
xml.etree.ElementTree.fromstring(text=x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
xml.etree.ElementTree.fromstringlist([x]) # $ input=List vuln='Billion Laughs' vuln='Quadratic Blowup'
xml.etree.ElementTree.fromstringlist(sequence=[x]) # $ input=List vuln='Billion Laughs' vuln='Quadratic Blowup'
xml.etree.ElementTree.XML(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
xml.etree.ElementTree.XML(text=x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
xml.etree.ElementTree.XMLID(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
xml.etree.ElementTree.XMLID(text=x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
xml.etree.ElementTree.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup'
xml.etree.ElementTree.parse(source=StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup'
xml.etree.ElementTree.iterparse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup'
xml.etree.ElementTree.iterparse(source=StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup'
# With parsers (no options available to disable/enable security features)
parser = xml.etree.ElementTree.XMLParser()
xml.etree.ElementTree.fromstring(x, parser=parser) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
# manual use of feed method
parser = xml.etree.ElementTree.XMLParser()
parser.feed(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
parser.feed(data=x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
parser.close()
# manual use of feed method on XMLPullParser
parser = xml.etree.ElementTree.XMLPullParser()
parser.feed(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
parser.feed(data=x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
parser.close()
# note: it's technically possible to use the thing wrapper func `fromstring` with an
# `lxml` parser, and thereby change what vulnerabilities you are exposed to.. but it
# seems very unlikely that anyone would do this, so we have intentionally not added any
# tests for this.

View File

@@ -1,64 +0,0 @@
from io import StringIO
import xml.sax
x = "some xml"
class MainHandler(xml.sax.ContentHandler):
def __init__(self):
self._result = []
def characters(self, data):
self._result.append(data)
xml.sax.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup'
xml.sax.parse(source=StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup'
xml.sax.parseString(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
xml.sax.parseString(string=x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
parser = xml.sax.make_parser()
parser.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup'
parser.parse(source=StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup'
# You can make it vuln to both XXE and DTD retrieval by setting this flag
# see https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges
parser = xml.sax.make_parser()
parser.setFeature(xml.sax.handler.feature_external_ges, True)
parser.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='DTD retrieval' vuln='Quadratic Blowup' vuln='XXE'
parser = xml.sax.make_parser()
parser.setFeature(xml.sax.handler.feature_external_ges, False)
parser.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup'
# Forward Type Tracking test
def func(cond):
parser = xml.sax.make_parser()
if cond:
parser.setFeature(xml.sax.handler.feature_external_ges, True)
parser.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='DTD retrieval' vuln='Quadratic Blowup' vuln='XXE'
else:
parser.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup'
# make it vuln, then making it safe
# a bit of an edge-case, but is nice to be able to handle.
parser = xml.sax.make_parser()
parser.setFeature(xml.sax.handler.feature_external_ges, True)
parser.setFeature(xml.sax.handler.feature_external_ges, False)
parser.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup'
def check_conditional_assignment(cond):
parser = xml.sax.make_parser()
if cond:
parser.setFeature(xml.sax.handler.feature_external_ges, True)
else:
parser.setFeature(xml.sax.handler.feature_external_ges, False)
parser.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='DTD retrieval' vuln='Quadratic Blowup' vuln='XXE'
def check_conditional_assignment2(cond):
parser = xml.sax.make_parser()
if cond:
flag_value = True
else:
flag_value = False
parser.setFeature(xml.sax.handler.feature_external_ges, flag_value)
parser.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='DTD retrieval' vuln='Quadratic Blowup' vuln='XXE'

View File

@@ -1,8 +0,0 @@
import xmltodict
x = "some xml"
xmltodict.parse(x) # $ input=x
xmltodict.parse(xml_input=x) # $ input=x
xmltodict.parse(x, disable_entities=False) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'

View File

@@ -539,3 +539,20 @@ class HttpClientRequestTest extends InlineExpectationsTest {
)
}
}
class XmlParsingTest extends InlineExpectationsTest {
XmlParsingTest() { this = "XmlParsingTest" }
override string getARelevantTag() { result = "xmlVuln" }
override predicate hasActualResult(Location location, string element, string tag, string value) {
exists(location.getFile().getRelativePath()) and
exists(XML::XmlParsing parsing, XML::XmlParsingVulnerabilityKind kind |
parsing.vulnerableTo(kind) and
location = parsing.getLocation() and
element = parsing.toString() and
value = "'" + kind + "'" and
tag = "xmlVuln"
)
}
}

View File

@@ -0,0 +1 @@
| xmlrpc_server.py:7:10:7:48 | ControlFlowNode for SimpleXMLRPCServer() | SimpleXMLRPCServer is vulnerable to XML bombs |

View File

@@ -1 +0,0 @@
| xmlrpc_server.py:7:10:7:48 | ControlFlowNode for SimpleXMLRPCServer() | SimpleXMLRPCServer is vulnerable to: Billion Laughs, Quadratic Blowup. |

View File

@@ -1 +0,0 @@
experimental/Security/CWE-611/XmlEntityInjection.ql

View File

@@ -0,0 +1,67 @@
from io import StringIO
import lxml.etree
x = "some xml"
# different parsing methods
lxml.etree.fromstring(x) # $ decodeFormat=XML decodeInput=x xmlVuln='XXE' decodeOutput=lxml.etree.fromstring(..)
lxml.etree.fromstring(text=x) # $ decodeFormat=XML decodeInput=x xmlVuln='XXE' decodeOutput=lxml.etree.fromstring(..)
lxml.etree.fromstringlist([x]) # $ decodeFormat=XML decodeInput=List xmlVuln='XXE' decodeOutput=lxml.etree.fromstringlist(..)
lxml.etree.fromstringlist(strings=[x]) # $ decodeFormat=XML decodeInput=List xmlVuln='XXE' decodeOutput=lxml.etree.fromstringlist(..)
lxml.etree.XML(x) # $ decodeFormat=XML decodeInput=x xmlVuln='XXE' decodeOutput=lxml.etree.XML(..)
lxml.etree.XML(text=x) # $ decodeFormat=XML decodeInput=x xmlVuln='XXE' decodeOutput=lxml.etree.XML(..)
lxml.etree.XMLID(x) # $ decodeFormat=XML decodeInput=x xmlVuln='XXE' decodeOutput=lxml.etree.XMLID(..)
lxml.etree.XMLID(text=x) # $ decodeFormat=XML decodeInput=x xmlVuln='XXE' decodeOutput=lxml.etree.XMLID(..)
xml_file = 'xml_file'
lxml.etree.parse(xml_file) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='XXE' decodeOutput=lxml.etree.parse(..) getAPathArgument=xml_file
lxml.etree.parse(source=xml_file) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='XXE' decodeOutput=lxml.etree.parse(..) getAPathArgument=xml_file
lxml.etree.parseid(xml_file) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='XXE' decodeOutput=lxml.etree.parseid(..) getAPathArgument=xml_file
lxml.etree.parseid(source=xml_file) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='XXE' decodeOutput=lxml.etree.parseid(..) getAPathArgument=xml_file
lxml.etree.iterparse(xml_file) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='XXE' decodeOutput=lxml.etree.iterparse(..) getAPathArgument=xml_file
lxml.etree.iterparse(source=xml_file) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='XXE' decodeOutput=lxml.etree.iterparse(..) getAPathArgument=xml_file
# With default parsers (nothing changed)
parser = lxml.etree.XMLParser()
lxml.etree.fromstring(x, parser=parser) # $ decodeFormat=XML decodeInput=x xmlVuln='XXE' decodeOutput=lxml.etree.fromstring(..)
parser = lxml.etree.get_default_parser()
lxml.etree.fromstring(x, parser=parser) # $ decodeFormat=XML decodeInput=x xmlVuln='XXE' decodeOutput=lxml.etree.fromstring(..)
# manual use of feed method
parser = lxml.etree.XMLParser()
parser.feed(x) # $ decodeFormat=XML decodeInput=x xmlVuln='XXE'
parser.feed(data=x) # $ decodeFormat=XML decodeInput=x xmlVuln='XXE'
parser.close() # $ decodeOutput=parser.close()
# XXE-safe
parser = lxml.etree.XMLParser(resolve_entities=False)
lxml.etree.fromstring(x, parser) # $ decodeFormat=XML decodeInput=x decodeOutput=lxml.etree.fromstring(..)
lxml.etree.fromstring(x, parser=parser) # $ decodeFormat=XML decodeInput=x decodeOutput=lxml.etree.fromstring(..)
# XXE-vuln
parser = lxml.etree.XMLParser(resolve_entities=True)
lxml.etree.fromstring(x, parser=parser) # $ decodeFormat=XML decodeInput=x xmlVuln='XXE' decodeOutput=lxml.etree.fromstring(..)
# Billion laughs vuln (also XXE)
parser = lxml.etree.XMLParser(huge_tree=True)
lxml.etree.fromstring(x, parser=parser) # $ decodeFormat=XML decodeInput=x xmlVuln='XML bomb' xmlVuln='XXE' decodeOutput=lxml.etree.fromstring(..)
# Safe for both Billion laughs and XXE
parser = lxml.etree.XMLParser(resolve_entities=False, huge_tree=True)
lxml.etree.fromstring(x, parser=parser) # $ decodeFormat=XML decodeInput=x decodeOutput=lxml.etree.fromstring(..)
# DTD retrival vuln (also XXE)
parser = lxml.etree.XMLParser(load_dtd=True, no_network=False)
lxml.etree.fromstring(x, parser=parser) # $ decodeFormat=XML decodeInput=x xmlVuln='DTD retrieval' xmlVuln='XXE' decodeOutput=lxml.etree.fromstring(..)
# iterparse configurations ... this doesn't use a parser argument but takes MOST (!) of
# the normal XMLParser arguments. Specifically, it doesn't allow disabling XXE :O
lxml.etree.iterparse(xml_file, huge_tree=True) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='XML bomb' xmlVuln='XXE' decodeOutput=lxml.etree.iterparse(..) getAPathArgument=xml_file
lxml.etree.iterparse(xml_file, load_dtd=True, no_network=False) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='DTD retrieval' xmlVuln='XXE' decodeOutput=lxml.etree.iterparse(..) getAPathArgument=xml_file

View File

@@ -1,21 +0,0 @@
from lxml import etree
from io import StringIO
def test_parse():
tree = etree.parse(StringIO('<foo><bar></bar></foo>'))
r = tree.xpath('/foo/bar') # $ getXPath='/foo/bar'
def test_XPath_class():
root = etree.XML("<root><a>TEXT</a></root>")
find_text = etree.XPath("path") # $ constructedXPath="path"
text = find_text(root)[0]
def test_ETXpath_class():
root = etree.XML("<root><a>TEXT</a></root>")
find_text = etree.ETXPath("path") # $ constructedXPath="path"
text = find_text(root)[0]
def test_XPathEvaluator_class():
root = etree.XML("<root><a>TEXT</a></root>")
search_root = etree.XPathEvaluator(root)
text = search_root("path")[0] # $ getXPath="path"

View File

@@ -0,0 +1,21 @@
from lxml import etree
from io import StringIO
def test_parse():
tree = etree.parse(StringIO('<foo><bar></bar></foo>')) # $ decodeFormat=XML decodeInput=StringIO(..) decodeOutput=etree.parse(..) xmlVuln='XXE' getAPathArgument=StringIO(..)
r = tree.xpath('/foo/bar') # $ getXPath='/foo/bar'
def test_XPath_class():
root = etree.XML("<root><a>TEXT</a></root>") # $ decodeFormat=XML decodeInput="<root><a>TEXT</a></root>" decodeOutput=etree.XML(..) xmlVuln='XXE'
find_text = etree.XPath("path") # $ constructedXPath="path"
text = find_text(root)[0]
def test_ETXpath_class():
root = etree.XML("<root><a>TEXT</a></root>") # $ decodeFormat=XML decodeInput="<root><a>TEXT</a></root>" decodeOutput=etree.XML(..) xmlVuln='XXE'
find_text = etree.ETXPath("path") # $ constructedXPath="path"
text = find_text(root)[0]
def test_XPathEvaluator_class():
root = etree.XML("<root><a>TEXT</a></root>") # $ decodeFormat=XML decodeInput="<root><a>TEXT</a></root>" decodeOutput=etree.XML(..) xmlVuln='XXE'
search_root = etree.XPathEvaluator(root)
text = search_root("path")[0] # $ getXPath="path"

View File

@@ -2,7 +2,7 @@ match = "dc:title"
ns = {'dc': 'http://purl.org/dc/elements/1.1/'}
import xml.etree.ElementTree as ET
tree = ET.parse('country_data.xml')
tree = ET.parse('country_data.xml') # $ decodeFormat=XML decodeInput='country_data.xml' decodeOutput=ET.parse(..) xmlVuln='XML bomb' getAPathArgument='country_data.xml'
root = tree.getroot()
root.find(match, namespaces=ns) # $ getXPath=match
@@ -10,8 +10,13 @@ root.findall(match, namespaces=ns) # $ getXPath=match
root.findtext(match, default=None, namespaces=ns) # $ getXPath=match
tree = ET.ElementTree()
tree.parse("index.xhtml")
tree.parse("index.xhtml") # $ decodeFormat=XML decodeInput="index.xhtml" decodeOutput=tree.parse(..) xmlVuln='XML bomb' getAPathArgument="index.xhtml"
tree.find(match, namespaces=ns) # $ getXPath=match
tree.findall(match, namespaces=ns) # $ getXPath=match
tree.findtext(match, default=None, namespaces=ns) # $ getXPath=match
parser = ET.XMLParser()
parser.feed("<foo>bar</foo>") # $ decodeFormat=XML decodeInput="<foo>bar</foo>" xmlVuln='XML bomb'
tree = parser.close() # $ decodeOutput=parser.close()
tree.find(match, namespaces=ns) # $ getXPath=match

View File

@@ -0,0 +1,47 @@
from io import StringIO, BytesIO
TAINTED_STRING = "TS"
TAINTED_BYTES = b"TB"
def ensure_tainted(*args):
print("ensure_tainted")
for arg in args:
print("", repr(arg))
def test_stringio():
ts = TAINTED_STRING
x = StringIO()
x.write(ts)
x.seek(0)
ensure_tainted(
StringIO(ts), # $ tainted
StringIO(initial_value=ts), # $ tainted
x, # $ tainted
x.read(), # $ tainted
StringIO(ts).read(), # $ tainted
)
def test_bytesio():
tb = TAINTED_BYTES
x = BytesIO()
x.write(tb)
x.seek(0)
ensure_tainted(
BytesIO(tb), # $ tainted
BytesIO(initial_bytes=tb), # $ tainted
x, # $ tainted
x.read(), # $ tainted
BytesIO(tb).read(), # $ tainted
)
test_stringio()
test_bytesio()

View File

@@ -0,0 +1,31 @@
from io import StringIO
import xml.dom.minidom
import xml.dom.pulldom
import xml.sax
x = "some xml"
# minidom
xml.dom.minidom.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XML bomb' decodeOutput=xml.dom.minidom.parse(..) getAPathArgument=StringIO(..)
xml.dom.minidom.parse(file=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XML bomb' decodeOutput=xml.dom.minidom.parse(..) getAPathArgument=StringIO(..)
xml.dom.minidom.parseString(x) # $ decodeFormat=XML decodeInput=x xmlVuln='XML bomb' decodeOutput=xml.dom.minidom.parseString(..)
xml.dom.minidom.parseString(string=x) # $ decodeFormat=XML decodeInput=x xmlVuln='XML bomb' decodeOutput=xml.dom.minidom.parseString(..)
# pulldom
xml.dom.pulldom.parse(StringIO(x))['START_DOCUMENT'][1] # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XML bomb' decodeOutput=xml.dom.pulldom.parse(..) getAPathArgument=StringIO(..)
xml.dom.pulldom.parse(stream_or_string=StringIO(x))['START_DOCUMENT'][1] # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XML bomb' decodeOutput=xml.dom.pulldom.parse(..) getAPathArgument=StringIO(..)
xml.dom.pulldom.parseString(x)['START_DOCUMENT'][1] # $ decodeFormat=XML decodeInput=x xmlVuln='XML bomb' decodeOutput=xml.dom.pulldom.parseString(..)
xml.dom.pulldom.parseString(string=x)['START_DOCUMENT'][1] # $ decodeFormat=XML decodeInput=x xmlVuln='XML bomb' decodeOutput=xml.dom.pulldom.parseString(..)
# These are based on SAX parses, and you can specify your own, so you can expose yourself to XXE (yay/)
parser = xml.sax.make_parser()
parser.setFeature(xml.sax.handler.feature_external_ges, True)
xml.dom.minidom.parse(StringIO(x), parser) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XML bomb' xmlVuln='DTD retrieval' xmlVuln='XXE' decodeOutput=xml.dom.minidom.parse(..) getAPathArgument=StringIO(..)
xml.dom.minidom.parse(StringIO(x), parser=parser) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XML bomb' xmlVuln='DTD retrieval' xmlVuln='XXE' decodeOutput=xml.dom.minidom.parse(..) getAPathArgument=StringIO(..)
xml.dom.pulldom.parse(StringIO(x), parser) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XML bomb' xmlVuln='DTD retrieval' xmlVuln='XXE' decodeOutput=xml.dom.pulldom.parse(..) getAPathArgument=StringIO(..)
xml.dom.pulldom.parse(StringIO(x), parser=parser) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XML bomb' xmlVuln='DTD retrieval' xmlVuln='XXE' decodeOutput=xml.dom.pulldom.parse(..) getAPathArgument=StringIO(..)

View File

@@ -0,0 +1,49 @@
from io import StringIO
import xml.etree.ElementTree
x = "some xml"
# Parsing in different ways
xml.etree.ElementTree.fromstring(x) # $ decodeFormat=XML decodeInput=x xmlVuln='XML bomb' decodeOutput=xml.etree.ElementTree.fromstring(..)
xml.etree.ElementTree.fromstring(text=x) # $ decodeFormat=XML decodeInput=x xmlVuln='XML bomb' decodeOutput=xml.etree.ElementTree.fromstring(..)
xml.etree.ElementTree.fromstringlist([x]) # $ decodeFormat=XML decodeInput=List xmlVuln='XML bomb' decodeOutput=xml.etree.ElementTree.fromstringlist(..)
xml.etree.ElementTree.fromstringlist(sequence=[x]) # $ decodeFormat=XML decodeInput=List xmlVuln='XML bomb' decodeOutput=xml.etree.ElementTree.fromstringlist(..)
xml.etree.ElementTree.XML(x) # $ decodeFormat=XML decodeInput=x xmlVuln='XML bomb' decodeOutput=xml.etree.ElementTree.XML(..)
xml.etree.ElementTree.XML(text=x) # $ decodeFormat=XML decodeInput=x xmlVuln='XML bomb' decodeOutput=xml.etree.ElementTree.XML(..)
xml.etree.ElementTree.XMLID(x) # $ decodeFormat=XML decodeInput=x xmlVuln='XML bomb' decodeOutput=xml.etree.ElementTree.XMLID(..)
xml.etree.ElementTree.XMLID(text=x) # $ decodeFormat=XML decodeInput=x xmlVuln='XML bomb' decodeOutput=xml.etree.ElementTree.XMLID(..)
xml.etree.ElementTree.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XML bomb' decodeOutput=xml.etree.ElementTree.parse(..) getAPathArgument=StringIO(..)
xml.etree.ElementTree.parse(source=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XML bomb' decodeOutput=xml.etree.ElementTree.parse(..) getAPathArgument=StringIO(..)
xml.etree.ElementTree.iterparse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XML bomb' decodeOutput=xml.etree.ElementTree.iterparse(..) getAPathArgument=StringIO(..)
xml.etree.ElementTree.iterparse(source=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XML bomb' decodeOutput=xml.etree.ElementTree.iterparse(..) getAPathArgument=StringIO(..)
tree = xml.etree.ElementTree.ElementTree()
tree.parse("file.xml") # $ decodeFormat=XML decodeInput="file.xml" xmlVuln='XML bomb' decodeOutput=tree.parse(..) getAPathArgument="file.xml"
tree.parse(source="file.xml") # $ decodeFormat=XML decodeInput="file.xml" xmlVuln='XML bomb' decodeOutput=tree.parse(..) getAPathArgument="file.xml"
# With parsers (no options available to disable/enable security features)
parser = xml.etree.ElementTree.XMLParser()
xml.etree.ElementTree.fromstring(x, parser=parser) # $ decodeFormat=XML decodeInput=x xmlVuln='XML bomb' decodeOutput=xml.etree.ElementTree.fromstring(..)
# manual use of feed method
parser = xml.etree.ElementTree.XMLParser()
parser.feed(x) # $ decodeFormat=XML decodeInput=x xmlVuln='XML bomb'
parser.feed(data=x) # $ decodeFormat=XML decodeInput=x xmlVuln='XML bomb'
parser.close() # $ decodeOutput=parser.close()
# manual use of feed method on XMLPullParser
parser = xml.etree.ElementTree.XMLPullParser()
parser.feed(x) # $ decodeFormat=XML decodeInput=x xmlVuln='XML bomb'
parser.feed(data=x) # $ decodeFormat=XML decodeInput=x xmlVuln='XML bomb'
parser.close() # $ decodeOutput=parser.close()
# note: it's technically possible to use the thing wrapper func `fromstring` with an
# `lxml` parser, and thereby change what vulnerabilities you are exposed to.. but it
# seems very unlikely that anyone would do this, so we have intentionally not added any
# tests for this.

View File

@@ -0,0 +1,64 @@
from io import StringIO
import xml.sax
x = "some xml"
class MainHandler(xml.sax.ContentHandler):
def __init__(self):
self._result = []
def characters(self, data):
self._result.append(data)
xml.sax.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XML bomb' getAPathArgument=StringIO(..)
xml.sax.parse(source=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XML bomb' getAPathArgument=StringIO(..)
xml.sax.parseString(x) # $ decodeFormat=XML decodeInput=x xmlVuln='XML bomb'
xml.sax.parseString(string=x) # $ decodeFormat=XML decodeInput=x xmlVuln='XML bomb'
parser = xml.sax.make_parser()
parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XML bomb' getAPathArgument=StringIO(..)
parser.parse(source=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XML bomb' getAPathArgument=StringIO(..)
# You can make it vuln to both XXE and DTD retrieval by setting this flag
# see https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges
parser = xml.sax.make_parser()
parser.setFeature(xml.sax.handler.feature_external_ges, True)
parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XML bomb' xmlVuln='DTD retrieval' xmlVuln='XXE' getAPathArgument=StringIO(..)
parser = xml.sax.make_parser()
parser.setFeature(xml.sax.handler.feature_external_ges, False)
parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XML bomb' getAPathArgument=StringIO(..)
# Forward Type Tracking test
def func(cond):
parser = xml.sax.make_parser()
if cond:
parser.setFeature(xml.sax.handler.feature_external_ges, True)
parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XML bomb' xmlVuln='DTD retrieval' xmlVuln='XXE' getAPathArgument=StringIO(..)
else:
parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XML bomb' getAPathArgument=StringIO(..)
# make it vuln, then making it safe
# a bit of an edge-case, but is nice to be able to handle.
parser = xml.sax.make_parser()
parser.setFeature(xml.sax.handler.feature_external_ges, True)
parser.setFeature(xml.sax.handler.feature_external_ges, False)
parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XML bomb' getAPathArgument=StringIO(..)
def check_conditional_assignment(cond):
parser = xml.sax.make_parser()
if cond:
parser.setFeature(xml.sax.handler.feature_external_ges, True)
else:
parser.setFeature(xml.sax.handler.feature_external_ges, False)
parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XML bomb' xmlVuln='DTD retrieval' xmlVuln='XXE' getAPathArgument=StringIO(..)
def check_conditional_assignment2(cond):
parser = xml.sax.make_parser()
if cond:
flag_value = True
else:
flag_value = False
parser.setFeature(xml.sax.handler.feature_external_ges, flag_value)
parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XML bomb' xmlVuln='DTD retrieval' xmlVuln='XXE' getAPathArgument=StringIO(..)

View File

@@ -0,0 +1,2 @@
import python
import experimental.meta.ConceptsTest

View File

@@ -0,0 +1,8 @@
import xmltodict
x = "some xml"
xmltodict.parse(x) # $ decodeFormat=XML decodeInput=x decodeOutput=xmltodict.parse(..)
xmltodict.parse(xml_input=x) # $ decodeFormat=XML decodeInput=x decodeOutput=xmltodict.parse(..)
xmltodict.parse(x, disable_entities=False) # $ decodeFormat=XML decodeInput=x xmlVuln='XML bomb' decodeOutput=xmltodict.parse(..)

View File

@@ -2,9 +2,6 @@ edges
| test.py:8:19:8:25 | ControlFlowNode for request | test.py:8:19:8:30 | ControlFlowNode for Attribute |
| test.py:8:19:8:30 | ControlFlowNode for Attribute | test.py:8:19:8:45 | ControlFlowNode for Subscript |
| test.py:8:19:8:45 | ControlFlowNode for Subscript | test.py:9:34:9:44 | ControlFlowNode for xml_content |
| test.py:13:19:13:25 | ControlFlowNode for request | test.py:13:19:13:30 | ControlFlowNode for Attribute |
| test.py:13:19:13:30 | ControlFlowNode for Attribute | test.py:13:19:13:45 | ControlFlowNode for Subscript |
| test.py:13:19:13:45 | ControlFlowNode for Subscript | test.py:15:34:15:44 | ControlFlowNode for xml_content |
| test.py:19:19:19:25 | ControlFlowNode for request | test.py:19:19:19:30 | ControlFlowNode for Attribute |
| test.py:19:19:19:30 | ControlFlowNode for Attribute | test.py:19:19:19:45 | ControlFlowNode for Subscript |
| test.py:19:19:19:45 | ControlFlowNode for Subscript | test.py:30:34:30:44 | ControlFlowNode for xml_content |
@@ -13,15 +10,11 @@ nodes
| test.py:8:19:8:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| test.py:8:19:8:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
| test.py:9:34:9:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content |
| test.py:13:19:13:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
| test.py:13:19:13:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| test.py:13:19:13:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
| test.py:15:34:15:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content |
| test.py:19:19:19:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
| test.py:19:19:19:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| test.py:19:19:19:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
| test.py:30:34:30:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content |
subpaths
#select
| test.py:9:34:9:44 | ControlFlowNode for xml_content | test.py:8:19:8:25 | ControlFlowNode for request | test.py:9:34:9:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | test.py:9:34:9:44 | ControlFlowNode for xml_content | This | test.py:8:19:8:25 | ControlFlowNode for request | user-provided value |
| test.py:30:34:30:44 | ControlFlowNode for xml_content | test.py:19:19:19:25 | ControlFlowNode for request | test.py:30:34:30:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. | test.py:30:34:30:44 | ControlFlowNode for xml_content | This | test.py:19:19:19:25 | ControlFlowNode for request | user-provided value |
| test.py:9:34:9:44 | ControlFlowNode for xml_content | test.py:8:19:8:25 | ControlFlowNode for request | test.py:9:34:9:44 | ControlFlowNode for xml_content | A $@ is parsed as XML without guarding against external entity expansion. | test.py:8:19:8:25 | ControlFlowNode for request | user-provided value |
| test.py:30:34:30:44 | ControlFlowNode for xml_content | test.py:19:19:19:25 | ControlFlowNode for request | test.py:30:34:30:44 | ControlFlowNode for xml_content | A $@ is parsed as XML without guarding against external entity expansion. | test.py:19:19:19:25 | ControlFlowNode for request | user-provided value |

View File

@@ -0,0 +1 @@
Security/CWE-611/Xxe.ql

View File

@@ -0,0 +1,12 @@
edges
| test.py:19:19:19:25 | ControlFlowNode for request | test.py:19:19:19:30 | ControlFlowNode for Attribute |
| test.py:19:19:19:30 | ControlFlowNode for Attribute | test.py:19:19:19:45 | ControlFlowNode for Subscript |
| test.py:19:19:19:45 | ControlFlowNode for Subscript | test.py:30:34:30:44 | ControlFlowNode for xml_content |
nodes
| test.py:19:19:19:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
| test.py:19:19:19:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| test.py:19:19:19:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
| test.py:30:34:30:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content |
subpaths
#select
| test.py:30:34:30:44 | ControlFlowNode for xml_content | test.py:19:19:19:25 | ControlFlowNode for request | test.py:30:34:30:44 | ControlFlowNode for xml_content | A $@ is parsed as XML without guarding against uncontrolled entity expansion. | test.py:19:19:19:25 | ControlFlowNode for request | user-provided value |

View File

@@ -0,0 +1 @@
Security/CWE-776/XmlBomb.ql

View File

@@ -0,0 +1,30 @@
from flask import Flask, request
import lxml.etree
app = Flask(__name__)
@app.route("/vuln-handler")
def vuln_handler():
xml_content = request.args['xml_content']
return lxml.etree.fromstring(xml_content).text
@app.route("/safe-handler")
def safe_handler():
xml_content = request.args['xml_content']
parser = lxml.etree.XMLParser(resolve_entities=False)
return lxml.etree.fromstring(xml_content, parser=parser).text
@app.route("/super-vuln-handler")
def super_vuln_handler():
xml_content = request.args['xml_content']
parser = lxml.etree.XMLParser(
# allows XXE
resolve_entities=True,
# allows remote XXE
no_network=False,
# together with `no_network=False`, allows DTD-retrival
load_dtd=True,
# allows DoS attacks
huge_tree=True,
)
return lxml.etree.fromstring(xml_content, parser=parser).text