mirror of
https://github.com/github/codeql.git
synced 2026-05-02 04:05:14 +02:00
Merge pull request #6112 from jorgectf/jorgectf/python/deserialization
Python: Port and extend XXE modeling
This commit is contained in:
@@ -0,0 +1,25 @@
|
||||
/**
|
||||
* @name SimpleXMLRPCServer DoS vulnerability
|
||||
* @description SimpleXMLRPCServer is vulnerable to DoS attacks from untrusted user input
|
||||
* @kind problem
|
||||
* @problem.severity warning
|
||||
* @precision high
|
||||
* @id py/simple-xml-rpc-server-dos
|
||||
* @tags security
|
||||
* external/cwe/cwe-776
|
||||
*/
|
||||
|
||||
private import python
|
||||
private import experimental.semmle.python.Concepts
|
||||
private import semmle.python.ApiGraphs
|
||||
|
||||
// Flags every instantiation of `xmlrpc.server.SimpleXMLRPCServer` and reports the
// entity-expansion vulnerability kinds (joined into one string) in the alert message.
from DataFlow::CallCfgNode serverCall, string vulnerabilities
where
  vulnerabilities =
    strictconcat(ExperimentalXML::XMLVulnerabilityKind vuln |
      vuln.isQuadraticBlowup() or vuln.isBillionLaughs()
    |
      vuln, ", "
    ) and
  serverCall =
    API::moduleImport("xmlrpc").getMember("server").getMember("SimpleXMLRPCServer").getACall()
select serverCall, "SimpleXMLRPCServer is vulnerable to: " + vulnerabilities + "."
|
||||
4
python/ql/src/experimental/Security/CWE-611/XXE.xml
Normal file
4
python/ql/src/experimental/Security/CWE-611/XXE.xml
Normal file
@@ -0,0 +1,4 @@
|
||||
<?xml version="1.0"?>
|
||||
<!DOCTYPE dt [
|
||||
<!ENTITY xxe SYSTEM "file:///etc/passwd">]>
|
||||
<test>&xxe;</test>
|
||||
@@ -0,0 +1,25 @@
|
||||
from flask import request, Flask
|
||||
import lxml.etree
|
||||
import xml.etree.ElementTree
|
||||
|
||||
app = Flask(__name__)
|
||||
|
||||
# BAD
@app.route("/bad")
def bad():
    """Parse the `xml_content` request argument with a default `lxml.etree.XMLParser`."""
    untrusted_xml = request.args['xml_content']

    # No hardening options are passed to the parser.
    default_parser = lxml.etree.XMLParser()
    root = xml.etree.ElementTree.fromstring(untrusted_xml, parser=default_parser)

    return root.text
|
||||
|
||||
# GOOD
@app.route("/good")
def good():
    """Parse the `xml_content` request argument with entity resolution disabled."""
    untrusted_xml = request.args['xml_content']

    # `resolve_entities=False` is the only difference from the `/bad` handler.
    hardened_parser = lxml.etree.XMLParser(resolve_entities=False)
    root = xml.etree.ElementTree.fromstring(untrusted_xml, parser=hardened_parser)

    return root.text
|
||||
@@ -0,0 +1,48 @@
|
||||
<!DOCTYPE qhelp PUBLIC
|
||||
"-//Semmle//qhelp//EN"
|
||||
"qhelp.dtd">
|
||||
<qhelp>
|
||||
|
||||
<overview>
|
||||
<p>
|
||||
Parsing untrusted XML files with a weakly configured XML parser may lead to attacks such as XML External Entity (XXE),
|
||||
Billion Laughs, Quadratic Blowup and DTD retrieval.
|
||||
This type of attack uses external entity references to access arbitrary files on a system, carry out denial of
|
||||
service, or server side request forgery. Even when the result of parsing is not returned to the user, out-of-band
|
||||
data retrieval techniques may allow attackers to steal sensitive data. Denial of service attacks can also be carried out
|
||||
in this situation.
|
||||
</p>
|
||||
</overview>
|
||||
|
||||
<recommendation>
|
||||
<p>
|
||||
Use <a href="https://pypi.org/project/defusedxml/">defusedxml</a>, a Python package designed
|
||||
to prevent any potentially malicious operation.
|
||||
</p>
|
||||
</recommendation>
|
||||
|
||||
<example>
|
||||
<p>
|
||||
The following example calls <code>xml.etree.ElementTree.fromstring</code> using a parser (<code>lxml.etree.XMLParser</code>)
|
||||
that is not safely configured on untrusted data, and is therefore inherently unsafe.
|
||||
</p>
|
||||
<sample src="XmlEntityInjection.py"/>
|
||||
<p>
|
||||
If an input (<code>xml_content</code>) like the following XML content is sent to <code>/bad</code>, the response will contain the contents of
|
||||
<code>/etc/passwd</code>.
|
||||
</p>
|
||||
<sample src="XXE.xml"/>
|
||||
</example>
|
||||
|
||||
<references>
|
||||
<li>Python 3 <a href="https://docs.python.org/3/library/xml.html#xml-vulnerabilities">XML Vulnerabilities</a>.</li>
|
||||
<li>Python 2 <a href="https://docs.python.org/2/library/xml.html#xml-vulnerabilities">XML Vulnerabilities</a>.</li>
|
||||
<li>Python <a href="https://www.edureka.co/blog/python-xml-parser-tutorial/">XML Parsing</a>.</li>
|
||||
<li>OWASP vulnerability description: <a href="https://www.owasp.org/index.php/XML_External_Entity_(XXE)_Processing">XML External Entity (XXE) Processing</a>.</li>
|
||||
<li>OWASP guidance on parsing xml files: <a href="https://cheatsheetseries.owasp.org/cheatsheets/XML_External_Entity_Prevention_Cheat_Sheet.html#python">XXE Prevention Cheat Sheet</a>.</li>
|
||||
<li>Paper by Timothy D. Morgan: <a href="https://research.nccgroup.com/2014/05/19/xml-schema-dtd-and-entity-attacks-a-compendium-of-known-techniques/">XML Schema, DTD, and Entity Attacks</a>.</li>
|
||||
<li>Out-of-band data retrieval: Timur Yunusov &amp; Alexey Osipov, Black Hat EU 2013: <a href="https://www.slideshare.net/qqlan/bh-ready-v4">XML Out-Of-Band Data Retrieval</a>.</li>
|
||||
<li>Denial of service attack (Billion laughs): <a href="https://en.wikipedia.org/wiki/Billion_laughs">Billion Laughs</a>.</li>
|
||||
</references>
|
||||
|
||||
</qhelp>
|
||||
@@ -0,0 +1,31 @@
|
||||
/**
|
||||
* @name XML Entity injection
|
||||
* @description User input should not be parsed allowing the injection of entities.
|
||||
* @kind path-problem
|
||||
* @problem.severity error
|
||||
* @id py/xml-entity-injection
|
||||
* @tags security
|
||||
* external/cwe/cwe-611
|
||||
* external/cwe/cwe-776
|
||||
* external/cwe/cwe-827
|
||||
*/
|
||||
|
||||
// TODO: determine the `@precision` for this query and add it to the metadata above
|
||||
import python
|
||||
import experimental.semmle.python.security.dataflow.XmlEntityInjection
|
||||
import DataFlow::PathGraph
|
||||
|
||||
// Reports each tainted path from a source to an XML parsing sink, listing every
// vulnerability kind the sink reports (joined with ", ") in the alert message.
from
  XmlEntityInjection::XmlEntityInjectionConfiguration cfg, DataFlow::PathNode src,
  DataFlow::PathNode snk, XmlEntityInjection::Sink parsedInput, string vulnerabilities
where
  cfg.hasFlowPath(src, snk) and
  parsedInput = snk.getNode() and
  vulnerabilities =
    strictconcat(string vuln | vuln = parsedInput.getVulnerableKind() | vuln, ", ")
select snk.getNode(), src, snk,
  "$@ XML input is constructed from a $@ and is vulnerable to: " + vulnerabilities + ".",
  snk.getNode(), "This", src.getNode(), "user-provided value"
|
||||
@@ -14,6 +14,74 @@ private import semmle.python.dataflow.new.RemoteFlowSources
|
||||
private import semmle.python.dataflow.new.TaintTracking
|
||||
private import experimental.semmle.python.Frameworks
|
||||
|
||||
/**
 * Provides the experimental concepts for XML parsing.
 *
 * Both the normal and the experimental Concepts contain an XML module, so the
 * experimental one is named `ExperimentalXML` to keep the two apart.
 */
module ExperimentalXML {
  /**
   * A kind of XML vulnerability, represented by its human-readable name.
   *
   * See https://pypi.org/project/defusedxml/#python-xml-libraries
   */
  class XMLVulnerabilityKind extends string {
    XMLVulnerabilityKind() {
      this in ["Billion Laughs", "Quadratic Blowup", "XXE", "DTD retrieval"]
    }

    /** Holds for Billion Laughs vulnerability kind. */
    predicate isBillionLaughs() { this = "Billion Laughs" }

    /** Holds for Quadratic Blowup vulnerability kind. */
    predicate isQuadraticBlowup() { this = "Quadratic Blowup" }

    /** Holds for XXE vulnerability kind. */
    predicate isXxe() { this = "XXE" }

    /** Holds for DTD retrieval vulnerability kind. */
    predicate isDtdRetrieval() { this = "DTD retrieval" }
  }

  /**
   * A data-flow node that parses XML.
   *
   * Extend this class to refine existing API models. If you want to model new APIs,
   * extend `XMLParsing::Range` instead.
   */
  class XMLParsing extends DataFlow::Node instanceof XMLParsing::Range {
    /**
     * Gets the argument containing the content to parse.
     */
    DataFlow::Node getAnInput() { result = super.getAnInput() }

    /**
     * Holds if this XML parsing is vulnerable to `kind`.
     */
    predicate vulnerableTo(XMLVulnerabilityKind kind) { super.vulnerableTo(kind) }
  }

  /** Provides classes for modeling XML parsing APIs. */
  module XMLParsing {
    /**
     * A data-flow node that parses XML.
     *
     * Extend this class to model new APIs. If you want to refine existing API models,
     * extend `XMLParsing` instead.
     */
    abstract class Range extends DataFlow::Node {
      /**
       * Gets the argument containing the content to parse.
       */
      abstract DataFlow::Node getAnInput();

      /**
       * Holds if this XML parsing is vulnerable to `kind`.
       */
      abstract predicate vulnerableTo(XMLVulnerabilityKind kind);
    }
  }
}
|
||||
|
||||
/** Provides classes for modeling LDAP query execution-related APIs. */
|
||||
module LdapQuery {
|
||||
/**
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
*/
|
||||
|
||||
private import experimental.semmle.python.frameworks.Stdlib
|
||||
private import experimental.semmle.python.frameworks.Xml
|
||||
private import experimental.semmle.python.frameworks.Flask
|
||||
private import experimental.semmle.python.frameworks.Django
|
||||
private import experimental.semmle.python.frameworks.Werkzeug
|
||||
|
||||
466
python/ql/src/experimental/semmle/python/frameworks/Xml.qll
Normal file
466
python/ql/src/experimental/semmle/python/frameworks/Xml.qll
Normal file
@@ -0,0 +1,466 @@
|
||||
/**
|
||||
* Provides class and predicates to track external data that
|
||||
* may represent malicious XML objects.
|
||||
*/
|
||||
|
||||
private import python
|
||||
private import semmle.python.dataflow.new.DataFlow
|
||||
private import experimental.semmle.python.Concepts
|
||||
private import semmle.python.ApiGraphs
|
||||
|
||||
module XML = ExperimentalXML;
|
||||
|
||||
private module XmlEtree {
  /**
   * Provides models for `xml.etree` parsers
   *
   * See
   * - https://docs.python.org/3.10/library/xml.etree.elementtree.html#xml.etree.ElementTree.XMLParser
   * - https://docs.python.org/3.10/library/xml.etree.elementtree.html#xml.etree.ElementTree.XMLPullParser
   */
  module XMLParser {
    /**
     * A source of `xml.etree` parser instances. Extend this class to model new sources,
     * such as class instantiations, return values of function calls, or special
     * parameters that an external library sets when it calls a function.
     *
     * Use the predicate `XMLParser::instance()` to get references to instances of `xml.etree` parsers.
     */
    abstract class InstanceSource extends DataFlow::LocalSourceNode { }

    /** A call to `xml.etree.ElementTree.XMLParser` or `xml.etree.ElementTree.XMLPullParser`. */
    private class ClassInstantiation extends InstanceSource, DataFlow::CallCfgNode {
      ClassInstantiation() {
        exists(string parserClass | parserClass in ["XMLParser", "XMLPullParser"] |
          this =
            API::moduleImport("xml")
                .getMember("etree")
                .getMember("ElementTree")
                .getMember(parserClass)
                .getACall()
        )
      }
    }

    /** Gets a reference to an `xml.etree` parser instance, tracked with `t`. */
    private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
      result instanceof InstanceSource and
      t.start()
      or
      exists(DataFlow::TypeTracker prev | result = instance(prev).track(prev, t))
    }

    /** Gets a reference to an `xml.etree` parser instance. */
    DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }

    /**
     * A call to the `feed` method of an `xml.etree` parser.
     */
    private class XMLEtreeParserFeedCall extends DataFlow::MethodCallNode, XML::XMLParsing::Range {
      XMLEtreeParserFeedCall() { this.calls(instance(), "feed") }

      override DataFlow::Node getAnInput() {
        result = this.getArg(0)
        or
        result = this.getArgByName("data")
      }

      override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) {
        kind.isQuadraticBlowup() or kind.isBillionLaughs()
      }
    }
  }

  /**
   * A call to either of:
   * - `xml.etree.ElementTree.fromstring`
   * - `xml.etree.ElementTree.fromstringlist`
   * - `xml.etree.ElementTree.XML`
   * - `xml.etree.ElementTree.XMLID`
   * - `xml.etree.ElementTree.parse`
   * - `xml.etree.ElementTree.iterparse`
   */
  private class XMLEtreeParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range {
    XMLEtreeParsing() {
      exists(string function |
        function in ["fromstring", "fromstringlist", "XML", "XMLID", "parse", "iterparse"]
      |
        this =
          API::moduleImport("xml")
              .getMember("etree")
              .getMember("ElementTree")
              .getMember(function)
              .getACall()
      )
    }

    override DataFlow::Node getAnInput() {
      result = this.getArg(0)
      or
      // `text`: fromstring / XML / XMLID
      // `sequence`: fromstringlist
      // `source`: parse / iterparse
      result = this.getArgByName(["text", "sequence", "source"])
    }

    override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) {
      // note: the choice of `xml.etree` parser is irrelevant here, since its
      // security features cannot be changed anyway
      kind.isQuadraticBlowup() or kind.isBillionLaughs()
    }
  }
}
|
||||
|
||||
private module SaxBasedParsing {
|
||||
/**
 * A call to the `setFeature` method on the result of `xml.sax.make_parser()`.
 *
 * See https://docs.python.org/3.10/library/xml.sax.reader.html#xml.sax.xmlreader.XMLReader.setFeature
 */
class SaxParserSetFeatureCall extends DataFlow::MethodCallNode {
  SaxParserSetFeatureCall() {
    exists(API::Node saxParser |
      saxParser = API::moduleImport("xml").getMember("sax").getMember("make_parser").getReturn() and
      this = saxParser.getMember("setFeature").getACall()
    )
  }

  // The keyword argument names below do not match the documentation; they were
  // checked with Python 3.9.5 and are the names that actually work.
  /** Gets the argument naming the feature: first positional argument or keyword `name`. */
  DataFlow::Node getFeatureArg() {
    result = this.getArg(0)
    or
    result = this.getArgByName("name")
  }

  /** Gets the argument giving the new state: second positional argument or keyword `state`. */
  DataFlow::Node getStateArg() {
    result = this.getArg(1)
    or
    result = this.getArgByName("state")
  }
}
|
||||
|
||||
/**
 * Gets a back-reference to the `setFeature` state argument `arg`, tracked backwards with `t`.
 */
private DataFlow::TypeTrackingNode saxParserSetFeatureStateArgBacktracker(
  DataFlow::TypeBackTracker t, DataFlow::Node arg
) {
  // Base case: start at the local source of a state argument of some `setFeature` call.
  exists(SaxParserSetFeatureCall setFeature | arg = setFeature.getStateArg()) and
  result = arg.getALocalSource() and
  t.start()
  or
  // Recursive case: take one more step backwards.
  exists(DataFlow::TypeBackTracker later |
    result = saxParserSetFeatureStateArgBacktracker(later, arg).backtrack(later, t)
  )
}

/** Gets a back-reference to the `setFeature` state argument `arg`. */
DataFlow::LocalSourceNode saxParserSetFeatureStateArgBacktracker(DataFlow::Node arg) {
  result = saxParserSetFeatureStateArgBacktracker(DataFlow::TypeBackTracker::end(), arg)
}
|
||||
|
||||
/**
 * Holds if `call` sets the feature `xml.sax.handler.feature_external_ges` and its
 * state argument can be traced back to the boolean literal `state`.
 */
private predicate setsFeatureExternalGes(SaxParserSetFeatureCall call, boolean state) {
  call.getFeatureArg() =
    API::moduleImport("xml")
        .getMember("sax")
        .getMember("handler")
        .getMember("feature_external_ges")
        .getAUse() and
  state =
    saxParserSetFeatureStateArgBacktracker(call.getStateArg())
        .asExpr()
        .(BooleanLiteral)
        .booleanValue()
}

/**
 * Gets a reference to a XML sax parser that has `feature_external_ges` turned on,
 * tracked with `t`.
 *
 * See https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges
 */
private DataFlow::Node saxParserWithFeatureExternalGesTurnedOn(DataFlow::TypeTracker t) {
  // Start at the receiver of a `setFeature` call that turns the feature on.
  t.start() and
  exists(SaxParserSetFeatureCall enabling |
    setsFeatureExternalGes(enabling, true) and
    result = enabling.getObject()
  )
  or
  // Follow the parser one small step at a time ...
  exists(DataFlow::TypeTracker prev |
    t = prev.smallstep(saxParserWithFeatureExternalGesTurnedOn(prev), result)
  ) and
  // ... but stop where the feature is set to False, which makes the parser safe again.
  not exists(SaxParserSetFeatureCall disabling |
    disabling.getObject() = result and
    setsFeatureExternalGes(disabling, false)
  )
}

/**
 * Gets a reference to a XML sax parser that has `feature_external_ges` turned on.
 *
 * See https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges
 */
DataFlow::Node saxParserWithFeatureExternalGesTurnedOn() {
  result = saxParserWithFeatureExternalGesTurnedOn(DataFlow::TypeTracker::end())
}
|
||||
|
||||
/**
 * A call to the `parse` method on the result of `xml.sax.make_parser()`.
 */
private class XMLSaxInstanceParsing extends DataFlow::MethodCallNode, XML::XMLParsing::Range {
  XMLSaxInstanceParsing() {
    exists(API::Node saxParser |
      saxParser = API::moduleImport("xml").getMember("sax").getMember("make_parser").getReturn() and
      this = saxParser.getMember("parse").getACall()
    )
  }

  override DataFlow::Node getAnInput() {
    result = this.getArg(0)
    or
    result = this.getArgByName("source")
  }

  override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) {
    // these two kinds are always reported
    kind.isBillionLaughs()
    or
    kind.isQuadraticBlowup()
    or
    // the remaining kinds are only reported when the parser had
    // `feature_external_ges` turned on
    (kind.isXxe() or kind.isDtdRetrieval()) and
    this.getObject() = saxParserWithFeatureExternalGesTurnedOn()
  }
}
|
||||
|
||||
/**
 * A call to either `parse` or `parseString` from `xml.sax` module.
 *
 * These are module-level functions, not methods on a parser object, so they are
 * modeled as plain calls.
 *
 * See:
 * - https://docs.python.org/3.10/library/xml.sax.html#xml.sax.parse
 * - https://docs.python.org/3.10/library/xml.sax.html#xml.sax.parseString
 */
private class XMLSaxParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range {
  XMLSaxParsing() {
    this =
      API::moduleImport("xml").getMember("sax").getMember(["parse", "parseString"]).getACall()
  }

  override DataFlow::Node getAnInput() {
    result in [
        this.getArg(0),
        // parseString
        this.getArgByName("string"),
        // parse
        this.getArgByName("source"),
      ]
  }

  override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) {
    // always vuln to these
    //
    // There is no XXE / DTD retrieval case here: the previous check compared the
    // object of the call (the `xml.sax` module itself) against parsers that had
    // `feature_external_ges` turned on, which could never match.
    kind.isBillionLaughs() or kind.isQuadraticBlowup()
  }
}
|
||||
|
||||
/**
 * A call to the `parse` or `parseString` methods from `xml.dom.minidom` or `xml.dom.pulldom`.
 *
 * Both of these modules are based on SAX parsers.
 */
private class XMLDomParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range {
  XMLDomParsing() {
    exists(API::Node domModule |
      domModule = API::moduleImport("xml").getMember("dom").getMember(["minidom", "pulldom"]) and
      this = domModule.getMember(["parse", "parseString"]).getACall()
    )
  }

  override DataFlow::Node getAnInput() {
    result = this.getArg(0)
    or
    // `string`: parseString
    // `file`: minidom.parse
    // `stream_or_string`: pulldom.parse
    result = this.getArgByName(["string", "file", "stream_or_string"])
  }

  /** Gets the parser argument: second positional argument or keyword `parser`. */
  DataFlow::Node getParserArg() {
    result = this.getArg(1)
    or
    result = this.getArgByName("parser")
  }

  override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) {
    // these two kinds are always reported
    kind.isBillionLaughs()
    or
    kind.isQuadraticBlowup()
    or
    // the remaining kinds need a parser argument with `feature_external_ges` turned on
    (kind.isXxe() or kind.isDtdRetrieval()) and
    this.getParserArg() = saxParserWithFeatureExternalGesTurnedOn()
  }
}
|
||||
}
|
||||
|
||||
private module Lxml {
|
||||
/**
 * Provides models for `lxml.etree` parsers.
 *
 * See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser
 */
module XMLParser {
  /**
   * A source of `lxml.etree` parser instances. Extend this class to model new sources,
   * such as class instantiations, return values of function calls, or special
   * parameters that an external library sets when it calls a function.
   *
   * Use the predicate `XMLParser::instance()` to get references to instances of `lxml.etree` parsers.
   */
  abstract class InstanceSource extends DataFlow::LocalSourceNode {
    /** Holds if this instance is vulnerable to `kind`. */
    abstract predicate vulnerableTo(XML::XMLVulnerabilityKind kind);
  }

  /**
   * A call to `lxml.etree.XMLParser`.
   *
   * See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser
   */
  private class LXMLParser extends InstanceSource, DataFlow::CallCfgNode {
    LXMLParser() {
      this = API::moduleImport("lxml").getMember("etree").getMember("XMLParser").getACall()
    }

    /** Holds if the keyword argument `name` has the literal `True` as a local source. */
    private predicate keywordIsTrue(string name) {
      this.getArgByName(name).getALocalSource().asExpr() instanceof True
    }

    /** Holds if the keyword argument `name` has the literal `False` as a local source. */
    private predicate keywordIsFalse(string name) {
      this.getArgByName(name).getALocalSource().asExpr() instanceof False
    }

    // NOTE: it's not possible to change settings of a parser after constructing it
    override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) {
      // XXE: `resolve_entities` is left at its default (True) or set to True
      kind.isXxe() and
      (
        not exists(this.getArgByName("resolve_entities"))
        or
        this.keywordIsTrue("resolve_entities")
      )
      or
      // entity expansion: `huge_tree=True` unless `resolve_entities=False`
      (kind.isBillionLaughs() or kind.isQuadraticBlowup()) and
      this.keywordIsTrue("huge_tree") and
      not this.keywordIsFalse("resolve_entities")
      or
      // DTD retrieval: `load_dtd=True` together with `no_network=False`
      kind.isDtdRetrieval() and
      this.keywordIsTrue("load_dtd") and
      this.keywordIsFalse("no_network")
    }
  }

  /**
   * A call to `lxml.etree.get_default_parser`.
   *
   * See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.get_default_parser
   */
  private class LXMLDefaultParser extends InstanceSource, DataFlow::CallCfgNode {
    LXMLDefaultParser() {
      this =
        API::moduleImport("lxml").getMember("etree").getMember("get_default_parser").getACall()
    }

    override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) {
      // As highlighted by
      // https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser
      // XXE is allowed by default, so as long as the default parser has not been
      // overridden, the result is also vulnerable to XXE.
      //
      // TODO: take into account that you can override the default parser with `lxml.etree.set_default_parser`.
      kind.isXxe()
    }
  }

  /** Gets a reference to an `lxml.etree` parsers instance, with origin in `origin` */
  private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t, InstanceSource origin) {
    result = origin and
    t.start()
    or
    exists(DataFlow::TypeTracker prev | result = instance(prev, origin).track(prev, t))
  }

  /** Gets a reference to an `lxml.etree` parsers instance, with origin in `origin` */
  DataFlow::Node instance(InstanceSource origin) {
    instance(DataFlow::TypeTracker::end(), origin).flowsTo(result)
  }

  /** Gets a reference to an `lxml.etree` parser instance, that is vulnerable to `kind`. */
  DataFlow::Node instanceVulnerableTo(XML::XMLVulnerabilityKind kind) {
    exists(InstanceSource origin |
      origin.vulnerableTo(kind) and
      result = instance(origin)
    )
  }

  /**
   * A call to the `feed` method of an `lxml` parser.
   */
  private class LXMLParserFeedCall extends DataFlow::MethodCallNode, XML::XMLParsing::Range {
    LXMLParserFeedCall() { this.calls(instance(_), "feed") }

    override DataFlow::Node getAnInput() {
      result = this.getArg(0)
      or
      result = this.getArgByName("data")
    }

    override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) {
      this.calls(instanceVulnerableTo(kind), "feed")
    }
  }
}
|
||||
|
||||
/**
 * A call to either of:
 * - `lxml.etree.fromstring`
 * - `lxml.etree.fromstringlist`
 * - `lxml.etree.XML`
 * - `lxml.etree.parse`
 * - `lxml.etree.parseid`
 *
 * See https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.fromstring
 */
private class LXMLParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range {
  LXMLParsing() {
    exists(string function |
      function in ["fromstring", "fromstringlist", "XML", "parse", "parseid"]
    |
      this = API::moduleImport("lxml").getMember("etree").getMember(function).getACall()
    )
  }

  override DataFlow::Node getAnInput() {
    result = this.getArg(0)
    or
    // `text`: fromstring / XML
    // `strings`: fromstringlist
    // `source`: parse / parseid
    result = this.getArgByName(["text", "strings", "source"])
  }

  /** Gets the parser argument: second positional argument or keyword `parser`. */
  DataFlow::Node getParserArg() {
    result = this.getArg(1)
    or
    result = this.getArgByName("parser")
  }

  override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) {
    // without a parser argument, only XXE is reported
    not exists(this.getParserArg()) and
    kind.isXxe()
    or
    // otherwise the vulnerability kinds are those of the parser that is passed in
    this.getParserArg() = XMLParser::instanceVulnerableTo(kind)
  }
}
|
||||
}
|
||||
|
||||
private module Xmltodict {
  /**
   * A call to `xmltodict.parse`.
   */
  private class XMLtoDictParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range {
    XMLtoDictParsing() { this = API::moduleImport("xmltodict").getMember("parse").getACall() }

    override DataFlow::Node getAnInput() {
      result = this.getArg(0)
      or
      result = this.getArgByName("xml_input")
    }

    override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) {
      // only reported when `disable_entities` is explicitly given the literal `False`
      this.getArgByName("disable_entities").getALocalSource().asExpr() instanceof False and
      (kind.isQuadraticBlowup() or kind.isBillionLaughs())
    }
  }
}
|
||||
@@ -0,0 +1,28 @@
|
||||
import python
|
||||
import experimental.semmle.python.Concepts
|
||||
import semmle.python.dataflow.new.DataFlow
|
||||
import semmle.python.dataflow.new.TaintTracking
|
||||
import semmle.python.dataflow.new.RemoteFlowSources
|
||||
import semmle.python.dataflow.new.BarrierGuards
|
||||
|
||||
/**
 * Provides a taint-tracking configuration for detecting XML entity injection,
 * built on the sources, sinks, sanitizer guards and additional taint steps from
 * `XmlEntityInjectionCustomizations`.
 */
module XmlEntityInjection {
  import XmlEntityInjectionCustomizations::XmlEntityInjection

  /**
   * A taint-tracking configuration from `Source` nodes to `Sink` nodes.
   */
  class XmlEntityInjectionConfiguration extends TaintTracking::Configuration {
    XmlEntityInjectionConfiguration() { this = "XmlEntityInjectionConfiguration" }

    // Use the abstract `Source` extension point (as `isSink` and `isSanitizerGuard`
    // do with theirs) so that user-defined `Source` subclasses are picked up.
    // `RemoteFlowSourceAsSource` extends `Source`, so it is still included.
    override predicate isSource(DataFlow::Node source) { source instanceof Source }

    override predicate isSink(DataFlow::Node sink) { sink instanceof Sink }

    override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
      guard instanceof SanitizerGuard
    }

    override predicate isAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
      any(AdditionalTaintStep s).step(nodeFrom, nodeTo)
    }
  }
}
|
||||
@@ -0,0 +1,86 @@
|
||||
/**
|
||||
* Provides default sources, sinks and sanitizers for detecting
|
||||
 * "xml injection"
|
||||
* vulnerabilities, as well as extension points for adding your own.
|
||||
*/
|
||||
|
||||
private import python
|
||||
private import semmle.python.dataflow.new.DataFlow
|
||||
private import experimental.semmle.python.Concepts
|
||||
private import semmle.python.dataflow.new.RemoteFlowSources
|
||||
private import semmle.python.dataflow.new.BarrierGuards
|
||||
private import semmle.python.ApiGraphs
|
||||
|
||||
/**
 * Provides default sources, sinks and sanitizers for detecting "xml injection"
 * vulnerabilities, as well as extension points for adding your own.
 */
module XmlEntityInjection {
  /**
   * A data flow source for "xml injection" vulnerabilities.
   */
  abstract class Source extends DataFlow::Node { }

  /**
   * A data flow sink for "xml injection" vulnerabilities.
   */
  abstract class Sink extends DataFlow::Node {
    /** Gets the kind of XML injection that this sink is vulnerable to. */
    abstract string getVulnerableKind();
  }

  /**
   * A sanitizer guard for "xml injection" vulnerabilities.
   */
  abstract class SanitizerGuard extends DataFlow::BarrierGuard { }

  /**
   * A unit class for adding additional taint steps.
   *
   * Extend this class to add additional taint steps that should apply to `XmlEntityInjection`
   * taint configuration.
   */
  class AdditionalTaintStep extends Unit {
    /**
     * Holds if the step from `nodeFrom` to `nodeTo` should be considered a taint
     * step for `XmlEntityInjection` configuration.
     */
    abstract predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo);
  }

  /**
   * An input to a direct XML parsing function, considered as a flow sink.
   *
   * See `ExperimentalXML::XMLParsing`.
   */
  class XMLParsingInputAsSink extends Sink {
    ExperimentalXML::XMLParsing xmlParsing;

    XMLParsingInputAsSink() { this = xmlParsing.getAnInput() }

    override string getVulnerableKind() { xmlParsing.vulnerableTo(result) }
  }

  /**
   * A source of remote user input, considered as a flow source.
   */
  class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { }

  /**
   * A comparison with a constant string, considered as a sanitizer-guard.
   */
  class StringConstCompareAsSanitizerGuard extends SanitizerGuard, StringConstCompare { }

  /**
   * A taint step from the initial content passed to `io.StringIO` or `io.BytesIO`
   * to the resulting stream object.
   */
  class IoAdditionalTaintStep extends AdditionalTaintStep {
    override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
      exists(DataFlow::CallCfgNode ioCalls |
        ioCalls = API::moduleImport("io").getMember(["StringIO", "BytesIO"]).getACall() and
        // The initial content may be passed positionally or by keyword:
        // `StringIO(initial_value=...)` / `BytesIO(initial_bytes=...)`.
        nodeFrom in [
            ioCalls.getArg(0), ioCalls.getArgByName(["initial_value", "initial_bytes"])
          ] and
        nodeTo = ioCalls
      )
    }
  }
}
|
||||
Reference in New Issue
Block a user