Python: Extend FileSystemAccess for xml.sax and xml.dom.* parsing

This commit is contained in:
Rasmus Wriedt Larsen
2022-03-31 18:03:35 +02:00
parent 1d7cec60ae
commit b4c0065aeb
3 changed files with 90 additions and 20 deletions

View File

@@ -3442,8 +3442,11 @@ private module StdlibPrivate {
/**
* A call to the `parse` method on a SAX XML parser.
*
* See https://docs.python.org/3/library/xml.sax.reader.html#xml.sax.xmlreader.XMLReader.parse
*/
private class XMLSaxInstanceParsing extends DataFlow::MethodCallNode, XML::XMLParsing::Range {
private class XMLSaxInstanceParsing extends DataFlow::MethodCallNode, XML::XMLParsing::Range,
FileSystemAccess::Range {
XMLSaxInstanceParsing() {
this =
API::moduleImport("xml")
@@ -3473,6 +3476,17 @@ private module StdlibPrivate {
// really give us any value, at least not as of right now).
none()
}
override DataFlow::Node getAPathArgument() {
// I considered whether we should try to reduce FPs from people passing file-like
// objects, which will not be a file system access (and couldn't cause a
// path-injection).
//
// I suppose that once we have proper flow-summary support for file-like objects,
// we can make the XXE/XML-bomb sinks allow an access-path, while the
// path-injection sink wouldn't, and then we will not end up with such FPs.
result = this.getAnInput()
}
}
/**
@@ -3513,6 +3527,29 @@ private module StdlibPrivate {
}
}
/**
* A call to `xml.sax.parse`, which takes either a filename or a file-like object as
* argument. To capture the filename for path-injection, we have this subclass.
*
* See
* - https://docs.python.org/3/library/xml.etree.elementtree.html#xml.etree.ElementTree.parse
* - https://docs.python.org/3/library/xml.etree.elementtree.html#xml.etree.ElementTree.iterparse
*/
private class FileAccessFromXMLSaxParsing extends XMLSaxParsing, FileSystemAccess::Range {
FileAccessFromXMLSaxParsing() {
this = API::moduleImport("xml").getMember("sax").getMember("parse").getACall()
// I considered whether we should try to reduce FPs from people passing file-like
// objects, which will not be a file system access (and couldn't cause a
// path-injection).
//
// I suppose that once we have proper flow-summary support for file-like objects,
// we can make the XXE/XML-bomb sinks allow an access-path, while the
// path-injection sink wouldn't, and then we will not end up with such FPs.
}
override DataFlow::Node getAPathArgument() { result = this.getAnInput() }
}
// ---------------------------------------------------------------------------
// xml.dom.*
// ---------------------------------------------------------------------------
@@ -3520,6 +3557,10 @@ private module StdlibPrivate {
* A call to the `parse` or `parseString` methods from `xml.dom.minidom` or `xml.dom.pulldom`.
*
* Both of these modules are based on SAX parsers.
*
* See
* - https://docs.python.org/3/library/xml.dom.minidom.html#xml.dom.minidom.parse
* - https://docs.python.org/3/library/xml.dom.pulldom.html#xml.dom.pulldom.parse
*/
private class XMLDomParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range {
XMLDomParsing() {
@@ -3556,6 +3597,35 @@ private module StdlibPrivate {
override DataFlow::Node getOutput() { result = this }
}
/**
* A call to the `parse` or `parseString` methods from `xml.dom.minidom` or
* `xml.dom.pulldom`, which takes either a filename or a file-like object as argument.
* To capture the filename for path-injection, we have this subclass.
*
* See
* - https://docs.python.org/3/library/xml.dom.minidom.html#xml.dom.minidom.parse
* - https://docs.python.org/3/library/xml.dom.pulldom.html#xml.dom.pulldom.parse
*/
private class FileAccessFromXMLDomParsing extends XMLDomParsing, FileSystemAccess::Range {
FileAccessFromXMLDomParsing() {
this =
API::moduleImport("xml")
.getMember("dom")
.getMember(["minidom", "pulldom"])
.getMember("parse")
.getACall()
// I considered whether we should try to reduce FPs from people passing file-like
// objects, which will not be a file system access (and couldn't cause a
// path-injection).
//
// I suppose that once we have proper flow-summary support for file-like objects,
// we can make the XXE/XML-bomb sinks allow an access-path, while the
// path-injection sink wouldn't, and then we will not end up with such FPs.
}
override DataFlow::Node getAPathArgument() { result = this.getAnInput() }
}
}
// ---------------------------------------------------------------------------

View File

@@ -6,16 +6,16 @@ import xml.sax
x = "some xml"
# minidom
xml.dom.minidom.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.minidom.parse(..)
xml.dom.minidom.parse(file=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.minidom.parse(..)
xml.dom.minidom.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.minidom.parse(..) getAPathArgument=StringIO(..)
xml.dom.minidom.parse(file=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.minidom.parse(..) getAPathArgument=StringIO(..)
xml.dom.minidom.parseString(x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.minidom.parseString(..)
xml.dom.minidom.parseString(string=x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.minidom.parseString(..)
# pulldom
xml.dom.pulldom.parse(StringIO(x))['START_DOCUMENT'][1] # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.pulldom.parse(..)
xml.dom.pulldom.parse(stream_or_string=StringIO(x))['START_DOCUMENT'][1] # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.pulldom.parse(..)
xml.dom.pulldom.parse(StringIO(x))['START_DOCUMENT'][1] # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.pulldom.parse(..) getAPathArgument=StringIO(..)
xml.dom.pulldom.parse(stream_or_string=StringIO(x))['START_DOCUMENT'][1] # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.pulldom.parse(..) getAPathArgument=StringIO(..)
xml.dom.pulldom.parseString(x)['START_DOCUMENT'][1] # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.pulldom.parseString(..)
xml.dom.pulldom.parseString(string=x)['START_DOCUMENT'][1] # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.pulldom.parseString(..)
@@ -24,8 +24,8 @@ xml.dom.pulldom.parseString(string=x)['START_DOCUMENT'][1] # $ decodeFormat=XML
# These are based on SAX parses, and you can specify your own, so you can expose yourself to XXE (yay/)
parser = xml.sax.make_parser()
parser.setFeature(xml.sax.handler.feature_external_ges, True)
xml.dom.minidom.parse(StringIO(x), parser) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' decodeOutput=xml.dom.minidom.parse(..)
xml.dom.minidom.parse(StringIO(x), parser=parser) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' decodeOutput=xml.dom.minidom.parse(..)
xml.dom.minidom.parse(StringIO(x), parser) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' decodeOutput=xml.dom.minidom.parse(..) getAPathArgument=StringIO(..)
xml.dom.minidom.parse(StringIO(x), parser=parser) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' decodeOutput=xml.dom.minidom.parse(..) getAPathArgument=StringIO(..)
xml.dom.pulldom.parse(StringIO(x), parser) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' decodeOutput=xml.dom.pulldom.parse(..)
xml.dom.pulldom.parse(StringIO(x), parser=parser) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' decodeOutput=xml.dom.pulldom.parse(..)
xml.dom.pulldom.parse(StringIO(x), parser) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' decodeOutput=xml.dom.pulldom.parse(..) getAPathArgument=StringIO(..)
xml.dom.pulldom.parse(StringIO(x), parser=parser) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' decodeOutput=xml.dom.pulldom.parse(..) getAPathArgument=StringIO(..)

View File

@@ -10,41 +10,41 @@ class MainHandler(xml.sax.ContentHandler):
def characters(self, data):
self._result.append(data)
xml.sax.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
xml.sax.parse(source=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
xml.sax.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' getAPathArgument=StringIO(..)
xml.sax.parse(source=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' getAPathArgument=StringIO(..)
xml.sax.parseString(x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
xml.sax.parseString(string=x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
parser = xml.sax.make_parser()
parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
parser.parse(source=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' getAPathArgument=StringIO(..)
parser.parse(source=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' getAPathArgument=StringIO(..)
# You can make it vuln to both XXE and DTD retrieval by setting this flag
# see https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges
parser = xml.sax.make_parser()
parser.setFeature(xml.sax.handler.feature_external_ges, True)
parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE'
parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' getAPathArgument=StringIO(..)
parser = xml.sax.make_parser()
parser.setFeature(xml.sax.handler.feature_external_ges, False)
parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' getAPathArgument=StringIO(..)
# Forward Type Tracking test
def func(cond):
parser = xml.sax.make_parser()
if cond:
parser.setFeature(xml.sax.handler.feature_external_ges, True)
parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE'
parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' getAPathArgument=StringIO(..)
else:
parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' getAPathArgument=StringIO(..)
# make it vuln, then making it safe
# a bit of an edge-case, but is nice to be able to handle.
parser = xml.sax.make_parser()
parser.setFeature(xml.sax.handler.feature_external_ges, True)
parser.setFeature(xml.sax.handler.feature_external_ges, False)
parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' getAPathArgument=StringIO(..)
def check_conditional_assignment(cond):
parser = xml.sax.make_parser()
@@ -52,7 +52,7 @@ def check_conditional_assignment(cond):
parser.setFeature(xml.sax.handler.feature_external_ges, True)
else:
parser.setFeature(xml.sax.handler.feature_external_ges, False)
parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE'
parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' getAPathArgument=StringIO(..)
def check_conditional_assignment2(cond):
parser = xml.sax.make_parser()
@@ -61,4 +61,4 @@ def check_conditional_assignment2(cond):
else:
flag_value = False
parser.setFeature(xml.sax.handler.feature_external_ges, flag_value)
parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE'
parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' getAPathArgument=StringIO(..)