mirror of
https://github.com/github/codeql.git
synced 2025-12-24 04:36:35 +01:00
Python: Extend FileSystemAccess for xml.sax and xml.dom.* parsing
This commit is contained in:
@@ -3442,8 +3442,11 @@ private module StdlibPrivate {
|
||||
|
||||
/**
|
||||
* A call to the `parse` method on a SAX XML parser.
|
||||
*
|
||||
* See https://docs.python.org/3/library/xml.sax.reader.html#xml.sax.xmlreader.XMLReader.parse
|
||||
*/
|
||||
private class XMLSaxInstanceParsing extends DataFlow::MethodCallNode, XML::XMLParsing::Range {
|
||||
private class XMLSaxInstanceParsing extends DataFlow::MethodCallNode, XML::XMLParsing::Range,
|
||||
FileSystemAccess::Range {
|
||||
XMLSaxInstanceParsing() {
|
||||
this =
|
||||
API::moduleImport("xml")
|
||||
@@ -3473,6 +3476,17 @@ private module StdlibPrivate {
|
||||
// really give us any value, at least not as of right now).
|
||||
none()
|
||||
}
|
||||
|
||||
override DataFlow::Node getAPathArgument() {
|
||||
// I considered whether we should try to reduce FPs from people passing file-like
|
||||
// objects, which will not be a file system access (and couldn't cause a
|
||||
// path-injection).
|
||||
//
|
||||
// I suppose that once we have proper flow-summary support for file-like objects,
|
||||
// we can make the XXE/XML-bomb sinks allow an access-path, while the
|
||||
// path-injection sink wouldn't, and then we will not end up with such FPs.
|
||||
result = this.getAnInput()
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -3513,6 +3527,29 @@ private module StdlibPrivate {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A call to `xml.sax.parse`, which takes either a filename or a file-like object as
|
||||
* argument. To capture the filename for path-injection, we have this subclass.
|
||||
*
|
||||
* See
|
||||
* - https://docs.python.org/3/library/xml.sax.html#xml.sax.parse
|
||||
*/
|
||||
private class FileAccessFromXMLSaxParsing extends XMLSaxParsing, FileSystemAccess::Range {
|
||||
FileAccessFromXMLSaxParsing() {
|
||||
this = API::moduleImport("xml").getMember("sax").getMember("parse").getACall()
|
||||
// I considered whether we should try to reduce FPs from people passing file-like
|
||||
// objects, which will not be a file system access (and couldn't cause a
|
||||
// path-injection).
|
||||
//
|
||||
// I suppose that once we have proper flow-summary support for file-like objects,
|
||||
// we can make the XXE/XML-bomb sinks allow an access-path, while the
|
||||
// path-injection sink wouldn't, and then we will not end up with such FPs.
|
||||
}
|
||||
|
||||
override DataFlow::Node getAPathArgument() { result = this.getAnInput() }
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// xml.dom.*
|
||||
// ---------------------------------------------------------------------------
|
||||
@@ -3520,6 +3557,10 @@ private module StdlibPrivate {
|
||||
* A call to the `parse` or `parseString` methods from `xml.dom.minidom` or `xml.dom.pulldom`.
|
||||
*
|
||||
* Both of these modules are based on SAX parsers.
|
||||
*
|
||||
* See
|
||||
* - https://docs.python.org/3/library/xml.dom.minidom.html#xml.dom.minidom.parse
|
||||
* - https://docs.python.org/3/library/xml.dom.pulldom.html#xml.dom.pulldom.parse
|
||||
*/
|
||||
private class XMLDomParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range {
|
||||
XMLDomParsing() {
|
||||
@@ -3556,6 +3597,35 @@ private module StdlibPrivate {
|
||||
|
||||
override DataFlow::Node getOutput() { result = this }
|
||||
}
|
||||
|
||||
/**
|
||||
* A call to the `parse` function from `xml.dom.minidom` or
|
||||
* `xml.dom.pulldom`, which takes either a filename or a file-like object as argument.
|
||||
* To capture the filename for path-injection, we have this subclass.
|
||||
*
|
||||
* See
|
||||
* - https://docs.python.org/3/library/xml.dom.minidom.html#xml.dom.minidom.parse
|
||||
* - https://docs.python.org/3/library/xml.dom.pulldom.html#xml.dom.pulldom.parse
|
||||
*/
|
||||
private class FileAccessFromXMLDomParsing extends XMLDomParsing, FileSystemAccess::Range {
|
||||
FileAccessFromXMLDomParsing() {
|
||||
this =
|
||||
API::moduleImport("xml")
|
||||
.getMember("dom")
|
||||
.getMember(["minidom", "pulldom"])
|
||||
.getMember("parse")
|
||||
.getACall()
|
||||
// I considered whether we should try to reduce FPs from people passing file-like
|
||||
// objects, which will not be a file system access (and couldn't cause a
|
||||
// path-injection).
|
||||
//
|
||||
// I suppose that once we have proper flow-summary support for file-like objects,
|
||||
// we can make the XXE/XML-bomb sinks allow an access-path, while the
|
||||
// path-injection sink wouldn't, and then we will not end up with such FPs.
|
||||
}
|
||||
|
||||
override DataFlow::Node getAPathArgument() { result = this.getAnInput() }
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
@@ -6,16 +6,16 @@ import xml.sax
|
||||
x = "some xml"
|
||||
|
||||
# minidom
|
||||
xml.dom.minidom.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.minidom.parse(..)
|
||||
xml.dom.minidom.parse(file=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.minidom.parse(..)
|
||||
xml.dom.minidom.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.minidom.parse(..) getAPathArgument=StringIO(..)
|
||||
xml.dom.minidom.parse(file=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.minidom.parse(..) getAPathArgument=StringIO(..)
|
||||
|
||||
xml.dom.minidom.parseString(x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.minidom.parseString(..)
|
||||
xml.dom.minidom.parseString(string=x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.minidom.parseString(..)
|
||||
|
||||
|
||||
# pulldom
|
||||
xml.dom.pulldom.parse(StringIO(x))['START_DOCUMENT'][1] # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.pulldom.parse(..)
|
||||
xml.dom.pulldom.parse(stream_or_string=StringIO(x))['START_DOCUMENT'][1] # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.pulldom.parse(..)
|
||||
xml.dom.pulldom.parse(StringIO(x))['START_DOCUMENT'][1] # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.pulldom.parse(..) getAPathArgument=StringIO(..)
|
||||
xml.dom.pulldom.parse(stream_or_string=StringIO(x))['START_DOCUMENT'][1] # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.pulldom.parse(..) getAPathArgument=StringIO(..)
|
||||
|
||||
xml.dom.pulldom.parseString(x)['START_DOCUMENT'][1] # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.pulldom.parseString(..)
|
||||
xml.dom.pulldom.parseString(string=x)['START_DOCUMENT'][1] # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.pulldom.parseString(..)
|
||||
@@ -24,8 +24,8 @@ xml.dom.pulldom.parseString(string=x)['START_DOCUMENT'][1] # $ decodeFormat=XML
|
||||
# These are based on SAX parsers, and you can specify your own, so you can expose yourself to XXE (yay/)
|
||||
parser = xml.sax.make_parser()
|
||||
parser.setFeature(xml.sax.handler.feature_external_ges, True)
|
||||
xml.dom.minidom.parse(StringIO(x), parser) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' decodeOutput=xml.dom.minidom.parse(..)
|
||||
xml.dom.minidom.parse(StringIO(x), parser=parser) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' decodeOutput=xml.dom.minidom.parse(..)
|
||||
xml.dom.minidom.parse(StringIO(x), parser) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' decodeOutput=xml.dom.minidom.parse(..) getAPathArgument=StringIO(..)
|
||||
xml.dom.minidom.parse(StringIO(x), parser=parser) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' decodeOutput=xml.dom.minidom.parse(..) getAPathArgument=StringIO(..)
|
||||
|
||||
xml.dom.pulldom.parse(StringIO(x), parser) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' decodeOutput=xml.dom.pulldom.parse(..)
|
||||
xml.dom.pulldom.parse(StringIO(x), parser=parser) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' decodeOutput=xml.dom.pulldom.parse(..)
|
||||
xml.dom.pulldom.parse(StringIO(x), parser) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' decodeOutput=xml.dom.pulldom.parse(..) getAPathArgument=StringIO(..)
|
||||
xml.dom.pulldom.parse(StringIO(x), parser=parser) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' decodeOutput=xml.dom.pulldom.parse(..) getAPathArgument=StringIO(..)
|
||||
|
||||
@@ -10,41 +10,41 @@ class MainHandler(xml.sax.ContentHandler):
|
||||
def characters(self, data):
|
||||
self._result.append(data)
|
||||
|
||||
xml.sax.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
|
||||
xml.sax.parse(source=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
|
||||
xml.sax.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' getAPathArgument=StringIO(..)
|
||||
xml.sax.parse(source=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' getAPathArgument=StringIO(..)
|
||||
|
||||
xml.sax.parseString(x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
|
||||
xml.sax.parseString(string=x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
|
||||
|
||||
parser = xml.sax.make_parser()
|
||||
parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
|
||||
parser.parse(source=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
|
||||
parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' getAPathArgument=StringIO(..)
|
||||
parser.parse(source=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' getAPathArgument=StringIO(..)
|
||||
|
||||
# You can make it vuln to both XXE and DTD retrieval by setting this flag
|
||||
# see https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges
|
||||
parser = xml.sax.make_parser()
|
||||
parser.setFeature(xml.sax.handler.feature_external_ges, True)
|
||||
parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE'
|
||||
parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' getAPathArgument=StringIO(..)
|
||||
|
||||
parser = xml.sax.make_parser()
|
||||
parser.setFeature(xml.sax.handler.feature_external_ges, False)
|
||||
parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
|
||||
parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' getAPathArgument=StringIO(..)
|
||||
|
||||
# Forward Type Tracking test
|
||||
def func(cond):
|
||||
parser = xml.sax.make_parser()
|
||||
if cond:
|
||||
parser.setFeature(xml.sax.handler.feature_external_ges, True)
|
||||
parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE'
|
||||
parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' getAPathArgument=StringIO(..)
|
||||
else:
|
||||
parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
|
||||
parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' getAPathArgument=StringIO(..)
|
||||
|
||||
# make it vuln, then make it safe
|
||||
# a bit of an edge-case, but is nice to be able to handle.
|
||||
parser = xml.sax.make_parser()
|
||||
parser.setFeature(xml.sax.handler.feature_external_ges, True)
|
||||
parser.setFeature(xml.sax.handler.feature_external_ges, False)
|
||||
parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
|
||||
parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' getAPathArgument=StringIO(..)
|
||||
|
||||
def check_conditional_assignment(cond):
|
||||
parser = xml.sax.make_parser()
|
||||
@@ -52,7 +52,7 @@ def check_conditional_assignment(cond):
|
||||
parser.setFeature(xml.sax.handler.feature_external_ges, True)
|
||||
else:
|
||||
parser.setFeature(xml.sax.handler.feature_external_ges, False)
|
||||
parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE'
|
||||
parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' getAPathArgument=StringIO(..)
|
||||
|
||||
def check_conditional_assignment2(cond):
|
||||
parser = xml.sax.make_parser()
|
||||
@@ -61,4 +61,4 @@ def check_conditional_assignment2(cond):
|
||||
else:
|
||||
flag_value = False
|
||||
parser.setFeature(xml.sax.handler.feature_external_ges, flag_value)
|
||||
parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE'
|
||||
parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' getAPathArgument=StringIO(..)
|
||||
|
||||
Reference in New Issue
Block a user