Python: Make XMLParsing a Decoding subclass

This commit is contained in:
Rasmus Wriedt Larsen
2022-03-29 16:48:30 +02:00
committed by Rasmus Wriedt Larsen
parent 35ccba2ec1
commit 1ea4bcc59f
8 changed files with 124 additions and 88 deletions

View File

@@ -580,12 +580,7 @@ module XML {
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `XMLParsing` instead.
*/
class XMLParsing extends DataFlow::Node instanceof XMLParsing::Range {
/**
* Gets the argument containing the content to parse.
*/
DataFlow::Node getAnInput() { result = super.getAnInput() }
class XMLParsing extends Decoding instanceof XMLParsing::Range {
/**
* Holds if this XML parsing is vulnerable to `kind`.
*/
@@ -600,16 +595,13 @@ module XML {
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `XMLParsing` instead.
*/
abstract class Range extends DataFlow::Node {
/**
* Gets the argument containing the content to parse.
*/
abstract DataFlow::Node getAnInput();
abstract class Range extends Decoding::Range {
/**
* Holds if this XML parsing is vulnerable to `kind`.
*/
abstract predicate vulnerableTo(XMLParsingVulnerabilityKind kind);
override string getFormat() { result = "XML" }
}
}
}

View File

@@ -69,6 +69,15 @@ private module XmlEtree {
override predicate vulnerableTo(XML::XMLParsingVulnerabilityKind kind) {
kind.isBillionLaughs() or kind.isQuadraticBlowup()
}
override predicate mayExecuteInput() { none() }
override DataFlow::Node getOutput() {
exists(DataFlow::Node objRef |
DataFlow::localFlow(this.getObject(), objRef) and
result.(DataFlow::MethodCallNode).calls(objRef, "close")
)
}
}
}
@@ -108,6 +117,10 @@ private module XmlEtree {
// change the security features anyway :|
kind.isBillionLaughs() or kind.isQuadraticBlowup()
}
override predicate mayExecuteInput() { none() }
override DataFlow::Node getOutput() { result = this }
}
}
@@ -226,6 +239,15 @@ private module SaxBasedParsing {
this.getObject() = saxParserWithFeatureExternalGesTurnedOn() and
(kind.isXxe() or kind.isDtdRetrieval())
}
override predicate mayExecuteInput() { none() }
override DataFlow::Node getOutput() {
// note: the output of parsing with SAX is that the content handler gets the
// data... but we don't currently model this (it's not trivial to do, and won't
// really give us any value, at least not as of right now).
none()
}
}
/**
@@ -259,6 +281,15 @@ private module SaxBasedParsing {
this.getObject() = saxParserWithFeatureExternalGesTurnedOn() and
(kind.isXxe() or kind.isDtdRetrieval())
}
override predicate mayExecuteInput() { none() }
override DataFlow::Node getOutput() {
// note: the output of parsing with SAX is that the content handler gets the
// data... but we don't currently model this (it's not trivial to do, and won't
// really give us any value, at least not as of right now).
none()
}
}
/**
@@ -296,6 +327,10 @@ private module SaxBasedParsing {
or
(kind.isBillionLaughs() or kind.isQuadraticBlowup())
}
override predicate mayExecuteInput() { none() }
override DataFlow::Node getOutput() { result = this }
}
}
@@ -400,6 +435,15 @@ private module Lxml {
override predicate vulnerableTo(XML::XMLParsingVulnerabilityKind kind) {
this.calls(instanceVulnerableTo(kind), "feed")
}
override predicate mayExecuteInput() { none() }
override DataFlow::Node getOutput() {
exists(DataFlow::Node objRef |
DataFlow::localFlow(this.getObject(), objRef) and
result.(DataFlow::MethodCallNode).calls(objRef, "close")
)
}
}
}
@@ -442,6 +486,10 @@ private module Lxml {
kind.isXxe() and
not exists(this.getParserArg())
}
override predicate mayExecuteInput() { none() }
override DataFlow::Node getOutput() { result = this }
}
}
@@ -460,5 +508,9 @@ private module Xmltodict {
(kind.isBillionLaughs() or kind.isQuadraticBlowup()) and
this.getArgByName("disable_entities").getALocalSource().asExpr() = any(False f)
}
override predicate mayExecuteInput() { none() }
override DataFlow::Node getOutput() { result = this }
}
}

View File

@@ -4,51 +4,51 @@ import lxml.etree
x = "some xml"
# different parsing methods
lxml.etree.fromstring(x) # $ xmlInput=x xmlVuln='XXE'
lxml.etree.fromstring(text=x) # $ xmlInput=x xmlVuln='XXE'
lxml.etree.fromstring(x) # $ decodeFormat=XML decodeInput=x xmlVuln='XXE' decodeOutput=lxml.etree.fromstring(..)
lxml.etree.fromstring(text=x) # $ decodeFormat=XML decodeInput=x xmlVuln='XXE' decodeOutput=lxml.etree.fromstring(..)
lxml.etree.fromstringlist([x]) # $ xmlInput=List xmlVuln='XXE'
lxml.etree.fromstringlist(strings=[x]) # $ xmlInput=List xmlVuln='XXE'
lxml.etree.fromstringlist([x]) # $ decodeFormat=XML decodeInput=List xmlVuln='XXE' decodeOutput=lxml.etree.fromstringlist(..)
lxml.etree.fromstringlist(strings=[x]) # $ decodeFormat=XML decodeInput=List xmlVuln='XXE' decodeOutput=lxml.etree.fromstringlist(..)
lxml.etree.XML(x) # $ xmlInput=x xmlVuln='XXE'
lxml.etree.XML(text=x) # $ xmlInput=x xmlVuln='XXE'
lxml.etree.XML(x) # $ decodeFormat=XML decodeInput=x xmlVuln='XXE' decodeOutput=lxml.etree.XML(..)
lxml.etree.XML(text=x) # $ decodeFormat=XML decodeInput=x xmlVuln='XXE' decodeOutput=lxml.etree.XML(..)
lxml.etree.parse(StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='XXE'
lxml.etree.parse(source=StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='XXE'
lxml.etree.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XXE' decodeOutput=lxml.etree.parse(..)
lxml.etree.parse(source=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XXE' decodeOutput=lxml.etree.parse(..)
lxml.etree.parseid(StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='XXE'
lxml.etree.parseid(source=StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='XXE'
lxml.etree.parseid(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XXE' decodeOutput=lxml.etree.parseid(..)
lxml.etree.parseid(source=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XXE' decodeOutput=lxml.etree.parseid(..)
# With default parsers (nothing changed)
parser = lxml.etree.XMLParser()
lxml.etree.fromstring(x, parser=parser) # $ xmlInput=x xmlVuln='XXE'
lxml.etree.fromstring(x, parser=parser) # $ decodeFormat=XML decodeInput=x xmlVuln='XXE' decodeOutput=lxml.etree.fromstring(..)
parser = lxml.etree.get_default_parser()
lxml.etree.fromstring(x, parser=parser) # $ xmlInput=x xmlVuln='XXE'
lxml.etree.fromstring(x, parser=parser) # $ decodeFormat=XML decodeInput=x xmlVuln='XXE' decodeOutput=lxml.etree.fromstring(..)
# manual use of feed method
parser = lxml.etree.XMLParser()
parser.feed(x) # $ xmlInput=x xmlVuln='XXE'
parser.feed(data=x) # $ xmlInput=x xmlVuln='XXE'
parser.close()
parser.feed(x) # $ decodeFormat=XML decodeInput=x xmlVuln='XXE'
parser.feed(data=x) # $ decodeFormat=XML decodeInput=x xmlVuln='XXE'
parser.close() # $ decodeOutput=parser.close()
# XXE-safe
parser = lxml.etree.XMLParser(resolve_entities=False)
lxml.etree.fromstring(x, parser) # $ xmlInput=x
lxml.etree.fromstring(x, parser=parser) # $ xmlInput=x
lxml.etree.fromstring(x, parser) # $ decodeFormat=XML decodeInput=x decodeOutput=lxml.etree.fromstring(..)
lxml.etree.fromstring(x, parser=parser) # $ decodeFormat=XML decodeInput=x decodeOutput=lxml.etree.fromstring(..)
# XXE-vuln
parser = lxml.etree.XMLParser(resolve_entities=True)
lxml.etree.fromstring(x, parser=parser) # $ xmlInput=x xmlVuln='XXE'
lxml.etree.fromstring(x, parser=parser) # $ decodeFormat=XML decodeInput=x xmlVuln='XXE' decodeOutput=lxml.etree.fromstring(..)
# Billion laughs vuln (also XXE)
parser = lxml.etree.XMLParser(huge_tree=True)
lxml.etree.fromstring(x, parser=parser) # $ xmlInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' xmlVuln='XXE'
lxml.etree.fromstring(x, parser=parser) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' xmlVuln='XXE' decodeOutput=lxml.etree.fromstring(..)
# Safe for both Billion laughs and XXE
parser = lxml.etree.XMLParser(resolve_entities=False, huge_tree=True)
lxml.etree.fromstring(x, parser=parser) # $ xmlInput=x
lxml.etree.fromstring(x, parser=parser) # $ decodeFormat=XML decodeInput=x decodeOutput=lxml.etree.fromstring(..)
# DTD retrival vuln (also XXE)
parser = lxml.etree.XMLParser(load_dtd=True, no_network=False)
lxml.etree.fromstring(x, parser=parser) # $ xmlInput=x xmlVuln='DTD retrieval' xmlVuln='XXE'
lxml.etree.fromstring(x, parser=parser) # $ decodeFormat=XML decodeInput=x xmlVuln='DTD retrieval' xmlVuln='XXE' decodeOutput=lxml.etree.fromstring(..)

View File

@@ -6,26 +6,26 @@ import xml.sax
x = "some xml"
# minidom
xml.dom.minidom.parse(StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
xml.dom.minidom.parse(file=StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
xml.dom.minidom.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.minidom.parse(..)
xml.dom.minidom.parse(file=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.minidom.parse(..)
xml.dom.minidom.parseString(x) # $ xmlInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
xml.dom.minidom.parseString(string=x) # $ xmlInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
xml.dom.minidom.parseString(x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.minidom.parseString(..)
xml.dom.minidom.parseString(string=x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.minidom.parseString(..)
# pulldom
xml.dom.pulldom.parse(StringIO(x))['START_DOCUMENT'][1] # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
xml.dom.pulldom.parse(stream_or_string=StringIO(x))['START_DOCUMENT'][1] # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
xml.dom.pulldom.parse(StringIO(x))['START_DOCUMENT'][1] # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.pulldom.parse(..)
xml.dom.pulldom.parse(stream_or_string=StringIO(x))['START_DOCUMENT'][1] # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.pulldom.parse(..)
xml.dom.pulldom.parseString(x)['START_DOCUMENT'][1] # $ xmlInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
xml.dom.pulldom.parseString(string=x)['START_DOCUMENT'][1] # $ xmlInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
xml.dom.pulldom.parseString(x)['START_DOCUMENT'][1] # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.pulldom.parseString(..)
xml.dom.pulldom.parseString(string=x)['START_DOCUMENT'][1] # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.pulldom.parseString(..)
# These are based on SAX parses, and you can specify your own, so you can expose yourself to XXE (yay/)
parser = xml.sax.make_parser()
parser.setFeature(xml.sax.handler.feature_external_ges, True)
xml.dom.minidom.parse(StringIO(x), parser) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE'
xml.dom.minidom.parse(StringIO(x), parser=parser) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE'
xml.dom.minidom.parse(StringIO(x), parser) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' decodeOutput=xml.dom.minidom.parse(..)
xml.dom.minidom.parse(StringIO(x), parser=parser) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' decodeOutput=xml.dom.minidom.parse(..)
xml.dom.pulldom.parse(StringIO(x), parser) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE'
xml.dom.pulldom.parse(StringIO(x), parser=parser) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE'
xml.dom.pulldom.parse(StringIO(x), parser) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' decodeOutput=xml.dom.pulldom.parse(..)
xml.dom.pulldom.parse(StringIO(x), parser=parser) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' decodeOutput=xml.dom.pulldom.parse(..)

View File

@@ -4,40 +4,40 @@ import xml.etree.ElementTree
x = "some xml"
# Parsing in different ways
xml.etree.ElementTree.fromstring(x) # $ xmlInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
xml.etree.ElementTree.fromstring(text=x) # $ xmlInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
xml.etree.ElementTree.fromstring(x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.fromstring(..)
xml.etree.ElementTree.fromstring(text=x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.fromstring(..)
xml.etree.ElementTree.fromstringlist([x]) # $ xmlInput=List xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
xml.etree.ElementTree.fromstringlist(sequence=[x]) # $ xmlInput=List xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
xml.etree.ElementTree.fromstringlist([x]) # $ decodeFormat=XML decodeInput=List xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.fromstringlist(..)
xml.etree.ElementTree.fromstringlist(sequence=[x]) # $ decodeFormat=XML decodeInput=List xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.fromstringlist(..)
xml.etree.ElementTree.XML(x) # $ xmlInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
xml.etree.ElementTree.XML(text=x) # $ xmlInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
xml.etree.ElementTree.XML(x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.XML(..)
xml.etree.ElementTree.XML(text=x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.XML(..)
xml.etree.ElementTree.XMLID(x) # $ xmlInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
xml.etree.ElementTree.XMLID(text=x) # $ xmlInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
xml.etree.ElementTree.XMLID(x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.XMLID(..)
xml.etree.ElementTree.XMLID(text=x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.XMLID(..)
xml.etree.ElementTree.parse(StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
xml.etree.ElementTree.parse(source=StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
xml.etree.ElementTree.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.parse(..)
xml.etree.ElementTree.parse(source=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.parse(..)
xml.etree.ElementTree.iterparse(StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
xml.etree.ElementTree.iterparse(source=StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
xml.etree.ElementTree.iterparse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.iterparse(..)
xml.etree.ElementTree.iterparse(source=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.iterparse(..)
# With parsers (no options available to disable/enable security features)
parser = xml.etree.ElementTree.XMLParser()
xml.etree.ElementTree.fromstring(x, parser=parser) # $ xmlInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
xml.etree.ElementTree.fromstring(x, parser=parser) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.fromstring(..)
# manual use of feed method
parser = xml.etree.ElementTree.XMLParser()
parser.feed(x) # $ xmlInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
parser.feed(data=x) # $ xmlInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
parser.close()
parser.feed(x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
parser.feed(data=x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
parser.close() # $ decodeOutput=parser.close()
# manual use of feed method on XMLPullParser
parser = xml.etree.ElementTree.XMLPullParser()
parser.feed(x) # $ xmlInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
parser.feed(data=x) # $ xmlInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
parser.close()
parser.feed(x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
parser.feed(data=x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
parser.close() # $ decodeOutput=parser.close()
# note: it's technically possible to use the thing wrapper func `fromstring` with an
# `lxml` parser, and thereby change what vulnerabilities you are exposed to.. but it

View File

@@ -10,41 +10,41 @@ class MainHandler(xml.sax.ContentHandler):
def characters(self, data):
self._result.append(data)
xml.sax.parse(StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
xml.sax.parse(source=StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
xml.sax.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
xml.sax.parse(source=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
xml.sax.parseString(x) # $ xmlInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
xml.sax.parseString(string=x) # $ xmlInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
xml.sax.parseString(x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
xml.sax.parseString(string=x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
parser = xml.sax.make_parser()
parser.parse(StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
parser.parse(source=StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
parser.parse(source=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
# You can make it vuln to both XXE and DTD retrieval by setting this flag
# see https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges
parser = xml.sax.make_parser()
parser.setFeature(xml.sax.handler.feature_external_ges, True)
parser.parse(StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE'
parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE'
parser = xml.sax.make_parser()
parser.setFeature(xml.sax.handler.feature_external_ges, False)
parser.parse(StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
# Forward Type Tracking test
def func(cond):
parser = xml.sax.make_parser()
if cond:
parser.setFeature(xml.sax.handler.feature_external_ges, True)
parser.parse(StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE'
parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE'
else:
parser.parse(StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
# make it vuln, then making it safe
# a bit of an edge-case, but is nice to be able to handle.
parser = xml.sax.make_parser()
parser.setFeature(xml.sax.handler.feature_external_ges, True)
parser.setFeature(xml.sax.handler.feature_external_ges, False)
parser.parse(StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
def check_conditional_assignment(cond):
parser = xml.sax.make_parser()
@@ -52,7 +52,7 @@ def check_conditional_assignment(cond):
parser.setFeature(xml.sax.handler.feature_external_ges, True)
else:
parser.setFeature(xml.sax.handler.feature_external_ges, False)
parser.parse(StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE'
parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE'
def check_conditional_assignment2(cond):
parser = xml.sax.make_parser()
@@ -61,4 +61,4 @@ def check_conditional_assignment2(cond):
else:
flag_value = False
parser.setFeature(xml.sax.handler.feature_external_ges, flag_value)
parser.parse(StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE'
parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE'

View File

@@ -2,7 +2,7 @@ import xmltodict
x = "some xml"
xmltodict.parse(x) # $ xmlInput=x
xmltodict.parse(xml_input=x) # $ xmlInput=x
xmltodict.parse(x) # $ decodeFormat=XML decodeInput=x decodeOutput=xmltodict.parse(..)
xmltodict.parse(xml_input=x) # $ decodeFormat=XML decodeInput=x decodeOutput=xmltodict.parse(..)
xmltodict.parse(x, disable_entities=False) # $ xmlInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
xmltodict.parse(x, disable_entities=False) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xmltodict.parse(..)

View File

@@ -548,14 +548,6 @@ class XmlParsingTest extends InlineExpectationsTest {
override predicate hasActualResult(Location location, string element, string tag, string value) {
exists(location.getFile().getRelativePath()) and
exists(XML::XMLParsing parsing |
exists(DataFlow::Node input |
input = parsing.getAnInput() and
location = input.getLocation() and
element = input.toString() and
value = prettyNodeForInlineTest(input) and
tag = "xmlInput"
)
or
exists(XML::XMLParsingVulnerabilityKind kind |
parsing.vulnerableTo(kind) and
location = parsing.getLocation() and