diff --git a/python/ql/lib/semmle/python/Concepts.qll b/python/ql/lib/semmle/python/Concepts.qll index c430594d05b..b553c8d927d 100644 --- a/python/ql/lib/semmle/python/Concepts.qll +++ b/python/ql/lib/semmle/python/Concepts.qll @@ -580,12 +580,7 @@ module XML { * Extend this class to model new APIs. If you want to refine existing API models, * extend `XMLParsing` instead. */ - class XMLParsing extends DataFlow::Node instanceof XMLParsing::Range { - /** - * Gets the argument containing the content to parse. - */ - DataFlow::Node getAnInput() { result = super.getAnInput() } - + class XMLParsing extends Decoding instanceof XMLParsing::Range { /** * Holds if this XML parsing is vulnerable to `kind`. */ @@ -600,16 +595,13 @@ module XML { * Extend this class to model new APIs. If you want to refine existing API models, * extend `XMLParsing` instead. */ - abstract class Range extends DataFlow::Node { - /** - * Gets the argument containing the content to parse. - */ - abstract DataFlow::Node getAnInput(); - + abstract class Range extends Decoding::Range { /** * Holds if this XML parsing is vulnerable to `kind`. */ abstract predicate vulnerableTo(XMLParsingVulnerabilityKind kind); + + override string getFormat() { result = "XML" } } } } diff --git a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll index 4987e24bce4..c072295c461 100644 --- a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll +++ b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll @@ -69,6 +69,15 @@ private module XmlEtree { override predicate vulnerableTo(XML::XMLParsingVulnerabilityKind kind) { kind.isBillionLaughs() or kind.isQuadraticBlowup() } + + override predicate mayExecuteInput() { none() } + + override DataFlow::Node getOutput() { + exists(DataFlow::Node objRef | + DataFlow::localFlow(this.getObject(), objRef) and + result.(DataFlow::MethodCallNode).calls(objRef, "close") + ) + } } } @@ -108,6 +117,10 @@ private module XmlEtree { // change the security features anyway :| kind.isBillionLaughs() or kind.isQuadraticBlowup() } + + override predicate mayExecuteInput() { none() } + + override DataFlow::Node getOutput() { result = this } } } @@ -226,6 +239,15 @@ private module SaxBasedParsing { this.getObject() = saxParserWithFeatureExternalGesTurnedOn() and (kind.isXxe() or kind.isDtdRetrieval()) } + + override predicate mayExecuteInput() { none() } + + override DataFlow::Node getOutput() { + // note: the output of parsing with SAX is that the content handler gets the + // data... but we don't currently model this (it's not trivial to do, and won't + // really give us any value, at least not as of right now). + none() + } } /** @@ -259,6 +281,15 @@ private module SaxBasedParsing { this.getObject() = saxParserWithFeatureExternalGesTurnedOn() and (kind.isXxe() or kind.isDtdRetrieval()) } + + override predicate mayExecuteInput() { none() } + + override DataFlow::Node getOutput() { + // note: the output of parsing with SAX is that the content handler gets the + // data... but we don't currently model this (it's not trivial to do, and won't + // really give us any value, at least not as of right now). + none() + } } /** @@ -296,6 +327,10 @@ private module SaxBasedParsing { or (kind.isBillionLaughs() or kind.isQuadraticBlowup()) } + + override predicate mayExecuteInput() { none() } + + override DataFlow::Node getOutput() { result = this } } } @@ -400,6 +435,15 @@ private module Lxml { override predicate vulnerableTo(XML::XMLParsingVulnerabilityKind kind) { this.calls(instanceVulnerableTo(kind), "feed") } + + override predicate mayExecuteInput() { none() } + + override DataFlow::Node getOutput() { + exists(DataFlow::Node objRef | + DataFlow::localFlow(this.getObject(), objRef) and + result.(DataFlow::MethodCallNode).calls(objRef, "close") + ) + } } } @@ -442,6 +486,10 @@ private module Lxml { kind.isXxe() and not exists(this.getParserArg()) } + + override predicate mayExecuteInput() { none() } + + override DataFlow::Node getOutput() { result = this } } } @@ -460,5 +508,9 @@ private module Xmltodict { (kind.isBillionLaughs() or kind.isQuadraticBlowup()) and this.getArgByName("disable_entities").getALocalSource().asExpr() = any(False f) } + + override predicate mayExecuteInput() { none() } + + override DataFlow::Node getOutput() { result = this } } } diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py b/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py index ee8f3fc69c1..f1dbd5390ad 100644 --- a/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py +++ b/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py @@ -4,51 +4,51 @@ import lxml.etree x = "some xml" # different parsing methods -lxml.etree.fromstring(x) # $ xmlInput=x xmlVuln='XXE' -lxml.etree.fromstring(text=x) # $ xmlInput=x xmlVuln='XXE' +lxml.etree.fromstring(x) # $ decodeFormat=XML decodeInput=x xmlVuln='XXE' decodeOutput=lxml.etree.fromstring(..) +lxml.etree.fromstring(text=x) # $ decodeFormat=XML decodeInput=x xmlVuln='XXE' decodeOutput=lxml.etree.fromstring(..) -lxml.etree.fromstringlist([x]) # $ xmlInput=List xmlVuln='XXE' -lxml.etree.fromstringlist(strings=[x]) # $ xmlInput=List xmlVuln='XXE' +lxml.etree.fromstringlist([x]) # $ decodeFormat=XML decodeInput=List xmlVuln='XXE' decodeOutput=lxml.etree.fromstringlist(..) +lxml.etree.fromstringlist(strings=[x]) # $ decodeFormat=XML decodeInput=List xmlVuln='XXE' decodeOutput=lxml.etree.fromstringlist(..) -lxml.etree.XML(x) # $ xmlInput=x xmlVuln='XXE' -lxml.etree.XML(text=x) # $ xmlInput=x xmlVuln='XXE' +lxml.etree.XML(x) # $ decodeFormat=XML decodeInput=x xmlVuln='XXE' decodeOutput=lxml.etree.XML(..) +lxml.etree.XML(text=x) # $ decodeFormat=XML decodeInput=x xmlVuln='XXE' decodeOutput=lxml.etree.XML(..) -lxml.etree.parse(StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='XXE' -lxml.etree.parse(source=StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='XXE' +lxml.etree.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XXE' decodeOutput=lxml.etree.parse(..) +lxml.etree.parse(source=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XXE' decodeOutput=lxml.etree.parse(..) -lxml.etree.parseid(StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='XXE' -lxml.etree.parseid(source=StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='XXE' +lxml.etree.parseid(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XXE' decodeOutput=lxml.etree.parseid(..) +lxml.etree.parseid(source=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XXE' decodeOutput=lxml.etree.parseid(..) # With default parsers (nothing changed) parser = lxml.etree.XMLParser() -lxml.etree.fromstring(x, parser=parser) # $ xmlInput=x xmlVuln='XXE' +lxml.etree.fromstring(x, parser=parser) # $ decodeFormat=XML decodeInput=x xmlVuln='XXE' decodeOutput=lxml.etree.fromstring(..) parser = lxml.etree.get_default_parser() -lxml.etree.fromstring(x, parser=parser) # $ xmlInput=x xmlVuln='XXE' +lxml.etree.fromstring(x, parser=parser) # $ decodeFormat=XML decodeInput=x xmlVuln='XXE' decodeOutput=lxml.etree.fromstring(..) # manual use of feed method parser = lxml.etree.XMLParser() -parser.feed(x) # $ xmlInput=x xmlVuln='XXE' -parser.feed(data=x) # $ xmlInput=x xmlVuln='XXE' -parser.close() +parser.feed(x) # $ decodeFormat=XML decodeInput=x xmlVuln='XXE' +parser.feed(data=x) # $ decodeFormat=XML decodeInput=x xmlVuln='XXE' +parser.close() # $ decodeOutput=parser.close() # XXE-safe parser = lxml.etree.XMLParser(resolve_entities=False) -lxml.etree.fromstring(x, parser) # $ xmlInput=x -lxml.etree.fromstring(x, parser=parser) # $ xmlInput=x +lxml.etree.fromstring(x, parser) # $ decodeFormat=XML decodeInput=x decodeOutput=lxml.etree.fromstring(..) +lxml.etree.fromstring(x, parser=parser) # $ decodeFormat=XML decodeInput=x decodeOutput=lxml.etree.fromstring(..) # XXE-vuln parser = lxml.etree.XMLParser(resolve_entities=True) -lxml.etree.fromstring(x, parser=parser) # $ xmlInput=x xmlVuln='XXE' +lxml.etree.fromstring(x, parser=parser) # $ decodeFormat=XML decodeInput=x xmlVuln='XXE' decodeOutput=lxml.etree.fromstring(..) # Billion laughs vuln (also XXE) parser = lxml.etree.XMLParser(huge_tree=True) -lxml.etree.fromstring(x, parser=parser) # $ xmlInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' xmlVuln='XXE' +lxml.etree.fromstring(x, parser=parser) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' xmlVuln='XXE' decodeOutput=lxml.etree.fromstring(..) # Safe for both Billion laughs and XXE parser = lxml.etree.XMLParser(resolve_entities=False, huge_tree=True) -lxml.etree.fromstring(x, parser=parser) # $ xmlInput=x +lxml.etree.fromstring(x, parser=parser) # $ decodeFormat=XML decodeInput=x decodeOutput=lxml.etree.fromstring(..) # DTD retrival vuln (also XXE) parser = lxml.etree.XMLParser(load_dtd=True, no_network=False) -lxml.etree.fromstring(x, parser=parser) # $ xmlInput=x xmlVuln='DTD retrieval' xmlVuln='XXE' +lxml.etree.fromstring(x, parser=parser) # $ decodeFormat=XML decodeInput=x xmlVuln='DTD retrieval' xmlVuln='XXE' decodeOutput=lxml.etree.fromstring(..) diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/xml_dom.py b/python/ql/test/experimental/library-tests/frameworks/XML/xml_dom.py index b86770b8d6c..c6152c75807 100644 --- a/python/ql/test/experimental/library-tests/frameworks/XML/xml_dom.py +++ b/python/ql/test/experimental/library-tests/frameworks/XML/xml_dom.py @@ -6,26 +6,26 @@ import xml.sax x = "some xml" # minidom -xml.dom.minidom.parse(StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' -xml.dom.minidom.parse(file=StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' +xml.dom.minidom.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.minidom.parse(..) +xml.dom.minidom.parse(file=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.minidom.parse(..) -xml.dom.minidom.parseString(x) # $ xmlInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' -xml.dom.minidom.parseString(string=x) # $ xmlInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' +xml.dom.minidom.parseString(x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.minidom.parseString(..) +xml.dom.minidom.parseString(string=x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.minidom.parseString(..) # pulldom -xml.dom.pulldom.parse(StringIO(x))['START_DOCUMENT'][1] # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' -xml.dom.pulldom.parse(stream_or_string=StringIO(x))['START_DOCUMENT'][1] # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' +xml.dom.pulldom.parse(StringIO(x))['START_DOCUMENT'][1] # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.pulldom.parse(..) +xml.dom.pulldom.parse(stream_or_string=StringIO(x))['START_DOCUMENT'][1] # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.pulldom.parse(..) -xml.dom.pulldom.parseString(x)['START_DOCUMENT'][1] # $ xmlInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' -xml.dom.pulldom.parseString(string=x)['START_DOCUMENT'][1] # $ xmlInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' +xml.dom.pulldom.parseString(x)['START_DOCUMENT'][1] # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.pulldom.parseString(..) +xml.dom.pulldom.parseString(string=x)['START_DOCUMENT'][1] # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.pulldom.parseString(..) # These are based on SAX parses, and you can specify your own, so you can expose yourself to XXE (yay/) parser = xml.sax.make_parser() parser.setFeature(xml.sax.handler.feature_external_ges, True) -xml.dom.minidom.parse(StringIO(x), parser) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' -xml.dom.minidom.parse(StringIO(x), parser=parser) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' +xml.dom.minidom.parse(StringIO(x), parser) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' decodeOutput=xml.dom.minidom.parse(..) +xml.dom.minidom.parse(StringIO(x), parser=parser) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' decodeOutput=xml.dom.minidom.parse(..) -xml.dom.pulldom.parse(StringIO(x), parser) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' -xml.dom.pulldom.parse(StringIO(x), parser=parser) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' +xml.dom.pulldom.parse(StringIO(x), parser) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' decodeOutput=xml.dom.pulldom.parse(..) +xml.dom.pulldom.parse(StringIO(x), parser=parser) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' decodeOutput=xml.dom.pulldom.parse(..) diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py b/python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py index c5d141a3715..0ed750ba8c7 100644 --- a/python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py +++ b/python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py @@ -4,40 +4,40 @@ import xml.etree.ElementTree x = "some xml" # Parsing in different ways -xml.etree.ElementTree.fromstring(x) # $ xmlInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' -xml.etree.ElementTree.fromstring(text=x) # $ xmlInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' +xml.etree.ElementTree.fromstring(x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.fromstring(..) +xml.etree.ElementTree.fromstring(text=x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.fromstring(..) -xml.etree.ElementTree.fromstringlist([x]) # $ xmlInput=List xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' -xml.etree.ElementTree.fromstringlist(sequence=[x]) # $ xmlInput=List xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' +xml.etree.ElementTree.fromstringlist([x]) # $ decodeFormat=XML decodeInput=List xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.fromstringlist(..) +xml.etree.ElementTree.fromstringlist(sequence=[x]) # $ decodeFormat=XML decodeInput=List xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.fromstringlist(..) -xml.etree.ElementTree.XML(x) # $ xmlInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' -xml.etree.ElementTree.XML(text=x) # $ xmlInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' +xml.etree.ElementTree.XML(x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.XML(..) +xml.etree.ElementTree.XML(text=x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.XML(..) -xml.etree.ElementTree.XMLID(x) # $ xmlInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' -xml.etree.ElementTree.XMLID(text=x) # $ xmlInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' +xml.etree.ElementTree.XMLID(x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.XMLID(..) +xml.etree.ElementTree.XMLID(text=x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.XMLID(..) -xml.etree.ElementTree.parse(StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' -xml.etree.ElementTree.parse(source=StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' +xml.etree.ElementTree.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.parse(..) +xml.etree.ElementTree.parse(source=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.parse(..) -xml.etree.ElementTree.iterparse(StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' -xml.etree.ElementTree.iterparse(source=StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' +xml.etree.ElementTree.iterparse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.iterparse(..) +xml.etree.ElementTree.iterparse(source=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.iterparse(..) # With parsers (no options available to disable/enable security features) parser = xml.etree.ElementTree.XMLParser() -xml.etree.ElementTree.fromstring(x, parser=parser) # $ xmlInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' +xml.etree.ElementTree.fromstring(x, parser=parser) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.fromstring(..) # manual use of feed method parser = xml.etree.ElementTree.XMLParser() -parser.feed(x) # $ xmlInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' -parser.feed(data=x) # $ xmlInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' -parser.close() +parser.feed(x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' +parser.feed(data=x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' +parser.close() # $ decodeOutput=parser.close() # manual use of feed method on XMLPullParser parser = xml.etree.ElementTree.XMLPullParser() -parser.feed(x) # $ xmlInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' -parser.feed(data=x) # $ xmlInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' -parser.close() +parser.feed(x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' +parser.feed(data=x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' +parser.close() # $ decodeOutput=parser.close() # note: it's technically possible to use the thing wrapper func `fromstring` with an # `lxml` parser, and thereby change what vulnerabilities you are exposed to.. but it diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/xml_sax.py b/python/ql/test/experimental/library-tests/frameworks/XML/xml_sax.py index c0e5923c5c0..8dbe9d4ae99 100644 --- a/python/ql/test/experimental/library-tests/frameworks/XML/xml_sax.py +++ b/python/ql/test/experimental/library-tests/frameworks/XML/xml_sax.py @@ -10,41 +10,41 @@ class MainHandler(xml.sax.ContentHandler): def characters(self, data): self._result.append(data) -xml.sax.parse(StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' -xml.sax.parse(source=StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' +xml.sax.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' +xml.sax.parse(source=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' -xml.sax.parseString(x) # $ xmlInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' -xml.sax.parseString(string=x) # $ xmlInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' +xml.sax.parseString(x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' +xml.sax.parseString(string=x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' parser = xml.sax.make_parser() -parser.parse(StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' -parser.parse(source=StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' +parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' +parser.parse(source=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' # You can make it vuln to both XXE and DTD retrieval by setting this flag # see https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges parser = xml.sax.make_parser() parser.setFeature(xml.sax.handler.feature_external_ges, True) -parser.parse(StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' +parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' parser = xml.sax.make_parser() parser.setFeature(xml.sax.handler.feature_external_ges, False) -parser.parse(StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' +parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' # Forward Type Tracking test def func(cond): parser = xml.sax.make_parser() if cond: parser.setFeature(xml.sax.handler.feature_external_ges, True) - parser.parse(StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' + parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' else: - parser.parse(StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' + parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' # make it vuln, then making it safe # a bit of an edge-case, but is nice to be able to handle. parser = xml.sax.make_parser() parser.setFeature(xml.sax.handler.feature_external_ges, True) parser.setFeature(xml.sax.handler.feature_external_ges, False) -parser.parse(StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' +parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' def check_conditional_assignment(cond): parser = xml.sax.make_parser() @@ -52,7 +52,7 @@ def check_conditional_assignment(cond): parser.setFeature(xml.sax.handler.feature_external_ges, True) else: parser.setFeature(xml.sax.handler.feature_external_ges, False) - parser.parse(StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' + parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' def check_conditional_assignment2(cond): parser = xml.sax.make_parser() @@ -61,4 +61,4 @@ def check_conditional_assignment2(cond): else: flag_value = False parser.setFeature(xml.sax.handler.feature_external_ges, flag_value) - parser.parse(StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' + parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/xmltodict.py b/python/ql/test/experimental/library-tests/frameworks/XML/xmltodict.py index 27d04862f83..01dc2f3c484 100644 --- a/python/ql/test/experimental/library-tests/frameworks/XML/xmltodict.py +++ b/python/ql/test/experimental/library-tests/frameworks/XML/xmltodict.py @@ -2,7 +2,7 @@ import xmltodict x = "some xml" -xmltodict.parse(x) # $ xmlInput=x -xmltodict.parse(xml_input=x) # $ xmlInput=x +xmltodict.parse(x) # $ decodeFormat=XML decodeInput=x decodeOutput=xmltodict.parse(..) +xmltodict.parse(xml_input=x) # $ decodeFormat=XML decodeInput=x decodeOutput=xmltodict.parse(..) -xmltodict.parse(x, disable_entities=False) # $ xmlInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' +xmltodict.parse(x, disable_entities=False) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xmltodict.parse(..) diff --git a/python/ql/test/experimental/meta/ConceptsTest.qll b/python/ql/test/experimental/meta/ConceptsTest.qll index e9f71356963..24cbbab2d44 100644 --- a/python/ql/test/experimental/meta/ConceptsTest.qll +++ b/python/ql/test/experimental/meta/ConceptsTest.qll @@ -548,14 +548,6 @@ class XmlParsingTest extends InlineExpectationsTest { override predicate hasActualResult(Location location, string element, string tag, string value) { exists(location.getFile().getRelativePath()) and exists(XML::XMLParsing parsing | - exists(DataFlow::Node input | - input = parsing.getAnInput() and - location = input.getLocation() and - element = input.toString() and - value = prettyNodeForInlineTest(input) and - tag = "xmlInput" - ) - or exists(XML::XMLParsingVulnerabilityKind kind | parsing.vulnerableTo(kind) and location = parsing.getLocation() and