Python: Use concept tests for XML Parsing

I was losing my mind from looking through those .expected files

Just going to take it one file at a time, to make reviewing easier
This commit is contained in:
Rasmus Wriedt Larsen
2022-03-03 20:36:16 +01:00
parent 4b03f5c724
commit faebaee141
4 changed files with 73 additions and 90 deletions

View File

@@ -0,0 +1,33 @@
import python
import experimental.semmle.python.Concepts
import experimental.semmle.python.frameworks.Xml
import semmle.python.dataflow.new.DataFlow
import TestUtilities.InlineExpectationsTest
private import semmle.python.dataflow.new.internal.PrintNode
/**
 * Inline-expectations test for `XML::XMLParsing` nodes.
 *
 * Two tags are produced:
 * - `input`: one result per node returned by `getAnInput()`, located at that input.
 * - `vuln`: one result per `XML::XMLVulnerabilityKind` for which `vulnerable(kind)`
 *   holds, located at the parsing node, with the kind wrapped in single quotes.
 */
class XmlParsingTest extends InlineExpectationsTest {
  XmlParsingTest() { this = "XmlParsingTest" }

  /** Gets a tag this test reports: `input` or `vuln`. */
  override string getARelevantTag() { result = ["input", "vuln"] }

  /** Holds if the test produces `tag=value` for `element` at `location`. */
  override predicate hasActualResult(Location location, string element, string tag, string value) {
    // Only keep results whose file has a relative path.
    exists(location.getFile().getRelativePath()) and
    exists(XML::XMLParsing xmlParse |
      // `input`: every data-flow node that is an input of the parsing.
      tag = "input" and
      exists(DataFlow::Node arg | arg = xmlParse.getAnInput() |
        location = arg.getLocation() and
        element = arg.toString() and
        value = prettyNodeForInlineTest(arg)
      )
      or
      // `vuln`: every vulnerability kind the parsing is vulnerable to.
      tag = "vuln" and
      exists(XML::XMLVulnerabilityKind vulnKind | xmlParse.vulnerable(vulnKind) |
        location = xmlParse.getLocation() and
        element = xmlParse.toString() and
        value = "'" + vulnKind + "'"
      )
    )
  }
}

View File

@@ -0,0 +1,40 @@
from io import StringIO
import lxml.etree
# Placeholder XML payload; used (directly or wrapped) as the input of every
# parsing call below.
x = "some xml"
# NOTE(review): the trailing `# $ tag=value` comments are inline test
# expectations (tags `input` and `vuln`), presumably matched against the
# results of the accompanying inline-expectations query -- do not reword them
# or move them off the line of the call they annotate.
# different parsing methods
lxml.etree.fromstring(x) # $ input=x vuln='XXE'
lxml.etree.fromstringlist([x]) # $ input=List vuln='XXE'
lxml.etree.XML(x) # $ input=x vuln='XXE'
lxml.etree.parse(StringIO(x)).getroot() # $ input=StringIO(..) vuln='XXE'
# With default parsers (nothing changed)
# Both calls are expected to be reported exactly like the parser-less calls above.
parser = lxml.etree.XMLParser()
lxml.etree.fromstring(x, parser=parser) # $ input=x vuln='XXE'
parser = lxml.etree.get_default_parser()
lxml.etree.fromstring(x, parser=parser) # $ input=x vuln='XXE'
# XXE-safe
# Only the `input` tag is expected here: no `vuln` result at all.
parser = lxml.etree.XMLParser(resolve_entities=False)
lxml.etree.fromstring(x, parser=parser) # $ input=x
# XXE-vuln
parser = lxml.etree.XMLParser(resolve_entities=True)
lxml.etree.fromstring(x, parser=parser) # $ input=x vuln='XXE'
# Billion laughs vuln (also XXE)
# `huge_tree=True` is expected to add 'Billion Laughs' and 'Quadratic Blowup'
# on top of the 'XXE' result.
parser = lxml.etree.XMLParser(huge_tree=True)
lxml.etree.fromstring(x, parser=parser) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' vuln='XXE'
# Billion laughs, but not XXE
parser = lxml.etree.XMLParser(resolve_entities=False, huge_tree=True)
lxml.etree.fromstring(x, parser=parser) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
# DTD retrieval vuln (also XXE)
parser = lxml.etree.XMLParser(load_dtd=True, no_network=False)
lxml.etree.fromstring(x, parser=parser) # $ input=x vuln='DTD retrieval' vuln='XXE'

View File

@@ -1,90 +0,0 @@
from flask import request, Flask
from io import StringIO, BytesIO
import lxml.etree
# Flask application on which the route handlers below are registered.
app = Flask(__name__)
# Parsing
@app.route("/lxml_etree_fromstring")
def lxml_etree_fromstring():
    """Parse the ``xml_content`` query argument with ``lxml.etree.fromstring``.

    Returns the ``.text`` attribute of the parse result.
    """
    payload = request.args['xml_content']
    parsed = lxml.etree.fromstring(payload)  # NOT OK for XXE
    return parsed.text
@app.route("/lxml_etree_fromstringlist")
def lxml_etree_fromstringlist():
    """Parse the ``xml_content`` query argument with ``lxml.etree.fromstringlist``.

    The argument is passed as a one-element list; the ``.text`` attribute of
    the parse result is returned.
    """
    payload = request.args['xml_content']
    fragments = [payload]
    parsed = lxml.etree.fromstringlist(fragments)  # NOT OK for XXE
    return parsed.text
@app.route("/lxml_etree_XML")
def lxml_etree_XML():
    """Parse the ``xml_content`` query argument with ``lxml.etree.XML``.

    Returns the ``.text`` attribute of the parse result.
    """
    payload = request.args['xml_content']
    parsed = lxml.etree.XML(payload)  # NOT OK for XXE
    return parsed.text
@app.route("/lxml_etree_parse")
def lxml_etree_parse():
    """Parse the ``xml_content`` query argument with ``lxml.etree.parse``.

    The argument is wrapped in a ``StringIO`` before parsing; the ``.text``
    attribute of ``getroot()`` on the parse result is returned.
    """
    payload = request.args['xml_content']
    stream = StringIO(payload)
    tree = lxml.etree.parse(stream)  # NOT OK for XXE
    return tree.getroot().text
# With parsers - Default
@app.route("/lxml_etree_fromstring-lxml.etree.XMLParser")
def lxml_parser():
    """Parse ``xml_content`` with a ``lxml.etree.XMLParser()`` built without arguments.

    Returns the ``.text`` attribute of the parse result.
    """
    payload = request.args['xml_content']
    default_parser = lxml.etree.XMLParser()
    parsed = lxml.etree.fromstring(payload, parser=default_parser)  # NOT OK for XXE
    return parsed.text
# An explicit endpoint is required: this module defines several views named
# ``lxml_parser``, and Flask refuses to register two different functions under
# the same (function-name derived) endpoint.
@app.route(
    "/lxml_etree_fromstring-lxml.etree.get_default_parser",
    endpoint="lxml_parser_get_default_parser",
)
def lxml_parser():
    """Parse ``xml_content`` with the parser from ``lxml.etree.get_default_parser()``.

    Returns the ``.text`` attribute of the parse result.
    """
    xml_content = request.args['xml_content']
    parser = lxml.etree.get_default_parser()
    return lxml.etree.fromstring(xml_content, parser=parser).text  # NOT OK for XXE
# With parsers - With options
# XXE-safe
# An explicit endpoint is required: this module defines several views named
# ``lxml_parser``, and Flask refuses to register two different functions under
# the same (function-name derived) endpoint.
@app.route(
    "/lxml_etree_fromstring-lxml.etree.XMLParser+",
    endpoint="lxml_parser_resolve_entities_false",
)
def lxml_parser():
    """Parse ``xml_content`` with ``XMLParser(resolve_entities=False)``.

    Returns the ``.text`` attribute of the parse result.
    """
    xml_content = request.args['xml_content']
    parser = lxml.etree.XMLParser(resolve_entities=False)
    return lxml.etree.fromstring(xml_content, parser=parser).text  # OK for XXE
# XXE-vuln
# Two fixes to the registration:
# - explicit endpoint, because this module defines several views named
#   ``lxml_parser`` and Flask refuses to register two different functions
#   under the same (function-name derived) endpoint;
# - distinct path, because the original rule string is registered by more than
#   one view in this module, so identical rules could not all be reached.
@app.route(
    "/lxml_etree_fromstring-lxml.etree.XMLParser+resolve_entities",
    endpoint="lxml_parser_resolve_entities_true",
)
def lxml_parser():
    """Parse ``xml_content`` with ``XMLParser(resolve_entities=True)``.

    Returns the ``.text`` attribute of the parse result.
    """
    xml_content = request.args['xml_content']
    parser = lxml.etree.XMLParser(resolve_entities=True)
    return lxml.etree.fromstring(xml_content, parser=parser).text  # NOT OK for XXE
# Billion laughs and quadratic blowup (huge_tree)
# Two fixes to the registration:
# - explicit endpoint, because this module defines several views named
#   ``lxml_parser`` and Flask refuses to register two different functions
#   under the same (function-name derived) endpoint;
# - distinct path, because the original rule string is registered by more than
#   one view in this module, so identical rules could not all be reached.
@app.route(
    "/lxml_etree_fromstring-lxml.etree.XMLParser+huge_tree+no_resolve_entities",
    endpoint="lxml_parser_huge_tree_no_resolve_entities",
)
def lxml_parser():
    """Parse ``xml_content`` with ``XMLParser(resolve_entities=False, huge_tree=True)``.

    Returns the ``.text`` attribute of the parse result.
    """
    xml_content = request.args['xml_content']
    parser = lxml.etree.XMLParser(resolve_entities=False, huge_tree=True)
    return lxml.etree.fromstring(xml_content, parser=parser).text  # OK for XXE, NOT OK for billion laughs/quadratic
# Two fixes to the registration:
# - explicit endpoint, because this module defines several views named
#   ``lxml_parser`` and Flask refuses to register two different functions
#   under the same (function-name derived) endpoint;
# - distinct path, because the original rule string is registered by more than
#   one view in this module, so identical rules could not all be reached.
@app.route(
    "/lxml_etree_fromstring-lxml.etree.XMLParser+huge_tree",
    endpoint="lxml_parser_huge_tree",
)
def lxml_parser():
    """Parse ``xml_content`` with ``XMLParser(huge_tree=True)``.

    Returns the ``.text`` attribute of the parse result.
    """
    xml_content = request.args['xml_content']
    parser = lxml.etree.XMLParser(huge_tree=True)
    return lxml.etree.fromstring(xml_content, parser=parser).text  # NOT OK for XXE, NOT OK for billion laughs/quadratic
# DTD retrieval
# Two fixes to the registration:
# - explicit endpoint, because this module defines several views named
#   ``lxml_parser`` and Flask refuses to register two different functions
#   under the same (function-name derived) endpoint;
# - distinct path, because the original rule string is registered by more than
#   one view in this module, so identical rules could not all be reached.
@app.route(
    "/lxml_etree_fromstring-lxml.etree.XMLParser+load_dtd",
    endpoint="lxml_parser_load_dtd",
)
def lxml_parser():
    """Parse ``xml_content`` with a DTD-loading, network-enabled ``XMLParser``.

    The parser is built with ``resolve_entities=False, load_dtd=True,
    no_network=False``. Returns the ``.text`` attribute of the parse result.
    """
    xml_content = request.args['xml_content']
    parser = lxml.etree.XMLParser(resolve_entities=False, load_dtd=True, no_network=False)
    return lxml.etree.fromstring(xml_content, parser=parser).text  # NOT OK for DTD, OK for rest