mirror of
https://github.com/github/codeql.git
synced 2026-04-29 10:45:15 +02:00
Python: Model file access from XML parsing
This commit is contained in:
@@ -275,13 +275,38 @@ private module Lxml {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A call to `lxml.etree.ElementTree.parse` or `lxml.etree.ElementTree.parseid`, which
|
||||
* takes either a filename or a file-like object as argument. To capture the filename
|
||||
* for path-injection, we have this subclass.
|
||||
*
|
||||
* See
|
||||
* - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.parse
|
||||
* - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.parseid
|
||||
*/
|
||||
private class FileAccessFromLXMLParsing extends LXMLParsing, FileSystemAccess::Range {
|
||||
FileAccessFromLXMLParsing() {
|
||||
this = API::moduleImport("lxml").getMember("etree").getMember(["parse", "parseid"]).getACall()
|
||||
// I considered whether we should try to reduce FPs from people passing file-like
|
||||
// objects, which will not be a file system access (and couldn't cause a
|
||||
// path-injection).
|
||||
//
|
||||
// I suppose that once we have proper flow-summary support for file-like objects,
|
||||
// we can make the XXE/XML-bomb sinks allow an access-path, while the
|
||||
// path-injection sink wouldn't, and then we will not end up with such FPs.
|
||||
}
|
||||
|
||||
override DataFlow::Node getAPathArgument() { result = this.getAnInput() }
|
||||
}
|
||||
|
||||
/**
|
||||
* A call to `lxml.etree.iterparse`
|
||||
*
|
||||
* See
|
||||
* - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.iterparse
|
||||
*/
|
||||
private class LXMLIterparseCall extends DataFlow::CallCfgNode, XML::XMLParsing::Range {
|
||||
private class LXMLIterparseCall extends DataFlow::CallCfgNode, XML::XMLParsing::Range,
|
||||
FileSystemAccess::Range {
|
||||
LXMLIterparseCall() {
|
||||
this = API::moduleImport("lxml").getMember("etree").getMember("iterparse").getACall()
|
||||
}
|
||||
@@ -303,5 +328,7 @@ private module Lxml {
|
||||
override predicate mayExecuteInput() { none() }
|
||||
|
||||
override DataFlow::Node getOutput() { result = this }
|
||||
|
||||
override DataFlow::Node getAPathArgument() { result = this.getAnInput() }
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3306,6 +3306,35 @@ private module StdlibPrivate {
|
||||
result = this
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A call to `xml.etree.ElementTree.parse` or `xml.etree.ElementTree.iterparse`, which
|
||||
* takes either a filename or a file-like object as argument. To capture the filename
|
||||
* for path-injection, we have this subclass.
|
||||
*
|
||||
* See
|
||||
* - https://docs.python.org/3/library/xml.etree.elementtree.html#xml.etree.ElementTree.parse
|
||||
* - https://docs.python.org/3/library/xml.etree.elementtree.html#xml.etree.ElementTree.iterparse
|
||||
*/
|
||||
private class FileAccessFromXMLEtreeParsing extends XMLEtreeParsing, FileSystemAccess::Range {
|
||||
FileAccessFromXMLEtreeParsing() {
|
||||
this =
|
||||
API::moduleImport("xml")
|
||||
.getMember("etree")
|
||||
.getMember("ElementTree")
|
||||
.getMember(["parse", "iterparse"])
|
||||
.getACall()
|
||||
// I considered whether we should try to reduce FPs from people passing file-like
|
||||
// objects, which will not be a file system access (and couldn't cause a
|
||||
// path-injection).
|
||||
//
|
||||
// I suppose that once we have proper flow-summary support for file-like objects,
|
||||
// we can make the XXE/XML-bomb sinks allow an access-path, while the
|
||||
// path-injection sink wouldn't, and then we will not end up with such FPs.
|
||||
}
|
||||
|
||||
override DataFlow::Node getAPathArgument() { result = this.getAnInput() }
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
@@ -17,14 +17,14 @@ lxml.etree.XMLID(x) # $ decodeFormat=XML decodeInput=x xmlVuln='XXE' decodeOutpu
|
||||
lxml.etree.XMLID(text=x) # $ decodeFormat=XML decodeInput=x xmlVuln='XXE' decodeOutput=lxml.etree.XMLID(..)
|
||||
|
||||
xml_file = 'xml_file'
|
||||
lxml.etree.parse(xml_file) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='XXE' decodeOutput=lxml.etree.parse(..)
|
||||
lxml.etree.parse(source=xml_file) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='XXE' decodeOutput=lxml.etree.parse(..)
|
||||
lxml.etree.parse(xml_file) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='XXE' decodeOutput=lxml.etree.parse(..) getAPathArgument=xml_file
|
||||
lxml.etree.parse(source=xml_file) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='XXE' decodeOutput=lxml.etree.parse(..) getAPathArgument=xml_file
|
||||
|
||||
lxml.etree.parseid(xml_file) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='XXE' decodeOutput=lxml.etree.parseid(..)
|
||||
lxml.etree.parseid(source=xml_file) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='XXE' decodeOutput=lxml.etree.parseid(..)
|
||||
lxml.etree.parseid(xml_file) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='XXE' decodeOutput=lxml.etree.parseid(..) getAPathArgument=xml_file
|
||||
lxml.etree.parseid(source=xml_file) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='XXE' decodeOutput=lxml.etree.parseid(..) getAPathArgument=xml_file
|
||||
|
||||
lxml.etree.iterparse(xml_file) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='XXE' decodeOutput=lxml.etree.iterparse(..)
|
||||
lxml.etree.iterparse(source=xml_file) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='XXE' decodeOutput=lxml.etree.iterparse(..)
|
||||
lxml.etree.iterparse(xml_file) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='XXE' decodeOutput=lxml.etree.iterparse(..) getAPathArgument=xml_file
|
||||
lxml.etree.iterparse(source=xml_file) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='XXE' decodeOutput=lxml.etree.iterparse(..) getAPathArgument=xml_file
|
||||
|
||||
# With default parsers (nothing changed)
|
||||
parser = lxml.etree.XMLParser()
|
||||
@@ -63,5 +63,5 @@ lxml.etree.fromstring(x, parser=parser) # $ decodeFormat=XML decodeInput=x xmlVu
|
||||
# iterparse configurations ... this doesn't use a parser argument but takes MOST (!) of
|
||||
# the normal XMLParser arguments. Specifically, it doesn't allow disabling XXE :O
|
||||
|
||||
lxml.etree.iterparse(xml_file, huge_tree=True) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' xmlVuln='XXE' decodeOutput=lxml.etree.iterparse(..)
|
||||
lxml.etree.iterparse(xml_file, load_dtd=True, no_network=False) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='DTD retrieval' xmlVuln='XXE' decodeOutput=lxml.etree.iterparse(..)
|
||||
lxml.etree.iterparse(xml_file, huge_tree=True) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' xmlVuln='XXE' decodeOutput=lxml.etree.iterparse(..) getAPathArgument=xml_file
|
||||
lxml.etree.iterparse(xml_file, load_dtd=True, no_network=False) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='DTD retrieval' xmlVuln='XXE' decodeOutput=lxml.etree.iterparse(..) getAPathArgument=xml_file
|
||||
|
||||
@@ -2,7 +2,7 @@ from lxml import etree
|
||||
from io import StringIO
|
||||
|
||||
def test_parse():
|
||||
tree = etree.parse(StringIO('<foo><bar></bar></foo>')) # $ decodeFormat=XML decodeInput=StringIO(..) decodeOutput=etree.parse(..) xmlVuln='XXE'
|
||||
tree = etree.parse(StringIO('<foo><bar></bar></foo>')) # $ decodeFormat=XML decodeInput=StringIO(..) decodeOutput=etree.parse(..) xmlVuln='XXE' getAPathArgument=StringIO(..)
|
||||
r = tree.xpath('/foo/bar') # $ getXPath='/foo/bar'
|
||||
|
||||
def test_XPath_class():
|
||||
|
||||
@@ -2,7 +2,7 @@ match = "dc:title"
|
||||
ns = {'dc': 'http://purl.org/dc/elements/1.1/'}
|
||||
|
||||
import xml.etree.ElementTree as ET
|
||||
tree = ET.parse('country_data.xml') # $ decodeFormat=XML decodeInput='country_data.xml' decodeOutput=ET.parse(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup'
|
||||
tree = ET.parse('country_data.xml') # $ decodeFormat=XML decodeInput='country_data.xml' decodeOutput=ET.parse(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' getAPathArgument='country_data.xml'
|
||||
root = tree.getroot()
|
||||
|
||||
root.find(match, namespaces=ns) # $ getXPath=match
|
||||
|
||||
@@ -16,11 +16,11 @@ xml.etree.ElementTree.XML(text=x) # $ decodeFormat=XML decodeInput=x xmlVuln='Bi
|
||||
xml.etree.ElementTree.XMLID(x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.XMLID(..)
|
||||
xml.etree.ElementTree.XMLID(text=x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.XMLID(..)
|
||||
|
||||
xml.etree.ElementTree.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.parse(..)
|
||||
xml.etree.ElementTree.parse(source=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.parse(..)
|
||||
xml.etree.ElementTree.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.parse(..) getAPathArgument=StringIO(..)
|
||||
xml.etree.ElementTree.parse(source=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.parse(..) getAPathArgument=StringIO(..)
|
||||
|
||||
xml.etree.ElementTree.iterparse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.iterparse(..)
|
||||
xml.etree.ElementTree.iterparse(source=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.iterparse(..)
|
||||
xml.etree.ElementTree.iterparse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.iterparse(..) getAPathArgument=StringIO(..)
|
||||
xml.etree.ElementTree.iterparse(source=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.iterparse(..) getAPathArgument=StringIO(..)
|
||||
|
||||
|
||||
# With parsers (no options available to disable/enable security features)
|
||||
|
||||
Reference in New Issue
Block a user