python: implement stdlib xpath support

This commit is contained in:
Rasmus Lerchedahl Petersen
2022-03-02 12:59:34 +01:00
parent 06e0f140c5
commit 80be767a7a
2 changed files with 70 additions and 6 deletions

View File

@@ -2809,6 +2809,70 @@ private module StdlibPrivate {
override string getKind() { result = Escaping::getRegexKind() }
}
// ---------------------------------------------------------------------------
// xml.etree.ElementTree
// ---------------------------------------------------------------------------
/**
 * Gets an API node for an instance of `xml.etree.ElementTree.ElementTree`.
 *
 * Such an instance is the return value of either the module-level `parse`
 * function or the `ElementTree` class constructor.
 *
 * See https://docs.python.org/3.10/library/xml.etree.elementtree.html#xml.etree.ElementTree.ElementTree
 */
private API::Node elementTreeInstance() {
  // `parse(...)` parses to a tree; `ElementTree(...)` constructs a tree without parsing.
  result =
    API::moduleImport("xml")
        .getMember("etree")
        .getMember("ElementTree")
        .getMember(["parse", "ElementTree"])
        .getReturn()
}
/**
 * Gets an API node for an instance of `xml.etree.ElementTree.Element`.
 *
 * An element is obtained either directly from one of the module-level string
 * parsers, or from the `parse`/`getroot` methods of a tree instance.
 *
 * See https://docs.python.org/3.10/library/xml.etree.elementtree.html#xml.etree.ElementTree.Element
 */
private API::Node elementInstance() {
  // module-level functions that parse directly to an element
  exists(API::Node etreeModule |
    etreeModule = API::moduleImport("xml").getMember("etree").getMember("ElementTree")
  |
    result = etreeModule.getMember(["fromstring", "fromstringlist", "XML"]).getReturn()
  )
  or
  // methods on a tree that parse into, or return, its root element
  result = elementTreeInstance().getMember(["parse", "getroot"]).getReturn()
}
/**
 * A call to a find method on a tree or an element will execute an XPath expression.
 *
 * The modeled methods are `find`, `findall`, `findtext` and `iterfind`. Each of
 * them takes the XPath expression as its first positional argument.
 */
private class ElementTreeFindCall extends XPathExecution::Range, DataFlow::CallCfgNode {
  string methodName;

  ElementTreeFindCall() {
    // `iterfind` evaluates its path argument in the same way as the other find methods.
    methodName in ["find", "findall", "findtext", "iterfind"] and
    (
      this = elementTreeInstance().getMember(methodName).getACall()
      or
      this = elementInstance().getMember(methodName).getACall()
    )
  }

  override DataFlow::Node getXPath() {
    // The library documentation names this parameter `match`, whereas the CPython
    // implementation names it `path`; accept either keyword.
    result in [this.getArg(0), this.getArgByName(["match", "path"])]
  }

  override string getName() { result = "xml.etree" }
}
// ---------------------------------------------------------------------------
// urllib
// ---------------------------------------------------------------------------

View File

@@ -5,14 +5,14 @@ import xml.etree.ElementTree as ET
# Parse a file into a tree and take its root element; the calls below are made on that element.
tree = ET.parse('country_data.xml')
root = tree.getroot()
# NOTE(review): each group of three `MISSING:` lines is immediately followed by the same
# three calls without `MISSING:` — presumably the before/after sides of one diff hunk, so
# only one of the two groups should exist in the real file. Confirm before editing.
root.find(match, namespaces=ns) # $ MISSING: getXPath=match
root.findall(match, namespaces=ns) # $ MISSING: getXPath=match
root.findtext(match, default=None, namespaces=ns) # $ MISSING: getXPath=match
root.find(match, namespaces=ns) # $ getXPath=match
root.findall(match, namespaces=ns) # $ getXPath=match
root.findtext(match, default=None, namespaces=ns) # $ getXPath=match
# Construct a tree without parsing, then call the same find methods on the tree itself.
from xml.etree.ElementTree import ElementTree
tree = ElementTree()
tree.parse("index.xhtml")
tree.find(match, namespaces=ns) # $ MISSING: getXPath=match
tree.findall(match, namespaces=ns) # $ MISSING: getXPath=match
tree.findtext(match, default=None, namespaces=ns) # $ MISSING: getXPath=match
tree.find(match, namespaces=ns) # $ getXPath=match
tree.findall(match, namespaces=ns) # $ getXPath=match
tree.findtext(match, default=None, namespaces=ns) # $ getXPath=match