Qldoc improvements + add a few extra tests

This commit is contained in:
Joe Farebrother
2024-12-11 12:25:40 +00:00
parent 5c8ef28d12
commit 2019ddfa7f
2 changed files with 17 additions and 8 deletions

View File

@@ -69,6 +69,8 @@ module Lxml {
*/
class XPathCall extends XML::XPathExecution::Range, DataFlow::CallCfgNode {
XPathCall() {
// TODO: lxml.etree.parseid(<text>)[0] will contain the root element from parsing <text>
// but we don't really have a way to model that nicely.
this = [Element::instance(), ElementTree::instance()].getMember("xpath").getACall()
}
@@ -201,9 +203,10 @@ module Lxml {
* A call to either of:
* - `lxml.etree.fromstring`
* - `lxml.etree.fromstringlist`
* -
* - `lxml.etree.HTML`
* - `lxml.etree.XML`
* - `lxml.etree.XMLID`
* - `lxml.etree.XMLDTDID`
* - `lxml.etree.parse`
* - `lxml.etree.parseid`
*
@@ -329,7 +332,7 @@ module Lxml {
* calls, or a special parameter that will be set when functions are called by an external
* library.
*
* Use the predicate `Element::instance()` to get references to instances of `lxml.etree.ElementTree` instances.
* Use the predicate `Element::instance()` to get references to instances of `lxml.etree.Element`.
*/
abstract class InstanceSource instanceof API::Node {
/** Gets a textual representation of this element. */
@@ -354,7 +357,8 @@ module Lxml {
etreeRef().getMember("get_default_parser").getReturn()
].getMember("close").getReturn()
or
// TODO: `XMLID` and `parseid` returns a tuple of which the first element is an `Element`
// TODO: `XMLID`, `XMLDTDID`, and `parseid` each return a tuple of which the first element is an `Element`.
// `iterparse` returns an iterator of tuples, each of which has a second element that is an `Element`.
this = etreeRef().getMember(["XML", "HTML", "fromstring", "fromstringlist"]).getReturn()
}
}
@@ -393,15 +397,18 @@ module Lxml {
}
}
/** An additional taint step from an `Element` instance. See https://lxml.de/apidoc/lxml.etree.html#lxml.etree.ElementBase */
/**
* An additional taint step from an `Element` instance.
* See https://lxml.de/apidoc/lxml.etree.html#lxml.etree.ElementBase.
*/
private class ElementTaintStep extends TaintTracking::AdditionalTaintStep {
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
exists(DataFlow::MethodCallNode call |
nodeTo = call and instance().asSource().flowsTo(nodeFrom)
|
call.calls(nodeFrom,
// We consider a node to be tainted if there could be taint anywhere in the element tree
// So sibling nodes (e.g. `getnext`) are also tainted
// We consider a node to be tainted if there could be taint anywhere in the element tree,
// so sibling nodes (e.g. `getnext`) are also tainted.
// This ensures nodes like `elem[0].getnext()` are tracked.
[
"cssselect", "find", "findall", "findtext", "get", "getchildren", "getiterator",
@@ -445,7 +452,7 @@ module Lxml {
ElementTreeInstance() { this = classRef().getAnInstance() }
}
/** The result of a parst operation that returns an `ElementTree` */
/** The result of a parse operation that returns an `ElementTree`. */
private class ParseResult extends InstanceSource {
ParseResult() { this = etreeRef().getMember("parse").getReturn() }
}

View File

@@ -32,6 +32,7 @@ def test():
elem, # $ tainted
ET.tostring(elem), # $ tainted encodeFormat=XML encodeInput=elem encodeOutput=ET.tostring(..)
ET.tostringlist(elem), # $ tainted encodeFormat=XML encodeInput=elem encodeOutput=ET.tostringlist(..)
ET.tounicode(elem), # $ tainted encodeFormat=XML encodeInput=elem encodeOutput=ET.tounicode(..)
elem.attrib, # $ tainted
elem.base, # $ tainted
elem.nsmap, # $ tainted
@@ -82,7 +83,7 @@ def test():
)
buf = io.StringIO(src)
tree = ET.parse(buf) # $ decodeFormat=XML decodeInput=buf xmlVuln='XXE' decodeOutput=ET.parse(..) SPURIOUS:getAPathArgument=buf # Spurious as this is used as a file-like objectt, not a path
tree = ET.parse(buf) # $ decodeFormat=XML decodeInput=buf xmlVuln='XXE' decodeOutput=ET.parse(..) SPURIOUS:getAPathArgument=buf # Spurious as this is used as a file-like object, not a path
ensure_tainted(
tree, # $ tainted
tree.getroot().text, # $ tainted
@@ -94,6 +95,7 @@ def test():
next(tree.iter()).text, # $ MISSING:tainted
tree.iterfind("b"), # $ tainted
next(tree.iterfind("b")).text, # $ MISSING:tainted
tree.xpath("b")[0].text, # $ tainted getXPath="b"
)
(elem2, ids) = ET.XMLID(src) # $ decodeFormat=XML decodeInput=src xmlVuln='XXE' decodeOutput=ET.XMLID(..)