mirror of
https://github.com/github/codeql.git
synced 2026-05-02 20:25:13 +02:00
Python: rewrite xml sax modeling
This commit is contained in:
@@ -64,32 +64,90 @@ private module Xml {
|
||||
}
|
||||
}
|
||||
|
||||
/** Gets a reference to a `parser` that has been set a `feature`. */
|
||||
private DataFlow::Node trackSaxFeature(
|
||||
DataFlow::TypeTracker t, DataFlow::CallCfgNode parser, API::Node feature
|
||||
/**
|
||||
* A call to the `setFeature` method on a XML sax parser.
|
||||
*
|
||||
* See https://docs.python.org/3.10/library/xml.sax.reader.html#xml.sax.xmlreader.XMLReader.setFeature
|
||||
*/
|
||||
class SaxParserSetFeatureCall extends DataFlow::MethodCallNode {
|
||||
SaxParserSetFeatureCall() {
|
||||
this =
|
||||
API::moduleImport("xml")
|
||||
.getMember("sax")
|
||||
.getMember("make_parser")
|
||||
.getReturn()
|
||||
.getMember("setFeature")
|
||||
.getACall()
|
||||
}
|
||||
|
||||
// The keyword argument names does not match documentation. I checked (with Python
|
||||
// 3.9.5) that the names used here actually works.
|
||||
DataFlow::Node getFeatureArg() { result in [this.getArg(0), this.getArgByName("name")] }
|
||||
|
||||
DataFlow::Node getStateArg() { result in [this.getArg(1), this.getArgByName("state")] }
|
||||
}
|
||||
|
||||
/** Gets a back-reference to the `setFeature` state argument `arg`. */
|
||||
private DataFlow::TypeTrackingNode saxParserSetFeatureStateArgBacktracker(
|
||||
DataFlow::TypeBackTracker t, DataFlow::Node arg
|
||||
) {
|
||||
t.start() and
|
||||
exists(DataFlow::MethodCallNode featureCall |
|
||||
featureCall = parser.getAMethodCall("setFeature") and
|
||||
featureCall.getArg(0).getALocalSource() = feature.getAUse() and
|
||||
featureCall.getArg(1).getALocalSource() = DataFlow::exprNode(any(True t_)) and
|
||||
result = featureCall.getObject()
|
||||
arg = any(SaxParserSetFeatureCall c).getStateArg() and
|
||||
result = arg.getALocalSource()
|
||||
or
|
||||
exists(DataFlow::TypeBackTracker t2 |
|
||||
result = saxParserSetFeatureStateArgBacktracker(t2, arg).backtrack(t2, t)
|
||||
)
|
||||
}
|
||||
|
||||
/** Gets a back-reference to the `setFeature` state argument `arg`. */
|
||||
DataFlow::LocalSourceNode saxParserSetFeatureStateArgBacktracker(DataFlow::Node arg) {
|
||||
result = saxParserSetFeatureStateArgBacktracker(DataFlow::TypeBackTracker::end(), arg)
|
||||
}
|
||||
|
||||
/** Gets a reference to a XML sax parser that has `feature_external_ges` turned on */
|
||||
private DataFlow::Node saxParserWithFeatureExternalGesTurnedOn(DataFlow::TypeTracker t) {
|
||||
t.start() and
|
||||
exists(SaxParserSetFeatureCall call |
|
||||
call.getFeatureArg() =
|
||||
API::moduleImport("xml")
|
||||
.getMember("sax")
|
||||
.getMember("handler")
|
||||
.getMember("feature_external_ges")
|
||||
.getAUse() and
|
||||
saxParserSetFeatureStateArgBacktracker(call.getStateArg())
|
||||
.asExpr()
|
||||
.(BooleanLiteral)
|
||||
.booleanValue() = true and
|
||||
result = call.getObject()
|
||||
)
|
||||
or
|
||||
exists(DataFlow::TypeTracker t2 |
|
||||
t = t2.smallstep(trackSaxFeature(t2, parser, feature), result)
|
||||
t = t2.smallstep(saxParserWithFeatureExternalGesTurnedOn(t2), result)
|
||||
) and
|
||||
// take account of that we can set the feature to False, which makes the parser safe again
|
||||
not exists(SaxParserSetFeatureCall call |
|
||||
call.getObject() = result and
|
||||
call.getFeatureArg() =
|
||||
API::moduleImport("xml")
|
||||
.getMember("sax")
|
||||
.getMember("handler")
|
||||
.getMember("feature_external_ges")
|
||||
.getAUse() and
|
||||
saxParserSetFeatureStateArgBacktracker(call.getStateArg())
|
||||
.asExpr()
|
||||
.(BooleanLiteral)
|
||||
.booleanValue() = false
|
||||
)
|
||||
}
|
||||
|
||||
/** Gets a reference to a `parser` that has been set a `feature`. */
|
||||
DataFlow::Node trackSaxFeature(DataFlow::CallCfgNode parser, API::Node feature) {
|
||||
result = trackSaxFeature(DataFlow::TypeTracker::end(), parser, feature)
|
||||
/** Gets a reference to a XML sax parser that has been made unsafe for `kind`. */
|
||||
DataFlow::Node saxParserWithFeatureExternalGesTurnedOn() {
|
||||
result = saxParserWithFeatureExternalGesTurnedOn(DataFlow::TypeTracker::end())
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a call to `xml.sax.make_parser`.
|
||||
*
|
||||
* Given the following example:
|
||||
* A XML parsing call with a sax parser.
|
||||
*
|
||||
* ```py
|
||||
* BadHandler = MainHandler()
|
||||
@@ -99,41 +157,27 @@ private module Xml {
|
||||
* parser.parse(StringIO(xml_content))
|
||||
* parsed_xml = BadHandler._result
|
||||
* ```
|
||||
*
|
||||
* * `this` would be `xml.sax.make_parser()`.
|
||||
* * `getAnInput()`'s result would be `StringIO(xml_content)`.
|
||||
* * `vulnerable(kind)`'s `kind` would be `Billion Laughs` and `Quadratic Blowup`.
|
||||
*/
|
||||
private class XMLSaxParser extends DataFlow::CallCfgNode, XML::XMLParser::Range {
|
||||
XMLSaxParser() {
|
||||
this = API::moduleImport("xml").getMember("sax").getMember("make_parser").getACall()
|
||||
private class XMLSaxParsing extends DataFlow::MethodCallNode, XML::XMLParsing::Range {
|
||||
XMLSaxParsing() {
|
||||
this =
|
||||
API::moduleImport("xml")
|
||||
.getMember("sax")
|
||||
.getMember("make_parser")
|
||||
.getReturn()
|
||||
.getMember("parse")
|
||||
.getACall()
|
||||
}
|
||||
|
||||
override DataFlow::Node getAnInput() { result = this.getAMethodCall("parse").getArg(0) }
|
||||
override DataFlow::Node getAnInput() { result = this.getArg(0) }
|
||||
|
||||
override predicate vulnerable(XML::XMLVulnerabilityKind kind) {
|
||||
exists(DataFlow::MethodCallNode parse, API::Node handler, API::Node feature |
|
||||
handler = API::moduleImport("xml").getMember("sax").getMember("handler") and
|
||||
parse.calls(trackSaxFeature(this, feature), "parse") and
|
||||
parse.getArg(0) = this.getAnInput() // enough to avoid FPs?
|
||||
|
|
||||
(kind.isXxe() or kind.isDtdRetrieval()) and
|
||||
feature = handler.getMember("feature_external_ges")
|
||||
or
|
||||
(kind.isBillionLaughs() or kind.isQuadraticBlowup())
|
||||
)
|
||||
}
|
||||
|
||||
predicate vulnerable(DataFlow::Node n, XML::XMLVulnerabilityKind kind) {
|
||||
exists(API::Node handler, API::Node feature |
|
||||
handler = API::moduleImport("xml").getMember("sax").getMember("handler") and
|
||||
DataFlow::exprNode(trackSaxFeature(this, feature).asExpr())
|
||||
.(DataFlow::LocalSourceNode)
|
||||
.flowsTo(n)
|
||||
|
|
||||
(kind.isXxe() or kind.isDtdRetrieval()) and
|
||||
feature = handler.getMember("feature_external_ges")
|
||||
)
|
||||
// always vuln to these
|
||||
(kind.isBillionLaughs() or kind.isQuadraticBlowup())
|
||||
or
|
||||
// can be vuln to other things if features has been turned on
|
||||
this.getObject() = saxParserWithFeatureExternalGesTurnedOn() and
|
||||
(kind.isXxe() or kind.isDtdRetrieval())
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -227,8 +227,10 @@ subpaths
|
||||
| xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | xml_dom.py:31:19:31:25 | ControlFlowNode for request | xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_dom.py:33:40:33:50 | ControlFlowNode for xml_content | This | xml_dom.py:31:19:31:25 | ControlFlowNode for request | user-provided value |
|
||||
| xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | xml_etree.py:46:19:46:25 | ControlFlowNode for request | xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | xml_etree.py:49:45:49:55 | ControlFlowNode for xml_content | This | xml_etree.py:46:19:46:25 | ControlFlowNode for request | user-provided value |
|
||||
| xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | xml_etree.py:53:19:53:25 | ControlFlowNode for request | xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | xml_etree.py:56:45:56:55 | ControlFlowNode for xml_content | This | xml_etree.py:53:19:53:25 | ControlFlowNode for request | user-provided value |
|
||||
| xml_sax_make_parser.py:36:18:36:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:31:19:31:25 | ControlFlowNode for request | xml_sax_make_parser.py:36:18:36:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_sax_make_parser.py:36:18:36:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:31:19:31:25 | ControlFlowNode for request | user-provided value |
|
||||
| xml_sax_make_parser.py:49:18:49:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:42:19:42:25 | ControlFlowNode for request | xml_sax_make_parser.py:49:18:49:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_sax_make_parser.py:49:18:49:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:42:19:42:25 | ControlFlowNode for request | user-provided value |
|
||||
| xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. | xml_sax_make_parser.py:69:18:69:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:63:19:63:25 | ControlFlowNode for request | user-provided value |
|
||||
| xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. | xml_sax_make_parser.py:91:22:91:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value |
|
||||
| xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value |
|
||||
| xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | user-provided value |
|
||||
| xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_sax_make_parser.py:93:22:93:42 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:86:19:86:25 | ControlFlowNode for request | user-provided value |
|
||||
| xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_sax_make_parser.py:103:18:103:38 | ControlFlowNode for StringIO() | This | xml_sax_make_parser.py:98:19:98:25 | ControlFlowNode for request | user-provided value |
|
||||
| xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | xml_to_dict.py:15:19:15:25 | ControlFlowNode for request | xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, Quadratic Blowup. | xml_to_dict.py:17:28:17:38 | ControlFlowNode for xml_content | This | xml_to_dict.py:15:19:15:25 | ControlFlowNode for request | user-provided value |
|
||||
|
||||
Reference in New Issue
Block a user