Python: Port py/xslt-injection to new data-flow

This commit is contained in:
Rasmus Wriedt Larsen
2023-08-17 15:33:01 +02:00
parent ef139f2ee9
commit 4c693b4fc3
6 changed files with 286 additions and 67 deletions

View File

@@ -0,0 +1,110 @@
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.ApiGraphs
/**
 * A data-flow node that constructs an XSLT transformer.
 *
 * Extend this class to refine existing API models. If you want to model new APIs,
 * extend `XsltConstruction::Range` instead.
 */
class XsltConstruction extends DataFlow::Node instanceof XsltConstruction::Range {
  /** Gets the argument that specifies the XSLT transformer. */
  DataFlow::Node getXsltArg() { result = super.getXsltArg() }
}
/** Provides a class for modeling new XSLT transformer construction APIs. */
module XsltConstruction {
  /**
   * A data-flow node that constructs an XSLT transformer.
   *
   * Extend this class to model new APIs. If you want to refine existing API models,
   * extend `XsltConstruction` instead.
   */
  abstract class Range extends DataFlow::Node {
    /** Gets the argument that specifies the XSLT transformer. */
    abstract DataFlow::Node getXsltArg();
  }
}
/**
 * A data-flow node that executes an XSLT transformer.
 *
 * Extend this class to refine existing API models. If you want to model new APIs,
 * extend `XsltExecution::Range` instead.
 */
class XsltExecution extends DataFlow::Node instanceof XsltExecution::Range {
  /** Gets the argument that specifies the XSLT transformer. */
  DataFlow::Node getXsltArg() { result = super.getXsltArg() }
}
/** Provides a class for modeling new XSLT transformer execution APIs. */
module XsltExecution {
  /**
   * A data-flow node that executes an XSLT transformer.
   *
   * Extend this class to model new APIs. If you want to refine existing API models,
   * extend `XsltExecution` instead.
   */
  abstract class Range extends DataFlow::Node {
    /** Gets the argument that specifies the XSLT transformer. */
    abstract DataFlow::Node getXsltArg();
  }
}
// -----------------------------------------------------------------------------
/**
 * An invocation of `lxml.etree.XSLT`, modeled as an XSLT construction.
 *
 * Example:
 *
 * ```py
 * from lxml import etree
 * xslt_tree = etree.parse(...)
 * doc = etree.parse(...)
 * transform = etree.XSLT(xslt_tree)
 * result = transform(doc)
 * ```
 */
class LxmlEtreeXsltCall extends XsltConstruction::Range, API::CallNode {
  LxmlEtreeXsltCall() {
    exists(API::Node xsltClass |
      xsltClass = API::moduleImport("lxml").getMember("etree").getMember("XSLT")
    |
      this = xsltClass.getACall()
    )
  }

  /** Gets the first positional argument, or the argument passed as keyword `xslt_input`. */
  override DataFlow::Node getXsltArg() {
    exists(API::Node xsltParam | xsltParam = this.getParameter(0, "xslt_input") |
      result = xsltParam.asSink()
    )
  }
}
/**
 * An invocation of the `xslt` member on the value returned by one of the calls
 * matched by `elementTreeConstruction`, modeled as an XSLT execution.
 *
 * Example:
 *
 * ```py
 * from lxml import etree
 * xslt_tree = etree.parse(...)
 * doc = etree.parse(...)
 * result = doc.xslt(xslt_tree)
 * ```
 */
class XsltAttributeCall extends XsltExecution::Range, API::CallNode {
  XsltAttributeCall() {
    exists(API::CallNode treeCall | treeCall = elementTreeConstruction(_) |
      this = treeCall.getReturn().getMember("xslt").getACall()
    )
  }

  /** Gets the first positional argument, or the argument passed as keyword `_xslt`. */
  override DataFlow::Node getXsltArg() {
    exists(API::Node xsltParam | xsltParam = this.getParameter(0, "_xslt") |
      result = xsltParam.asSink()
    )
  }
}
// -----------------------------------------------------------------------------
/**
 * Gets a call to one of the `lxml.etree` functions `fromstring`, `fromstringlist`,
 * `parse`, or `XML`, where `inputArg` is the argument supplying the data to be
 * parsed (the first positional argument, or the corresponding keyword argument).
 */
API::CallNode elementTreeConstruction(DataFlow::Node inputArg) {
  // TODO: If we could, would be nice to model this as flow-summaries. But I'm not sure if we actually can :thinking:
  exists(string funcName, string paramName |
    // see https://lxml.de/api/lxml.etree-module.html#fromstring
    funcName = "fromstring" and paramName = "text"
    or
    // see https://lxml.de/api/lxml.etree-module.html#fromstringlist
    funcName = "fromstringlist" and paramName = "strings"
    or
    // TODO: technically we should treat parse differently, since it takes a file as argument
    // see https://lxml.de/api/lxml.etree-module.html#parse
    funcName = "parse" and paramName = "source"
    or
    // see https://lxml.de/api/lxml.etree-module.html#XML
    funcName = "XML" and paramName = "text"
  |
    result = API::moduleImport("lxml").getMember("etree").getMember(funcName).getACall() and
    inputArg = result.getParameter(0, paramName).asSink()
  )
}

View File

@@ -12,25 +12,10 @@
*/
import python
import semmle.python.security.Paths
/* Sources */
import semmle.python.web.HttpRequest
/* Sinks */
import experimental.semmle.python.security.injection.XSLT
import XsltInjectionQuery
import XsltInjectionFlow::PathGraph
/**
 * A taint-tracking configuration for XSLT injection, built on
 * `TaintTracking::Configuration`.
 *
 * NOTE(review): this appears to be the pre-port implementation that the hunk
 * removes (the hunk shrinks from 25 to 10 lines) - confirm against the raw diff.
 */
class XsltInjectionConfiguration extends TaintTracking::Configuration {
XsltInjectionConfiguration() { this = "XSLT injection configuration" }
// Sources are the `HttpRequestTaintSource`s.
deprecated override predicate isSource(TaintTracking::Source source) {
source instanceof HttpRequestTaintSource
}
// Sinks are the `XSLTInjection::XSLTInjectionSink`s.
deprecated override predicate isSink(TaintTracking::Sink sink) {
sink instanceof XSLTInjection::XSLTInjectionSink
}
}
// Reports every source/sink pair connected by a flow path under `config`; the
// alert is placed on the sink and the `$@` placeholder links to the source.
// NOTE(review): presumably the old select clause that the hunk replaces - confirm.
from XsltInjectionConfiguration config, TaintedPathSource src, TaintedPathSink sink
where config.hasFlowPath(src, sink)
select sink.getSink(), src, sink, "This XSLT query depends on $@.", src.getSource(),
"a user-provided value"
// Reports every flow path found by `XsltInjectionFlow`; the alert is placed on
// the sink node and the `$@` placeholder links to the source node.
// The article lives in the message so the alert renders as
// "This XSLT query depends on a user-provided value."
from XsltInjectionFlow::PathNode source, XsltInjectionFlow::PathNode sink
where XsltInjectionFlow::flowPath(source, sink)
select sink.getNode(), source, sink, "This XSLT query depends on a $@.", source.getNode(),
  "user-provided value"

View File

@@ -0,0 +1,58 @@
/**
* Provides default sources, sinks and sanitizers for detecting
* "XSLT injection"
* vulnerabilities, as well as extension points for adding your own.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.BarrierGuards
private import XsltConcept
/**
 * Contains the default sources, sinks and sanitizers used to detect
 * "XSLT injection" vulnerabilities, together with the abstract classes
 * that can be extended to contribute additional ones.
 */
module XsltInjection {
  /** A node at which data relevant to "XSLT injection" enters the program. */
  abstract class Source extends DataFlow::Node { }

  /** A node where tainted data results in an "XSLT injection" vulnerability. */
  abstract class Sink extends DataFlow::Node { }

  /** A node through which "XSLT injection" taint does not propagate. */
  abstract class Sanitizer extends DataFlow::Node { }

  /** Remote user input, treated as a source. */
  class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { }

  /** The XSLT argument of an `XsltConstruction`, treated as a sink. */
  class XsltConstructionAsSink extends Sink {
    XsltConstructionAsSink() {
      exists(XsltConstruction construction | this = construction.getXsltArg())
    }
  }

  /** The XSLT argument of an `XsltExecution`, treated as a sink. */
  class XsltExecutionAsSink extends Sink {
    XsltExecutionAsSink() { exists(XsltExecution execution | this = execution.getXsltArg()) }
  }

  /** A `StringConstCompareBarrier`, treated as a sanitizer. */
  class StringConstCompareAsSanitizerGuard extends Sanitizer, StringConstCompareBarrier { }
}

View File

@@ -0,0 +1,24 @@
/**
* Provides a taint-tracking configuration for detecting "XSLT injection" vulnerabilities.
*/
private import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import XsltInjectionCustomizations::XsltInjection
import XsltConcept
/**
 * The data-flow configuration for "XSLT injection": it selects the `Source`
 * and `Sink` nodes and adds a flow step through the calls matched by
 * `elementTreeConstruction`.
 */
module XsltInjectionConfig implements DataFlow::ConfigSig {
  /** Holds if `node` is a `Source`. */
  predicate isSource(DataFlow::Node node) { node = any(Source src) }

  /** Holds if `node` is a `Sink`. */
  predicate isSink(DataFlow::Node node) { node = any(Sink snk) }

  /**
   * Holds if `nodeTo` is a call matched by `elementTreeConstruction` and
   * `nodeFrom` is that call's input argument.
   */
  predicate isAdditionalFlowStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
    // A FlowState of (raw-string, ElementTree) was considered, but valid code
    // would never have direct flow from a string to a sink anyway, so the
    // simpler approach of a plain additional step was chosen.
    elementTreeConstruction(nodeFrom) = nodeTo
  }
}
/** Global taint-tracking instantiated with `XsltInjectionConfig`. */
module XsltInjectionFlow = TaintTracking::Global<XsltInjectionConfig>;