Merge branch 'main' into deadCode

This commit is contained in:
Erik Krogh Kristensen
2022-03-15 09:19:14 +01:00
245 changed files with 5325 additions and 814 deletions

View File

@@ -1,3 +1,13 @@
## 0.0.11
### Minor Analysis Improvements
* Added new SSRF sinks for `httpx`, `pycurl`, `urllib`, `urllib2`, `urllib3`, and `libtaxii`. This improvement was [submitted by @haby0](https://github.com/github/codeql/pull/8275).
* The regular expression parser now groups sequences of normal characters. This reduces the number of instances of `RegExpNormalChar`.
* Fixed taint propagation for attribute assignment. In the assignment `x.foo = tainted` we no longer treat the entire object `x` as tainted, just because the attribute `foo` contains tainted data. This leads to slightly fewer false positives.
* Improved analysis of attributes for data-flow and taint tracking queries, so `getattr`/`setattr` are supported, and a write to an attribute properly stops flow for the old value in that attribute.
* Added post-update nodes (`DataFlow::PostUpdateNode`) for arguments in calls that can't be resolved.
## 0.0.10
### Deprecated APIs

View File

@@ -1,5 +0,0 @@
---
category: minorAnalysis
---
* Improved analysis of attributes for data-flow and taint tracking queries, so `getattr`/`setattr` are supported, and a write to an attribute properly stops flow for the old value in that attribute.
* Added post-update nodes (`DataFlow::PostUpdateNode`) for arguments in calls that can't be resolved.

View File

@@ -0,0 +1,5 @@
---
category: deprecated
---
* Some modules that started with a lowercase letter have been renamed to follow our style guide.
The old names still exist as deprecated aliases.

View File

@@ -1,4 +0,0 @@
---
category: minorAnalysis
---
* Fixed taint propagation for attribute assignment. In the assignment `x.foo = tainted` we no longer treat the entire object `x` as tainted, just because the attribute `foo` contains tainted data. This leads to slightly fewer false positives.

View File

@@ -1,4 +0,0 @@
---
category: minorAnalysis
---
* The regular expression parser now groups sequences of normal characters. This reduces the number of instances of `RegExpNormalChar`.

View File

@@ -1,4 +0,0 @@
---
category: minorAnalysis
---
* Added new SSRF sinks for `httpx`, `pycurl`, `urllib`, `urllib2`, `urllib3`, and `libtaxii`. This improvement was [submitted by @haby0](https://github.com/github/codeql/pull/8275).

View File

@@ -0,0 +1,4 @@
---
category: breaking
---
* The flow state variants of `isBarrier` and `isAdditionalFlowStep` are no longer exposed in the taint tracking library. The `isSanitizer` and `isAdditionalTaintStep` predicates should be used instead.

View File

@@ -0,0 +1,9 @@
## 0.0.11
### Minor Analysis Improvements
* Added new SSRF sinks for `httpx`, `pycurl`, `urllib`, `urllib2`, `urllib3`, and `libtaxii`. This improvement was [submitted by @haby0](https://github.com/github/codeql/pull/8275).
* The regular expression parser now groups sequences of normal characters. This reduces the number of instances of `RegExpNormalChar`.
* Fixed taint propagation for attribute assignment. In the assignment `x.foo = tainted` we no longer treat the entire object `x` as tainted, just because the attribute `foo` contains tainted data. This leads to slightly fewer false positives.
* Improved analysis of attributes for data-flow and taint tracking queries, so `getattr`/`setattr` are supported, and a write to an attribute properly stops flow for the old value in that attribute.
* Added post-update nodes (`DataFlow::PostUpdateNode`) for arguments in calls that can't be resolved.

View File

@@ -1,2 +1,2 @@
---
lastReleaseVersion: 0.0.10
lastReleaseVersion: 0.0.11

View File

@@ -1,5 +1,5 @@
name: codeql/python-all
version: 0.0.11-dev
version: 0.0.12-dev
groups: python
dbscheme: semmlecode.python.dbscheme
extractor: python

View File

@@ -27,9 +27,9 @@ class FunctionMetrics extends Function {
* P = the number of connected components, which for a single function is 1.
*/
int getCyclomaticComplexity() {
exists(int E, int N |
N = count(BasicBlock b | b = this.getABasicBlock() and b.likelyReachable()) and
E =
exists(int e, int n |
n = count(BasicBlock b | b = this.getABasicBlock() and b.likelyReachable()) and
e =
count(BasicBlock b1, BasicBlock b2 |
b1 = this.getABasicBlock() and
b1.likelyReachable() and
@@ -39,7 +39,7 @@ class FunctionMetrics extends Function {
not b1.unlikelySuccessor(b2)
)
|
result = E - N + 2
result = e - n + 2
)
}

View File

@@ -39,8 +39,11 @@ predicate isArgumentNode(ArgumentNode arg, DataFlowCall c, ArgumentPosition pos)
//--------
predicate isExpressionNode(ControlFlowNode node) { node.getNode() instanceof Expr }
/** DEPRECATED: Alias for `SyntheticPreUpdateNode` */
deprecated module syntheticPreUpdateNode = SyntheticPreUpdateNode;
/** A module collecting the different reasons for synthesising a pre-update node. */
module syntheticPreUpdateNode {
module SyntheticPreUpdateNode {
class SyntheticPreUpdateNode extends Node, TSyntheticPreUpdateNode {
NeedsSyntheticPreUpdateNode post;
@@ -78,10 +81,13 @@ module syntheticPreUpdateNode {
CfgNode objectCreationNode() { result.getNode().(CallNode) = any(ClassCall c).getNode() }
}
import syntheticPreUpdateNode
import SyntheticPreUpdateNode
/** DEPRECATED: Alias for `SyntheticPostUpdateNode` */
deprecated module syntheticPostUpdateNode = SyntheticPostUpdateNode;
/** A module collecting the different reasons for synthesising a post-update node. */
module syntheticPostUpdateNode {
module SyntheticPostUpdateNode {
/** A post-update node is synthesized for all nodes which satisfy `NeedsSyntheticPostUpdateNode`. */
class SyntheticPostUpdateNode extends PostUpdateNode, TSyntheticPostUpdateNode {
NeedsSyntheticPostUpdateNode pre;
@@ -177,7 +183,7 @@ module syntheticPostUpdateNode {
}
}
import syntheticPostUpdateNode
import SyntheticPostUpdateNode
class DataFlowExpr = Expr;

View File

@@ -64,13 +64,30 @@ abstract class Configuration extends DataFlow::Configuration {
override predicate isSource(DataFlow::Node source) { none() }
/**
* Holds if `sink` is a relevant taint sink.
* Holds if `source` is a relevant taint source with the given initial
* `state`.
*
* The smaller this predicate is, the faster `hasFlow()` will converge.
*/
// overridden to provide taint-tracking specific qldoc
override predicate isSource(DataFlow::Node source, DataFlow::FlowState state) { none() }
/**
* Holds if `sink` is a relevant taint sink.
*
* The smaller this predicate is, the faster `hasFlow()` will converge.
*/
// overridden to provide taint-tracking specific qldoc
override predicate isSink(DataFlow::Node sink) { none() }
/**
* Holds if `sink` is a relevant taint sink accepting `state`.
*
* The smaller this predicate is, the faster `hasFlow()` will converge.
*/
// overridden to provide taint-tracking specific qldoc
override predicate isSink(DataFlow::Node sink, DataFlow::FlowState state) { none() }
/** Holds if the node `node` is a taint sanitizer. */
predicate isSanitizer(DataFlow::Node node) { none() }
@@ -79,6 +96,16 @@ abstract class Configuration extends DataFlow::Configuration {
defaultTaintSanitizer(node)
}
/**
* Holds if the node `node` is a taint sanitizer when the flow state is
* `state`.
*/
predicate isSanitizer(DataFlow::Node node, DataFlow::FlowState state) { none() }
final override predicate isBarrier(DataFlow::Node node, DataFlow::FlowState state) {
this.isSanitizer(node, state)
}
/** Holds if taint propagation into `node` is prohibited. */
predicate isSanitizerIn(DataFlow::Node node) { none() }
@@ -107,6 +134,25 @@ abstract class Configuration extends DataFlow::Configuration {
defaultAdditionalTaintStep(node1, node2)
}
/**
* Holds if the additional taint propagation step from `node1` to `node2`
* must be taken into account in the analysis. This step is only applicable
* in `state1` and updates the flow state to `state2`.
*/
predicate isAdditionalTaintStep(
DataFlow::Node node1, DataFlow::FlowState state1, DataFlow::Node node2,
DataFlow::FlowState state2
) {
none()
}
final override predicate isAdditionalFlowStep(
DataFlow::Node node1, DataFlow::FlowState state1, DataFlow::Node node2,
DataFlow::FlowState state2
) {
this.isAdditionalTaintStep(node1, state1, node2, state2)
}
override predicate allowImplicitRead(DataFlow::Node node, DataFlow::Content c) {
(this.isSink(node) or this.isAdditionalTaintStep(node, _)) and
defaultImplicitTaintRead(node, c)

View File

@@ -64,13 +64,30 @@ abstract class Configuration extends DataFlow::Configuration {
override predicate isSource(DataFlow::Node source) { none() }
/**
* Holds if `sink` is a relevant taint sink.
* Holds if `source` is a relevant taint source with the given initial
* `state`.
*
* The smaller this predicate is, the faster `hasFlow()` will converge.
*/
// overridden to provide taint-tracking specific qldoc
override predicate isSource(DataFlow::Node source, DataFlow::FlowState state) { none() }
/**
* Holds if `sink` is a relevant taint sink.
*
* The smaller this predicate is, the faster `hasFlow()` will converge.
*/
// overridden to provide taint-tracking specific qldoc
override predicate isSink(DataFlow::Node sink) { none() }
/**
* Holds if `sink` is a relevant taint sink accepting `state`.
*
* The smaller this predicate is, the faster `hasFlow()` will converge.
*/
// overridden to provide taint-tracking specific qldoc
override predicate isSink(DataFlow::Node sink, DataFlow::FlowState state) { none() }
/** Holds if the node `node` is a taint sanitizer. */
predicate isSanitizer(DataFlow::Node node) { none() }
@@ -79,6 +96,16 @@ abstract class Configuration extends DataFlow::Configuration {
defaultTaintSanitizer(node)
}
/**
* Holds if the node `node` is a taint sanitizer when the flow state is
* `state`.
*/
predicate isSanitizer(DataFlow::Node node, DataFlow::FlowState state) { none() }
final override predicate isBarrier(DataFlow::Node node, DataFlow::FlowState state) {
this.isSanitizer(node, state)
}
/** Holds if taint propagation into `node` is prohibited. */
predicate isSanitizerIn(DataFlow::Node node) { none() }
@@ -107,6 +134,25 @@ abstract class Configuration extends DataFlow::Configuration {
defaultAdditionalTaintStep(node1, node2)
}
/**
* Holds if the additional taint propagation step from `node1` to `node2`
* must be taken into account in the analysis. This step is only applicable
* in `state1` and updates the flow state to `state2`.
*/
predicate isAdditionalTaintStep(
DataFlow::Node node1, DataFlow::FlowState state1, DataFlow::Node node2,
DataFlow::FlowState state2
) {
none()
}
final override predicate isAdditionalFlowStep(
DataFlow::Node node1, DataFlow::FlowState state1, DataFlow::Node node2,
DataFlow::FlowState state2
) {
this.isAdditionalTaintStep(node1, state1, node2, state2)
}
override predicate allowImplicitRead(DataFlow::Node node, DataFlow::Content c) {
(this.isSink(node) or this.isAdditionalTaintStep(node, _)) and
defaultImplicitTaintRead(node, c)

View File

@@ -64,13 +64,30 @@ abstract class Configuration extends DataFlow::Configuration {
override predicate isSource(DataFlow::Node source) { none() }
/**
* Holds if `sink` is a relevant taint sink.
* Holds if `source` is a relevant taint source with the given initial
* `state`.
*
* The smaller this predicate is, the faster `hasFlow()` will converge.
*/
// overridden to provide taint-tracking specific qldoc
override predicate isSource(DataFlow::Node source, DataFlow::FlowState state) { none() }
/**
* Holds if `sink` is a relevant taint sink.
*
* The smaller this predicate is, the faster `hasFlow()` will converge.
*/
// overridden to provide taint-tracking specific qldoc
override predicate isSink(DataFlow::Node sink) { none() }
/**
* Holds if `sink` is a relevant taint sink accepting `state`.
*
* The smaller this predicate is, the faster `hasFlow()` will converge.
*/
// overridden to provide taint-tracking specific qldoc
override predicate isSink(DataFlow::Node sink, DataFlow::FlowState state) { none() }
/** Holds if the node `node` is a taint sanitizer. */
predicate isSanitizer(DataFlow::Node node) { none() }
@@ -79,6 +96,16 @@ abstract class Configuration extends DataFlow::Configuration {
defaultTaintSanitizer(node)
}
/**
* Holds if the node `node` is a taint sanitizer when the flow state is
* `state`.
*/
predicate isSanitizer(DataFlow::Node node, DataFlow::FlowState state) { none() }
final override predicate isBarrier(DataFlow::Node node, DataFlow::FlowState state) {
this.isSanitizer(node, state)
}
/** Holds if taint propagation into `node` is prohibited. */
predicate isSanitizerIn(DataFlow::Node node) { none() }
@@ -107,6 +134,25 @@ abstract class Configuration extends DataFlow::Configuration {
defaultAdditionalTaintStep(node1, node2)
}
/**
* Holds if the additional taint propagation step from `node1` to `node2`
* must be taken into account in the analysis. This step is only applicable
* in `state1` and updates the flow state to `state2`.
*/
predicate isAdditionalTaintStep(
DataFlow::Node node1, DataFlow::FlowState state1, DataFlow::Node node2,
DataFlow::FlowState state2
) {
none()
}
final override predicate isAdditionalFlowStep(
DataFlow::Node node1, DataFlow::FlowState state1, DataFlow::Node node2,
DataFlow::FlowState state2
) {
this.isAdditionalTaintStep(node1, state1, node2, state2)
}
override predicate allowImplicitRead(DataFlow::Node node, DataFlow::Content c) {
(this.isSink(node) or this.isAdditionalTaintStep(node, _)) and
defaultImplicitTaintRead(node, c)

View File

@@ -64,13 +64,30 @@ abstract class Configuration extends DataFlow::Configuration {
override predicate isSource(DataFlow::Node source) { none() }
/**
* Holds if `sink` is a relevant taint sink.
* Holds if `source` is a relevant taint source with the given initial
* `state`.
*
* The smaller this predicate is, the faster `hasFlow()` will converge.
*/
// overridden to provide taint-tracking specific qldoc
override predicate isSource(DataFlow::Node source, DataFlow::FlowState state) { none() }
/**
* Holds if `sink` is a relevant taint sink.
*
* The smaller this predicate is, the faster `hasFlow()` will converge.
*/
// overridden to provide taint-tracking specific qldoc
override predicate isSink(DataFlow::Node sink) { none() }
/**
* Holds if `sink` is a relevant taint sink accepting `state`.
*
* The smaller this predicate is, the faster `hasFlow()` will converge.
*/
// overridden to provide taint-tracking specific qldoc
override predicate isSink(DataFlow::Node sink, DataFlow::FlowState state) { none() }
/** Holds if the node `node` is a taint sanitizer. */
predicate isSanitizer(DataFlow::Node node) { none() }
@@ -79,6 +96,16 @@ abstract class Configuration extends DataFlow::Configuration {
defaultTaintSanitizer(node)
}
/**
* Holds if the node `node` is a taint sanitizer when the flow state is
* `state`.
*/
predicate isSanitizer(DataFlow::Node node, DataFlow::FlowState state) { none() }
final override predicate isBarrier(DataFlow::Node node, DataFlow::FlowState state) {
this.isSanitizer(node, state)
}
/** Holds if taint propagation into `node` is prohibited. */
predicate isSanitizerIn(DataFlow::Node node) { none() }
@@ -107,6 +134,25 @@ abstract class Configuration extends DataFlow::Configuration {
defaultAdditionalTaintStep(node1, node2)
}
/**
* Holds if the additional taint propagation step from `node1` to `node2`
* must be taken into account in the analysis. This step is only applicable
* in `state1` and updates the flow state to `state2`.
*/
predicate isAdditionalTaintStep(
DataFlow::Node node1, DataFlow::FlowState state1, DataFlow::Node node2,
DataFlow::FlowState state2
) {
none()
}
final override predicate isAdditionalFlowStep(
DataFlow::Node node1, DataFlow::FlowState state1, DataFlow::Node node2,
DataFlow::FlowState state2
) {
this.isAdditionalTaintStep(node1, state1, node2, state2)
}
override predicate allowImplicitRead(DataFlow::Node node, DataFlow::Content c) {
(this.isSink(node) or this.isAdditionalTaintStep(node, _)) and
defaultImplicitTaintRead(node, c)

View File

@@ -526,8 +526,11 @@ module PrivateDjango {
/** Gets a reference to the `django` module. */
API::Node django() { result = API::moduleImport("django") }
/** DEPRECATED: Alias for `DjangoImpl` */
deprecated module django = DjangoImpl;
/** Provides models for the `django` module. */
module django {
module DjangoImpl {
// -------------------------------------------------------------------------
// django.db
// -------------------------------------------------------------------------
@@ -541,8 +544,11 @@ module PrivateDjango {
DjangoDb() { this = API::moduleImport("django").getMember("db") }
}
/** DEPRECATED: Alias for `DB` */
deprecated module db = DB;
/** Provides models for the `django.db` module. */
module db {
module DB {
/** Gets a reference to the `django.db.connection` object. */
API::Node connection() { result = db().getMember("connection") }
@@ -557,8 +563,11 @@ module PrivateDjango {
/** Gets a reference to the `django.db.models` module. */
API::Node models() { result = db().getMember("models") }
/** DEPRECATED: Alias for `Models` */
deprecated module models = Models;
/** Provides models for the `django.db.models` module. */
module models {
module Models {
/**
* Provides models for the `django.db.models.Model` class and subclasses.
*
@@ -608,8 +617,11 @@ module PrivateDjango {
/** Gets a reference to the `django.db.models.expressions` module. */
API::Node expressions() { result = models().getMember("expressions") }
/** DEPRECATED: Alias for `Expressions` */
deprecated module expressions = Expressions;
/** Provides models for the `django.db.models.expressions` module. */
module expressions {
module Expressions {
/** Provides models for the `django.db.models.expressions.RawSql` class. */
module RawSql {
/**
@@ -662,8 +674,8 @@ module PrivateDjango {
DataFlow::Node sql;
ObjectsAnnotate() {
this = django::db::models::querySetReturningMethod("annotate").getACall() and
django::db::models::expressions::RawSql::instance(sql) in [
this = DjangoImpl::DB::Models::querySetReturningMethod("annotate").getACall() and
DjangoImpl::DB::Models::Expressions::RawSql::instance(sql) in [
this.getArg(_), this.getArgByName(_)
]
}
@@ -680,8 +692,8 @@ module PrivateDjango {
DataFlow::Node sql;
ObjectsAlias() {
this = django::db::models::querySetReturningMethod("alias").getACall() and
django::db::models::expressions::RawSql::instance(sql) in [
this = DjangoImpl::DB::Models::querySetReturningMethod("alias").getACall() and
DjangoImpl::DB::Models::Expressions::RawSql::instance(sql) in [
this.getArg(_), this.getArgByName(_)
]
}
@@ -697,7 +709,7 @@ module PrivateDjango {
* - https://docs.djangoproject.com/en/3.1/ref/models/querysets/#raw
*/
private class ObjectsRaw extends SqlExecution::Range, DataFlow::CallCfgNode {
ObjectsRaw() { this = django::db::models::querySetReturningMethod("raw").getACall() }
ObjectsRaw() { this = DjangoImpl::DB::Models::querySetReturningMethod("raw").getACall() }
override DataFlow::Node getSql() { result = this.getArg(0) }
}
@@ -708,7 +720,7 @@ module PrivateDjango {
* See https://docs.djangoproject.com/en/3.1/ref/models/querysets/#extra
*/
private class ObjectsExtra extends SqlExecution::Range, DataFlow::CallCfgNode {
ObjectsExtra() { this = django::db::models::querySetReturningMethod("extra").getACall() }
ObjectsExtra() { this = DjangoImpl::DB::Models::querySetReturningMethod("extra").getACall() }
override DataFlow::Node getSql() {
result in [
@@ -723,8 +735,11 @@ module PrivateDjango {
/** Gets a reference to the `django.urls` module. */
API::Node urls() { result = django().getMember("urls") }
/** DEPRECATED: Alias for `Urls` */
deprecated module urls = Urls;
/** Provides models for the `django.urls` module */
module urls {
module Urls {
/**
* Gets a reference to the `django.urls.path` function.
* See https://docs.djangoproject.com/en/3.0/ref/urls/#path
@@ -744,10 +759,16 @@ module PrivateDjango {
/** Gets a reference to the `django.conf` module. */
API::Node conf() { result = django().getMember("conf") }
/** DEPRECATED: Alias for `Conf` */
deprecated module conf = Conf;
/** Provides models for the `django.conf` module */
module conf {
module Conf {
/** DEPRECATED: Alias for `ConfUrls` */
deprecated module conf_urls = ConfUrls;
/** Provides models for the `django.conf.urls` module */
module conf_urls {
module ConfUrls {
// -------------------------------------------------------------------------
// django.conf.urls
// -------------------------------------------------------------------------
@@ -770,16 +791,22 @@ module PrivateDjango {
/** Gets a reference to the `django.http` module. */
API::Node http() { result = django().getMember("http") }
/** DEPRECATED: Alias for `Http` */
deprecated module http = Http;
/** Provides models for the `django.http` module */
module http {
module Http {
// ---------------------------------------------------------------------------
// django.http.request
// ---------------------------------------------------------------------------
/** Gets a reference to the `django.http.request` module. */
API::Node request() { result = http().getMember("request") }
/** DEPRECATED: Alias for `Request` */
deprecated module request = Request;
/** Provides models for the `django.http.request` module. */
module request {
module Request {
/**
* Provides models for the `django.http.request.HttpRequest` class
*
@@ -860,7 +887,7 @@ module PrivateDjango {
// special handling of the `build_absolute_uri` method, see
// https://docs.djangoproject.com/en/3.0/ref/request-response/#django.http.HttpRequest.build_absolute_uri
exists(DataFlow::AttrRead attr, DataFlow::CallCfgNode call, DataFlow::Node instance |
instance = django::http::request::HttpRequest::instance() and
instance = DjangoImpl::Http::Request::HttpRequest::instance() and
attr.getObject() = instance
|
attr.getAttributeName() = "build_absolute_uri" and
@@ -937,8 +964,11 @@ module PrivateDjango {
/** Gets a reference to the `django.http.response` module. */
API::Node response() { result = http().getMember("response") }
/** DEPRECATED: Alias for `Response` */
deprecated module response = Response;
/** Provides models for the `django.http.response` module */
module response {
module Response {
/**
* Provides models for the `django.http.response.HttpResponse` class
*
@@ -1672,17 +1702,17 @@ module PrivateDjango {
/** Gets a reference to the `django.http.response.HttpResponse.write` function. */
private DataFlow::TypeTrackingNode write(
django::http::response::HttpResponse::InstanceSource instance, DataFlow::TypeTracker t
DjangoImpl::Http::Response::HttpResponse::InstanceSource instance, DataFlow::TypeTracker t
) {
t.startInAttr("write") and
instance = django::http::response::HttpResponse::instance() and
instance = DjangoImpl::Http::Response::HttpResponse::instance() and
result = instance
or
exists(DataFlow::TypeTracker t2 | result = write(instance, t2).track(t2, t))
}
/** Gets a reference to the `django.http.response.HttpResponse.write` function. */
DataFlow::Node write(django::http::response::HttpResponse::InstanceSource instance) {
DataFlow::Node write(DjangoImpl::Http::Response::HttpResponse::InstanceSource instance) {
write(instance, DataFlow::TypeTracker::end()).flowsTo(result)
}
@@ -1692,7 +1722,7 @@ module PrivateDjango {
* See https://docs.djangoproject.com/en/3.1/ref/request-response/#django.http.HttpResponse.write
*/
class HttpResponseWriteCall extends HTTP::Server::HttpResponse::Range, DataFlow::CallCfgNode {
django::http::response::HttpResponse::InstanceSource instance;
DjangoImpl::Http::Response::HttpResponse::InstanceSource instance;
HttpResponseWriteCall() { this.getFunction() = write(instance) }
@@ -1713,7 +1743,7 @@ module PrivateDjango {
class DjangoResponseSetCookieCall extends HTTP::Server::CookieWrite::Range,
DataFlow::MethodCallNode {
DjangoResponseSetCookieCall() {
this.calls(django::http::response::HttpResponse::instance(), "set_cookie")
this.calls(DjangoImpl::Http::Response::HttpResponse::instance(), "set_cookie")
}
override DataFlow::Node getHeaderArg() { none() }
@@ -1733,7 +1763,7 @@ module PrivateDjango {
class DjangoResponseDeleteCookieCall extends HTTP::Server::CookieWrite::Range,
DataFlow::MethodCallNode {
DjangoResponseDeleteCookieCall() {
this.calls(django::http::response::HttpResponse::instance(), "delete_cookie")
this.calls(DjangoImpl::Http::Response::HttpResponse::instance(), "delete_cookie")
}
override DataFlow::Node getHeaderArg() { none() }
@@ -1760,7 +1790,7 @@ module PrivateDjango {
this.asCfgNode() = subscript
|
cookieLookup.getAttributeName() = "cookies" and
cookieLookup.getObject() = django::http::response::HttpResponse::instance() and
cookieLookup.getObject() = DjangoImpl::Http::Response::HttpResponse::instance() and
exists(DataFlow::Node subscriptObj |
subscriptObj.asCfgNode() = subscript.getObject()
|
@@ -1786,8 +1816,11 @@ module PrivateDjango {
/** Gets a reference to the `django.shortcuts` module. */
API::Node shortcuts() { result = django().getMember("shortcuts") }
/** DEPRECATED: Alias for `Shortcuts` */
deprecated module shortcuts = Shortcuts;
/** Provides models for the `django.shortcuts` module */
module shortcuts {
module Shortcuts {
/**
* Gets a reference to the `django.shortcuts.redirect` function
*
@@ -2063,7 +2096,7 @@ module PrivateDjango {
* See https://docs.djangoproject.com/en/3.0/ref/urls/#path
*/
private class DjangoUrlsPathCall extends DjangoRouteSetup, DataFlow::CallCfgNode {
DjangoUrlsPathCall() { this = django::urls::path().getACall() }
DjangoUrlsPathCall() { this = DjangoImpl::Urls::path().getACall() }
override DataFlow::Node getUrlPatternArg() {
result in [this.getArg(0), this.getArgByName("route")]
@@ -2146,7 +2179,7 @@ module PrivateDjango {
*/
private class DjangoUrlsRePathCall extends DjangoRegexRouteSetup, DataFlow::CallCfgNode {
DjangoUrlsRePathCall() {
this = django::urls::re_path().getACall() and
this = DjangoImpl::Urls::re_path().getACall() and
// `django.conf.urls.url` (which we support directly with
// `DjangoConfUrlsUrlCall`), is implemented in Django 2+ as backward compatibility
// using `django.urls.re_path`. See
@@ -2176,7 +2209,7 @@ module PrivateDjango {
* See https://docs.djangoproject.com/en/1.11/ref/urls/#django.conf.urls.url
*/
private class DjangoConfUrlsUrlCall extends DjangoRegexRouteSetup, DataFlow::CallCfgNode {
DjangoConfUrlsUrlCall() { this = django::conf::conf_urls::url().getACall() }
DjangoConfUrlsUrlCall() { this = DjangoImpl::Conf::ConfUrls::url().getACall() }
override DataFlow::Node getUrlPatternArg() {
result in [this.getArg(0), this.getArgByName("regex")]
@@ -2189,7 +2222,7 @@ module PrivateDjango {
// HttpRequest taint modeling
// ---------------------------------------------------------------------------
/** A parameter that will receive the django `HttpRequest` instance when a request handler is invoked. */
private class DjangoRequestHandlerRequestParam extends django::http::request::HttpRequest::InstanceSource,
private class DjangoRequestHandlerRequestParam extends DjangoImpl::Http::Request::HttpRequest::InstanceSource,
RemoteFlowSource::Range, DataFlow::ParameterNode {
DjangoRequestHandlerRequestParam() {
this.getParameter() = any(DjangoRouteSetup setup).getARequestHandler().getRequestParam()
@@ -2206,7 +2239,7 @@ module PrivateDjango {
*
* See https://docs.djangoproject.com/en/3.1/topics/class-based-views/generic-display/#dynamic-filtering
*/
private class DjangoViewClassRequestAttributeRead extends django::http::request::HttpRequest::InstanceSource,
private class DjangoViewClassRequestAttributeRead extends DjangoImpl::Http::Request::HttpRequest::InstanceSource,
RemoteFlowSource::Range, DataFlow::Node {
DjangoViewClassRequestAttributeRead() {
exists(DataFlow::AttrRead read | this = read |
@@ -2253,7 +2286,7 @@ module PrivateDjango {
*/
private class DjangoShortcutsRedirectCall extends HTTP::Server::HttpRedirectResponse::Range,
DataFlow::CallCfgNode {
DjangoShortcutsRedirectCall() { this = django::shortcuts::redirect().getACall() }
DjangoShortcutsRedirectCall() { this = DjangoImpl::Shortcuts::redirect().getACall() }
/**
* Gets the data-flow node that specifies the location of this HTTP redirect response.

View File

@@ -24,7 +24,7 @@ private module FabricV1 {
API::Node fabric() { result = API::moduleImport("fabric") }
/** Provides models for the `fabric` module. */
module fabric {
module Fabric {
// -------------------------------------------------------------------------
// fabric.api
// -------------------------------------------------------------------------
@@ -32,7 +32,7 @@ private module FabricV1 {
API::Node api() { result = fabric().getMember("api") }
/** Provides models for the `fabric.api` module */
module api {
module Api {
/**
* A call to either
* - `fabric.api.local`
@@ -66,7 +66,7 @@ private module FabricV2 {
API::Node fabric() { result = API::moduleImport("fabric") }
/** Provides models for the `fabric` module. */
module fabric {
module Fabric {
// -------------------------------------------------------------------------
// fabric.connection
// -------------------------------------------------------------------------
@@ -74,13 +74,13 @@ private module FabricV2 {
API::Node connection() { result = fabric().getMember("connection") }
/** Provides models for the `fabric.connection` module */
module connection {
module Connection {
/**
* Provides models for the `fabric.connection.Connection` class
*
* See https://docs.fabfile.org/en/2.5/api/connection.html#fabric.connection.Connection.
*/
module Connection {
module ConnectionClass {
/** Gets a reference to the `fabric.connection.Connection` class. */
API::Node classRef() {
result = fabric().getMember("Connection")
@@ -155,7 +155,7 @@ private module FabricV2 {
private class FabricConnectionRunSudoLocalCall extends SystemCommandExecution::Range,
DataFlow::CallCfgNode {
FabricConnectionRunSudoLocalCall() {
this.getFunction() = fabric::connection::Connection::instanceRunMethods()
this.getFunction() = Fabric::Connection::ConnectionClass::instanceRunMethods()
}
override DataFlow::Node getCommand() {
@@ -170,16 +170,16 @@ private module FabricV2 {
API::Node tasks() { result = fabric().getMember("tasks") }
/** Provides models for the `fabric.tasks` module */
module tasks {
module Tasks {
/** Gets a reference to the `fabric.tasks.task` decorator. */
API::Node task() { result in [tasks().getMember("task"), fabric().getMember("task")] }
}
class FabricTaskFirstParamConnectionInstance extends fabric::connection::Connection::InstanceSource,
class FabricTaskFirstParamConnectionInstance extends Fabric::Connection::ConnectionClass::InstanceSource,
DataFlow::ParameterNode {
FabricTaskFirstParamConnectionInstance() {
exists(Function func |
func.getADecorator() = fabric::tasks::task().getAUse().asExpr() and
func.getADecorator() = Fabric::Tasks::task().getAUse().asExpr() and
this.getParameter() = func.getArg(0)
)
}
@@ -192,7 +192,7 @@ private module FabricV2 {
API::Node group() { result = fabric().getMember("group") }
/** Provides models for the `fabric.group` module */
module group {
module Group {
/**
* Provides models for the `fabric.group.Group` class and its subclasses.
*
@@ -204,7 +204,7 @@ private module FabricV2 {
* - https://docs.fabfile.org/en/2.5/api/group.html#fabric.group.SerialGroup
* - https://docs.fabfile.org/en/2.5/api/group.html#fabric.group.ThreadingGroup
*/
module Group {
module GroupClass {
/**
* A source of instances of a subclass of `fabric.group.Group`, extend this class to model new instances.
*
@@ -236,7 +236,9 @@ private module FabricV2 {
* See https://docs.fabfile.org/en/2.5/api/group.html#fabric.group.Group.run
*/
private class FabricGroupRunCall extends SystemCommandExecution::Range, DataFlow::CallCfgNode {
FabricGroupRunCall() { this = fabric::group::Group::subclassInstanceRunMethod().getACall() }
FabricGroupRunCall() {
this = Fabric::Group::GroupClass::subclassInstanceRunMethod().getACall()
}
override DataFlow::Node getCommand() {
result = [this.getArg(0), this.getArgByName("command")]
@@ -249,7 +251,7 @@ private module FabricV2 {
* See https://docs.fabfile.org/en/2.5/api/group.html#fabric.group.SerialGroup.
*/
module SerialGroup {
private class ClassInstantiation extends Group::ModeledSubclass {
private class ClassInstantiation extends GroupClass::ModeledSubclass {
ClassInstantiation() {
this = group().getMember("SerialGroup")
or
@@ -264,7 +266,7 @@ private module FabricV2 {
* See https://docs.fabfile.org/en/2.5/api/group.html#fabric.group.ThreadingGroup.
*/
module ThreadingGroup {
private class ClassInstantiation extends Group::ModeledSubclass {
private class ClassInstantiation extends GroupClass::ModeledSubclass {
ClassInstantiation() {
this = group().getMember("ThreadingGroup")
or

View File

@@ -20,11 +20,11 @@ private module Invoke {
API::Node invoke() { result = API::moduleImport("invoke") }
/** Provides models for the `invoke` module. */
module invoke {
module InvokeModule {
/** Provides models for the `invoke.context` module */
module context {
module Context {
/** Provides models for the `invoke.context.Context` class */
module Context {
module ContextClass {
/** Gets a reference to the `invoke.context.Context` class. */
API::Node classRef() {
result = API::moduleImport("invoke").getMember("context").getMember("Context")
@@ -36,7 +36,7 @@ private module Invoke {
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
t.start() and
(
result = invoke::context::Context::classRef().getACall()
result = InvokeModule::Context::ContextClass::classRef().getACall()
or
exists(Function func |
func.getADecorator() = invoke().getMember("task").getAUse().asExpr() and
@@ -53,7 +53,7 @@ private module Invoke {
/** Gets a reference to the `run` or `sudo` methods on a `invoke.context.Context` instance. */
private DataFlow::TypeTrackingNode instanceRunMethods(DataFlow::TypeTracker t) {
t.startInAttr(["run", "sudo"]) and
result = invoke::context::Context::instance()
result = InvokeModule::Context::ContextClass::instance()
or
exists(DataFlow::TypeTracker t2 | result = instanceRunMethods(t2).track(t2, t))
}
@@ -74,7 +74,7 @@ private module Invoke {
private class InvokeRunCommandCall extends SystemCommandExecution::Range, DataFlow::CallCfgNode {
InvokeRunCommandCall() {
this = invoke().getMember(["run", "sudo"]).getACall() or
this.getFunction() = invoke::context::Context::instanceRunMethods()
this.getFunction() = InvokeModule::Context::ContextClass::instanceRunMethods()
}
override DataFlow::Node getCommand() {

View File

@@ -25,7 +25,7 @@ private module Mysql {
// mysql
// ---------------------------------------------------------------------------
/** Provides models for the `mysql` module. */
module mysql {
module MysqlMod {
/**
* The mysql.connector module
* See https://dev.mysql.com/doc/connector-python/en/connector-python-example-connecting.html

View File

@@ -220,7 +220,7 @@ private module RestFramework {
*
* Use the predicate `Request::instance()` to get references to instances of `rest_framework.request.Request`.
*/
abstract class InstanceSource extends PrivateDjango::django::http::request::HttpRequest::InstanceSource {
abstract class InstanceSource extends PrivateDjango::DjangoImpl::Http::Request::HttpRequest::InstanceSource {
}
/** A direct instantiation of `rest_framework.request.Request`. */
@@ -296,7 +296,7 @@ private module RestFramework {
}
/** A direct instantiation of `rest_framework.response.Response`. */
private class ClassInstantiation extends PrivateDjango::django::http::response::HttpResponse::InstanceSource,
private class ClassInstantiation extends PrivateDjango::DjangoImpl::Http::Response::HttpResponse::InstanceSource,
DataFlow::CallCfgNode {
ClassInstantiation() { this = classRef().getACall() }

View File

@@ -308,7 +308,7 @@ private module StdlibPrivate {
API::Node os() { result = API::moduleImport("os") }
/** Provides models for the `os` module. */
module os {
module OS {
/** Gets a reference to the `os.path` module. */
API::Node path() {
result = os().getMember("path")
@@ -323,7 +323,7 @@ private module StdlibPrivate {
}
/** Provides models for the `os.path` module */
module path {
module OsPath {
/** Gets a reference to the `os.path.join` function. */
API::Node join() { result = path().getMember("join") }
}
@@ -945,7 +945,7 @@ private module StdlibPrivate {
// these raise errors if the file does not exist
"getatime", "getmtime", "getctime", "getsize"
] and
this = os::path().getMember(name).getACall()
this = OS::path().getMember(name).getACall()
}
override DataFlow::Node getAPathArgument() {
@@ -961,7 +961,7 @@ private module StdlibPrivate {
/** A call to `os.path.samefile` will raise an exception if an `os.stat()` call on either pathname fails. */
private class OsPathSamefileCall extends FileSystemAccess::Range, DataFlow::CallCfgNode {
OsPathSamefileCall() { this = os::path().getMember("samefile").getACall() }
OsPathSamefileCall() { this = OS::path().getMember("samefile").getACall() }
override DataFlow::Node getAPathArgument() {
result in [
@@ -995,7 +995,7 @@ private module StdlibPrivate {
OsPathComputation() {
methodName = pathComputation() and
this = os::path().getMember(methodName).getACall()
this = OS::path().getMember(methodName).getACall()
}
DataFlow::Node getPathArg() {
@@ -1022,7 +1022,7 @@ private module StdlibPrivate {
* See https://docs.python.org/3/library/os.path.html#os.path.normpath
*/
private class OsPathNormpathCall extends Path::PathNormalization::Range, DataFlow::CallCfgNode {
OsPathNormpathCall() { this = os::path().getMember("normpath").getACall() }
OsPathNormpathCall() { this = OS::path().getMember("normpath").getACall() }
override DataFlow::Node getPathArg() { result in [this.getArg(0), this.getArgByName("path")] }
}
@@ -1032,7 +1032,7 @@ private module StdlibPrivate {
* See https://docs.python.org/3/library/os.path.html#os.path.abspath
*/
private class OsPathAbspathCall extends Path::PathNormalization::Range, DataFlow::CallCfgNode {
OsPathAbspathCall() { this = os::path().getMember("abspath").getACall() }
OsPathAbspathCall() { this = OS::path().getMember("abspath").getACall() }
override DataFlow::Node getPathArg() { result in [this.getArg(0), this.getArgByName("path")] }
}
@@ -1042,7 +1042,7 @@ private module StdlibPrivate {
* See https://docs.python.org/3/library/os.path.html#os.path.realpath
*/
private class OsPathRealpathCall extends Path::PathNormalization::Range, DataFlow::CallCfgNode {
OsPathRealpathCall() { this = os::path().getMember("realpath").getACall() }
OsPathRealpathCall() { this = OS::path().getMember("realpath").getACall() }
override DataFlow::Node getPathArg() { result in [this.getArg(0), this.getArgByName("path")] }
}
@@ -1143,7 +1143,7 @@ private module StdlibPrivate {
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
exists(CallNode call |
nodeTo.asCfgNode() = call and
call = os::path::join().getACall().asCfgNode() and
call = OS::OsPath::join().getACall().asCfgNode() and
call.getAnArg() = nodeFrom.asCfgNode()
)
// TODO: Handle pathlib (like we do for os.path.join)
@@ -1662,7 +1662,7 @@ private module StdlibPrivate {
API::Node cgi() { result = API::moduleImport("cgi") }
/** Provides models for the `cgi` module. */
module cgi {
module Cgi {
/**
* Provides models for the `cgi.FieldStorage` class
*
@@ -1862,7 +1862,7 @@ private module StdlibPrivate {
API::Node http() { result = API::moduleImport("http") }
/** Provides models for the `http` module. */
module http {
module Http {
// -------------------------------------------------------------------------
// http.server
// -------------------------------------------------------------------------
@@ -1870,7 +1870,7 @@ private module StdlibPrivate {
API::Node server() { result = http().getMember("server") }
/** Provides models for the `http.server` module */
module server {
module Server {
/**
* Provides models for the `http.server.BaseHTTPRequestHandler` class (Python 3 only).
*
@@ -1926,9 +1926,9 @@ private module StdlibPrivate {
SimpleHttpServer::SimpleHttpRequestHandler::classRef(),
CGIHTTPServer::CGIHTTPRequestHandler::classRef(),
// Python 3
http::server::BaseHttpRequestHandler::classRef(),
http::server::SimpleHttpRequestHandler::classRef(),
http::server::CGIHTTPRequestHandler::classRef()
Http::Server::BaseHttpRequestHandler::classRef(),
Http::Server::SimpleHttpRequestHandler::classRef(),
Http::Server::CGIHTTPRequestHandler::classRef()
].getASubclass*()
}

View File

@@ -72,7 +72,7 @@ private module Tornado {
API::Node tornado() { result = API::moduleImport("tornado") }
/** Provides models for the `tornado` module. */
module tornado {
module TornadoModule {
// -------------------------------------------------------------------------
// tornado.web
// -------------------------------------------------------------------------
@@ -80,7 +80,7 @@ private module Tornado {
API::Node web() { result = tornado().getMember("web") }
/** Provides models for the `tornado.web` module */
module web {
module Web {
/**
* Provides models for the `tornado.web.RequestHandler` class and subclasses.
*
@@ -199,7 +199,7 @@ private module Tornado {
override string getAsyncMethodName() { none() }
}
private class RequestAttrAccess extends tornado::httputil::HttpServerRequest::InstanceSource {
private class RequestAttrAccess extends TornadoModule::HttpUtil::HttpServerRequest::InstanceSource {
RequestAttrAccess() {
this.(DataFlow::AttrRead).getObject() = instance() and
this.(DataFlow::AttrRead).getAttributeName() = "request"
@@ -263,7 +263,7 @@ private module Tornado {
API::Node httputil() { result = tornado().getMember("httputil") }
/** Provides models for the `tornado.httputil` module */
module httputil {
module HttpUtil {
/**
* Provides models for the `tornado.httputil.HttpServerRequest` class
*
@@ -359,12 +359,14 @@ private module Tornado {
// ---------------------------------------------------------------------------
/** Gets a sequence that defines a number of route rules */
SequenceNode routeSetupRuleList() {
exists(CallNode call | call = any(tornado::web::Application::ClassInstantiation c).asCfgNode() |
exists(CallNode call |
call = any(TornadoModule::Web::Application::ClassInstantiation c).asCfgNode()
|
result in [call.getArg(0), call.getArgByName("handlers")]
)
or
exists(CallNode call |
call.getFunction() = tornado::web::Application::add_handlers().asCfgNode()
call.getFunction() = TornadoModule::Web::Application::add_handlers().asCfgNode()
|
result in [call.getArg(1), call.getArgByName("host_handlers")]
)
@@ -406,7 +408,7 @@ private module Tornado {
override DataFlow::Node getUrlPatternArg() { result.asCfgNode() = node.getElement(0) }
override Function getARequestHandler() {
exists(tornado::web::RequestHandler::RequestHandlerClass cls |
exists(TornadoModule::Web::RequestHandler::RequestHandlerClass cls |
cls.getARef().asCfgNode() = node.getElement(1) and
result = cls.getARequestHandler()
)
@@ -437,7 +439,7 @@ private module Tornado {
/** A request handler defined in a tornado RequestHandler class, that has no known route. */
private class TornadoRequestHandlerWithoutKnownRoute extends HTTP::Server::RequestHandler::Range {
TornadoRequestHandlerWithoutKnownRoute() {
exists(tornado::web::RequestHandler::RequestHandlerClass cls |
exists(TornadoModule::Web::RequestHandler::RequestHandlerClass cls |
cls.getARequestHandler() = this
) and
not exists(TornadoRouteSetup setup | setup.getARequestHandler() = this)
@@ -465,7 +467,7 @@ private module Tornado {
private class TornadoRequestHandlerRedirectCall extends HTTP::Server::HttpRedirectResponse::Range,
DataFlow::CallCfgNode {
TornadoRequestHandlerRedirectCall() {
this.getFunction() = tornado::web::RequestHandler::redirectMethod()
this.getFunction() = TornadoModule::Web::RequestHandler::redirectMethod()
}
override DataFlow::Node getRedirectLocation() {
@@ -487,7 +489,7 @@ private module Tornado {
private class TornadoRequestHandlerWriteCall extends HTTP::Server::HttpResponse::Range,
DataFlow::CallCfgNode {
TornadoRequestHandlerWriteCall() {
this.getFunction() = tornado::web::RequestHandler::writeMethod()
this.getFunction() = TornadoModule::Web::RequestHandler::writeMethod()
}
override DataFlow::Node getBody() { result in [this.getArg(0), this.getArgByName("chunk")] }
@@ -505,7 +507,7 @@ private module Tornado {
class TornadoRequestHandlerSetCookieCall extends HTTP::Server::CookieWrite::Range,
DataFlow::MethodCallNode {
TornadoRequestHandlerSetCookieCall() {
this.calls(tornado::web::RequestHandler::instance(), "set_cookie")
this.calls(TornadoModule::Web::RequestHandler::instance(), "set_cookie")
}
override DataFlow::Node getHeaderArg() { none() }

View File

@@ -47,7 +47,7 @@ module PathInjection {
override predicate isSanitizer(DataFlow::Node node) { node instanceof Sanitizer }
override predicate isBarrier(DataFlow::Node node, DataFlow::FlowState state) {
override predicate isSanitizer(DataFlow::Node node, DataFlow::FlowState state) {
// Block `NotNormalized` paths here, since they change state to `NormalizedUnchecked`
node instanceof Path::PathNormalization and
state instanceof NotNormalized
@@ -60,7 +60,7 @@ module PathInjection {
guard instanceof SanitizerGuard
}
override predicate isAdditionalFlowStep(
override predicate isAdditionalTaintStep(
DataFlow::Node nodeFrom, DataFlow::FlowState stateFrom, DataFlow::Node nodeTo,
DataFlow::FlowState stateTo
) {

View File

@@ -18,9 +18,9 @@ predicate mapping_format(StrConst e) {
*/
private string conversion_specifier_string(StrConst e, int number, int position) {
exists(string s, string REGEX | s = e.getText() |
REGEX = "%(\\([^)]*\\))?[#0\\- +]*(\\*|[0-9]*)(\\.(\\*|[0-9]*))?(h|H|l|L)?[badiouxXeEfFgGcrs%]" and
result = s.regexpFind(REGEX, number, position)
exists(string s, string regex | s = e.getText() |
regex = "%(\\([^)]*\\))?[#0\\- +]*(\\*|[0-9]*)(\\.(\\*|[0-9]*))?(h|H|l|L)?[badiouxXeEfFgGcrs%]" and
result = s.regexpFind(regex, number, position)
)
}

View File

@@ -1,3 +1,9 @@
## 0.0.11
### New Queries
* The query "XPath query built from user-controlled sources" (`py/xpath-injection`) has been promoted from experimental to the main query pack. Its results will now appear by default. This query was originally [submitted as an experimental query by @porcupineyhairs](https://github.com/github/codeql/pull/6331).
## 0.0.10
### New Queries

View File

@@ -1,4 +1,5 @@
---
category: newQuery
---
## 0.0.11
### New Queries
* The query "XPath query built from user-controlled sources" (`py/xpath-injection`) has been promoted from experimental to the main query pack. Its results will now appear by default. This query was originally [submitted as an experimental query by @porcupineyhairs](https://github.com/github/codeql/pull/6331).

View File

@@ -1,2 +1,2 @@
---
lastReleaseVersion: 0.0.10
lastReleaseVersion: 0.0.11

View File

@@ -0,0 +1,25 @@
/**
* @name SimpleXMLRPCServer DoS vulnerability
* @description SimpleXMLRPCServer is vulnerable to DoS attacks from untrusted user input
* @kind problem
* @problem.severity warning
* @precision high
* @id py/simple-xml-rpc-server-dos
* @tags security
* external/cwe/cwe-776
*/
private import python
private import experimental.semmle.python.Concepts
private import semmle.python.ApiGraphs
from DataFlow::CallCfgNode call, string kinds
where
call = API::moduleImport("xmlrpc").getMember("server").getMember("SimpleXMLRPCServer").getACall() and
kinds =
strictconcat(ExperimentalXML::XMLVulnerabilityKind kind |
kind.isBillionLaughs() or kind.isQuadraticBlowup()
|
kind, ", "
)
select call, "SimpleXMLRPCServer is vulnerable to: " + kinds + "."

View File

@@ -0,0 +1,4 @@
<?xml version="1.0"?>
<!DOCTYPE dt [
<!ENTITY xxe SYSTEM "file:///etc/passwd">]>
<test>&xxe;</test>

View File

@@ -0,0 +1,25 @@
from flask import request, Flask
import lxml.etree
import xml.etree.ElementTree
app = Flask(__name__)
# BAD
@app.route("/bad")
def bad():
    # The XML to parse is taken directly from the request (untrusted input).
    xml_content = request.args['xml_content']
    # No hardening options are passed to the parser.
    parser = lxml.etree.XMLParser()
    parsed_xml = xml.etree.ElementTree.fromstring(xml_content, parser=parser)
    return parsed_xml.text
# GOOD
@app.route("/good")
def good():
    # The XML to parse is taken directly from the request (untrusted input).
    xml_content = request.args['xml_content']
    # Entity resolution is explicitly switched off on the parser.
    parser = lxml.etree.XMLParser(resolve_entities=False)
    parsed_xml = xml.etree.ElementTree.fromstring(xml_content, parser=parser)
    return parsed_xml.text

View File

@@ -0,0 +1,48 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>
Parsing untrusted XML files with a weakly configured XML parser may lead to attacks such as XML External Entity (XXE),
Billion Laughs, Quadratic Blowup and DTD retrieval.
This type of attack uses external entity references to access arbitrary files on a system, carry out denial of
service, or server side request forgery. Even when the result of parsing is not returned to the user, out-of-band
data retrieval techniques may allow attackers to steal sensitive data. Denial of service attacks can also be carried out
in this situation.
</p>
</overview>
<recommendation>
<p>
Use <a href="https://pypi.org/project/defusedxml/">defusedxml</a>, a Python package that aims
to prevent any potentially malicious operation.
</p>
</recommendation>
<example>
<p>
The following example calls <code>xml.etree.ElementTree.fromstring</code> using a parser (<code>lxml.etree.XMLParser</code>)
that is not safely configured on untrusted data, and is therefore inherently unsafe.
</p>
<sample src="XmlEntityInjection.py"/>
<p>
Providing an input (<code>xml_content</code>) like the following XML content against /bad, the request response would contain the contents of
<code>/etc/passwd</code>.
</p>
<sample src="XXE.xml"/>
</example>
<references>
<li>Python 3 <a href="https://docs.python.org/3/library/xml.html#xml-vulnerabilities">XML Vulnerabilities</a>.</li>
<li>Python 2 <a href="https://docs.python.org/2/library/xml.html#xml-vulnerabilities">XML Vulnerabilities</a>.</li>
<li>Python <a href="https://www.edureka.co/blog/python-xml-parser-tutorial/">XML Parsing</a>.</li>
<li>OWASP vulnerability description: <a href="https://www.owasp.org/index.php/XML_External_Entity_(XXE)_Processing">XML External Entity (XXE) Processing</a>.</li>
<li>OWASP guidance on parsing xml files: <a href="https://cheatsheetseries.owasp.org/cheatsheets/XML_External_Entity_Prevention_Cheat_Sheet.html#python">XXE Prevention Cheat Sheet</a>.</li>
<li>Paper by Timothy Morgan: <a href="https://research.nccgroup.com/2014/05/19/xml-schema-dtd-and-entity-attacks-a-compendium-of-known-techniques/">XML Schema, DTD, and Entity Attacks</a>.</li>
<li>Out-of-band data retrieval: Timur Yunusov &amp; Alexey Osipov, Black hat EU 2013: <a href="https://www.slideshare.net/qqlan/bh-ready-v4">XML Out-Of-Band Data Retrieval</a>.</li>
<li>Denial of service attack (Billion laughs): <a href="https://en.wikipedia.org/wiki/Billion_laughs">Billion Laughs.</a></li>
</references>
</qhelp>

View File

@@ -0,0 +1,31 @@
/**
* @name XML Entity injection
* @description User input should not be parsed allowing the injection of entities.
* @kind path-problem
* @problem.severity error
* @id py/xml-entity-injection
* @tags security
* external/cwe/cwe-611
* external/cwe/cwe-776
* external/cwe/cwe-827
*/
// determine precision above
import python
import experimental.semmle.python.security.dataflow.XmlEntityInjection
import DataFlow::PathGraph
from
XmlEntityInjection::XmlEntityInjectionConfiguration config, DataFlow::PathNode source,
DataFlow::PathNode sink, string kinds
where
config.hasFlowPath(source, sink) and
kinds =
strictconcat(string kind |
kind = sink.getNode().(XmlEntityInjection::Sink).getVulnerableKind()
|
kind, ", "
)
select sink.getNode(), source, sink,
"$@ XML input is constructed from a $@ and is vulnerable to: " + kinds + ".", sink.getNode(),
"This", source.getNode(), "user-provided value"

View File

@@ -14,6 +14,74 @@ private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.TaintTracking
private import experimental.semmle.python.Frameworks
/**
* Since there is an `XML` module in both the normal and the experimental Concepts,
* the experimental module has to be given a different name: `ExperimentalXML`.
*/
module ExperimentalXML {
/**
* A kind of XML vulnerability.
*
* See https://pypi.org/project/defusedxml/#python-xml-libraries
*/
class XMLVulnerabilityKind extends string {
XMLVulnerabilityKind() {
this in ["Billion Laughs", "Quadratic Blowup", "XXE", "DTD retrieval"]
}
/** Holds for Billion Laughs vulnerability kind. */
predicate isBillionLaughs() { this = "Billion Laughs" }
/** Holds for Quadratic Blowup vulnerability kind. */
predicate isQuadraticBlowup() { this = "Quadratic Blowup" }
/** Holds for XXE vulnerability kind. */
predicate isXxe() { this = "XXE" }
/** Holds for DTD retrieval vulnerability kind. */
predicate isDtdRetrieval() { this = "DTD retrieval" }
}
/**
* A data-flow node that parses XML.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `XMLParsing::Range` instead.
*/
class XMLParsing extends DataFlow::Node instanceof XMLParsing::Range {
/**
* Gets the argument containing the content to parse.
*/
DataFlow::Node getAnInput() { result = super.getAnInput() }
/**
* Holds if this XML parsing is vulnerable to `kind`.
*/
predicate vulnerableTo(XMLVulnerabilityKind kind) { super.vulnerableTo(kind) }
}
/** Provides classes for modeling XML parsing APIs. */
module XMLParsing {
/**
* A data-flow node that parses XML.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `XMLParsing` instead.
*/
abstract class Range extends DataFlow::Node {
/**
* Gets the argument containing the content to parse.
*/
abstract DataFlow::Node getAnInput();
/**
* Holds if this XML parsing is vulnerable to `kind`.
*/
abstract predicate vulnerableTo(XMLVulnerabilityKind kind);
}
}
}
/** Provides classes for modeling LDAP query execution-related APIs. */
module LdapQuery {
/**

View File

@@ -3,6 +3,7 @@
*/
private import experimental.semmle.python.frameworks.Stdlib
private import experimental.semmle.python.frameworks.Xml
private import experimental.semmle.python.frameworks.Flask
private import experimental.semmle.python.frameworks.Django
private import experimental.semmle.python.frameworks.Werkzeug

View File

@@ -11,15 +11,15 @@ private import semmle.python.ApiGraphs
import semmle.python.dataflow.new.RemoteFlowSources
private module ExperimentalPrivateDjango {
private module django {
private module DjangoMod {
API::Node http() { result = API::moduleImport("django").getMember("http") }
module http {
module Http {
API::Node response() { result = http().getMember("response") }
API::Node request() { result = http().getMember("request") }
module request {
module Request {
module HttpRequest {
class DjangoGETParameter extends DataFlow::Node, RemoteFlowSource::Range {
DjangoGETParameter() { this = request().getMember("GET").getMember("get").getACall() }
@@ -29,7 +29,7 @@ private module ExperimentalPrivateDjango {
}
}
module response {
module Response {
module HttpResponse {
API::Node baseClassRef() {
result = response().getMember("HttpResponse").getReturn()

View File

@@ -12,7 +12,7 @@ private import experimental.semmle.python.Concepts
private import semmle.python.ApiGraphs
private module Werkzeug {
module datastructures {
module Datastructures {
module Headers {
class WerkzeugHeaderAddCall extends DataFlow::CallCfgNode, HeaderDeclaration::Range {
WerkzeugHeaderAddCall() {

View File

@@ -0,0 +1,466 @@
/**
* Provides classes and predicates to track external data that
* may represent malicious XML objects.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import experimental.semmle.python.Concepts
private import semmle.python.ApiGraphs
module XML = ExperimentalXML;
private module XmlEtree {
/**
* Provides models for `xml.etree` parsers
*
* See
* - https://docs.python.org/3.10/library/xml.etree.elementtree.html#xml.etree.ElementTree.XMLParser
* - https://docs.python.org/3.10/library/xml.etree.elementtree.html#xml.etree.ElementTree.XMLPullParser
*/
module XMLParser {
/**
* A source of instances of `xml.etree` parsers, extend this class to model new instances.
*
* This can include instantiations of the class, return values from function
* calls, or a special parameter that will be set when functions are called by an external
* library.
*
* Use the predicate `XMLParser::instance()` to get references to instances of `xml.etree` parsers.
*/
abstract class InstanceSource extends DataFlow::LocalSourceNode { }
/** A direct instantiation of an `xml.etree` parser class (`XMLParser` or `XMLPullParser`). */
private class ClassInstantiation extends InstanceSource, DataFlow::CallCfgNode {
  ClassInstantiation() {
    exists(API::Node elementTree |
      elementTree = API::moduleImport("xml").getMember("etree").getMember("ElementTree")
    |
      // both parser classes live in `xml.etree.ElementTree`
      this = elementTree.getMember(["XMLParser", "XMLPullParser"]).getACall()
    )
  }
}
/** Gets a reference to an `xml.etree` parser instance. */
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
t.start() and
result instanceof InstanceSource
or
exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t))
}
/** Gets a reference to an `xml.etree` parser instance. */
DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }
/**
* A call to the `feed` method of an `xml.etree` parser.
*/
private class XMLEtreeParserFeedCall extends DataFlow::MethodCallNode, XML::XMLParsing::Range {
XMLEtreeParserFeedCall() { this.calls(instance(), "feed") }
override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("data")] }
override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) {
kind.isBillionLaughs() or kind.isQuadraticBlowup()
}
}
}
/**
* A call to either of:
* - `xml.etree.ElementTree.fromstring`
* - `xml.etree.ElementTree.fromstringlist`
* - `xml.etree.ElementTree.XML`
* - `xml.etree.ElementTree.XMLID`
* - `xml.etree.ElementTree.parse`
* - `xml.etree.ElementTree.iterparse`
*/
private class XMLEtreeParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range {
XMLEtreeParsing() {
this =
API::moduleImport("xml")
.getMember("etree")
.getMember("ElementTree")
.getMember(["fromstring", "fromstringlist", "XML", "XMLID", "parse", "iterparse"])
.getACall()
}
override DataFlow::Node getAnInput() {
result in [
this.getArg(0),
// fromstring / XML / XMLID
this.getArgByName("text"),
// fromstringlist
this.getArgByName("sequence"),
// parse / iterparse
this.getArgByName("source"),
]
}
override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) {
// note: it does not matter what `xml.etree` parser you are using, you cannot
// change the security features anyway :|
kind.isBillionLaughs() or kind.isQuadraticBlowup()
}
}
}
private module SaxBasedParsing {
/**
* A call to the `setFeature` method on an XML SAX parser.
*
* Only parsers obtained as the return value of `xml.sax.make_parser` are covered.
*
* See https://docs.python.org/3.10/library/xml.sax.reader.html#xml.sax.xmlreader.XMLReader.setFeature
*/
class SaxParserSetFeatureCall extends DataFlow::MethodCallNode {
SaxParserSetFeatureCall() {
this =
API::moduleImport("xml")
.getMember("sax")
.getMember("make_parser")
.getReturn()
.getMember("setFeature")
.getACall()
}
// The keyword argument names do not match the documentation. I checked (with Python
// 3.9.5) that the names used here actually work.
/** Gets the argument specifying the feature (first positional argument, or keyword `name`). */
DataFlow::Node getFeatureArg() { result in [this.getArg(0), this.getArgByName("name")] }
/** Gets the argument specifying the new state (second positional argument, or keyword `state`). */
DataFlow::Node getStateArg() { result in [this.getArg(1), this.getArgByName("state")] }
}
/** Gets a back-reference to the `setFeature` state argument `arg`. */
private DataFlow::TypeTrackingNode saxParserSetFeatureStateArgBacktracker(
DataFlow::TypeBackTracker t, DataFlow::Node arg
) {
t.start() and
arg = any(SaxParserSetFeatureCall c).getStateArg() and
result = arg.getALocalSource()
or
exists(DataFlow::TypeBackTracker t2 |
result = saxParserSetFeatureStateArgBacktracker(t2, arg).backtrack(t2, t)
)
}
/** Gets a back-reference to the `setFeature` state argument `arg`. */
DataFlow::LocalSourceNode saxParserSetFeatureStateArgBacktracker(DataFlow::Node arg) {
result = saxParserSetFeatureStateArgBacktracker(DataFlow::TypeBackTracker::end(), arg)
}
/**
* Gets a reference to an XML SAX parser that has `feature_external_ges` turned on.
*
* Tracking starts at the receiver of a `setFeature(feature_external_ges, True)` call and
* stops at any node that is itself the receiver of a
* `setFeature(feature_external_ges, False)` call.
*
* See https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges
*/
private DataFlow::Node saxParserWithFeatureExternalGesTurnedOn(DataFlow::TypeTracker t) {
// Base case: the object on which `setFeature` is called with the
// `feature_external_ges` feature and a state that comes from the literal `True`.
t.start() and
exists(SaxParserSetFeatureCall call |
call.getFeatureArg() =
API::moduleImport("xml")
.getMember("sax")
.getMember("handler")
.getMember("feature_external_ges")
.getAUse() and
// the state argument is back-tracked to its source, so `flag = True; p.setFeature(f, flag)` also matches
saxParserSetFeatureStateArgBacktracker(call.getStateArg())
.asExpr()
.(BooleanLiteral)
.booleanValue() = true and
result = call.getObject()
)
or
// Step case: follow an already tracked parser one small step. Since `and` binds tighter
// than `or`, the `not exists` below restricts only this step case, not the base case.
exists(DataFlow::TypeTracker t2 |
t = t2.smallstep(saxParserWithFeatureExternalGesTurnedOn(t2), result)
) and
// take into account that the feature can be set to False again, which makes the parser safe again
not exists(SaxParserSetFeatureCall call |
call.getObject() = result and
call.getFeatureArg() =
API::moduleImport("xml")
.getMember("sax")
.getMember("handler")
.getMember("feature_external_ges")
.getAUse() and
saxParserSetFeatureStateArgBacktracker(call.getStateArg())
.asExpr()
.(BooleanLiteral)
.booleanValue() = false
)
}
/**
* Gets a reference to a XML sax parser that has `feature_external_ges` turned on.
*
* See https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges
*/
DataFlow::Node saxParserWithFeatureExternalGesTurnedOn() {
result = saxParserWithFeatureExternalGesTurnedOn(DataFlow::TypeTracker::end())
}
/**
* A call to the `parse` method on a SAX XML parser.
*/
private class XMLSaxInstanceParsing extends DataFlow::MethodCallNode, XML::XMLParsing::Range {
XMLSaxInstanceParsing() {
this =
API::moduleImport("xml")
.getMember("sax")
.getMember("make_parser")
.getReturn()
.getMember("parse")
.getACall()
}
override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("source")] }
override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) {
// always vuln to these
(kind.isBillionLaughs() or kind.isQuadraticBlowup())
or
// can be vuln to other things if features has been turned on
this.getObject() = saxParserWithFeatureExternalGesTurnedOn() and
(kind.isXxe() or kind.isDtdRetrieval())
}
}
/**
* A call to either `parse` or `parseString` from `xml.sax` module.
*
* These are module-level functions, not methods on a parser instance.
*
* See:
* - https://docs.python.org/3.10/library/xml.sax.html#xml.sax.parse
* - https://docs.python.org/3.10/library/xml.sax.html#xml.sax.parseString
*/
private class XMLSaxParsing extends DataFlow::MethodCallNode, XML::XMLParsing::Range {
XMLSaxParsing() {
this =
API::moduleImport("xml").getMember("sax").getMember(["parse", "parseString"]).getACall()
}
override DataFlow::Node getAnInput() {
result in [
this.getArg(0),
// parseString
this.getArgByName("string"),
// parse
this.getArgByName("source"),
]
}
override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) {
// always vuln to these
(kind.isBillionLaughs() or kind.isQuadraticBlowup())
or
// can be vuln to other things if features has been turned on
// NOTE(review): for these module-level functions `getObject()` is the qualifier of the
// call (e.g. `xml.sax`), not a parser object, so this disjunct presumably never holds.
// Confirm whether XXE / DTD retrieval is reachable via `xml.sax.parse`/`parseString` at all.
this.getObject() = saxParserWithFeatureExternalGesTurnedOn() and
(kind.isXxe() or kind.isDtdRetrieval())
}
}
/**
* A call to the `parse` or `parseString` methods from `xml.dom.minidom` or `xml.dom.pulldom`.
*
* Both of these modules are based on SAX parsers.
*/
private class XMLDomParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range {
XMLDomParsing() {
this =
API::moduleImport("xml")
.getMember("dom")
.getMember(["minidom", "pulldom"])
.getMember(["parse", "parseString"])
.getACall()
}
override DataFlow::Node getAnInput() {
result in [
this.getArg(0),
// parseString
this.getArgByName("string"),
// minidom.parse
this.getArgByName("file"),
// pulldom.parse
this.getArgByName("stream_or_string"),
]
}
/** Gets the argument specifying the parser to use (second positional argument, or keyword `parser`). */
DataFlow::Node getParserArg() { result in [this.getArg(1), this.getArgByName("parser")] }
override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) {
// XXE / DTD retrieval only when the supplied parser has `feature_external_ges` turned on
this.getParserArg() = saxParserWithFeatureExternalGesTurnedOn() and
(kind.isXxe() or kind.isDtdRetrieval())
or
// always reported as vulnerable to these, regardless of the parser argument
(kind.isBillionLaughs() or kind.isQuadraticBlowup())
}
}
}
private module Lxml {
/**
* Provides models for `lxml.etree` parsers.
*
* See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser
*/
module XMLParser {
/**
 * A source of instances of `lxml.etree` parsers, extend this class to model new instances.
 *
 * This can include instantiations of the class, return values from function
 * calls, or a special parameter that will be set when functions are called by an external
 * library.
 *
 * Each source states, through `vulnerableTo`, which kinds of XML attack a parser
 * originating from it is exposed to.
 *
 * Use the predicate `XMLParser::instance()` to get references to instances of `lxml.etree` parsers.
 */
abstract class InstanceSource extends DataFlow::LocalSourceNode {
  /** Holds if this instance is vulnerable to `kind`. */
  abstract predicate vulnerableTo(XML::XMLVulnerabilityKind kind);
}
/**
 * A call to `lxml.etree.XMLParser`.
 *
 * The vulnerability kinds are derived from the keyword arguments of the constructor call.
 *
 * See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser
 */
private class LXMLParser extends InstanceSource, DataFlow::CallCfgNode {
  LXMLParser() {
    this = API::moduleImport("lxml").getMember("etree").getMember("XMLParser").getACall()
  }

  /** Holds if the keyword argument `name` has the literal `True` as a local source. */
  private predicate keywordIsTrue(string name) {
    this.getArgByName(name).getALocalSource().asExpr() instanceof True
  }

  /** Holds if the keyword argument `name` has the literal `False` as a local source. */
  private predicate keywordIsFalse(string name) {
    this.getArgByName(name).getALocalSource().asExpr() instanceof False
  }

  // NOTE: it's not possible to change settings of a parser after constructing it
  override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) {
    kind.isXxe() and
    (
      // `resolve_entities` defaults to `True`, so leaving it out counts as enabled
      not exists(this.getArgByName("resolve_entities"))
      or
      this.keywordIsTrue("resolve_entities")
    )
    or
    // entity-expansion attacks need `huge_tree=True` and entities not explicitly disabled
    (kind.isBillionLaughs() or kind.isQuadraticBlowup()) and
    this.keywordIsTrue("huge_tree") and
    not this.keywordIsFalse("resolve_entities")
    or
    // DTD retrieval needs both `load_dtd=True` and `no_network=False`
    kind.isDtdRetrieval() and
    this.keywordIsTrue("load_dtd") and
    this.keywordIsFalse("no_network")
  }
}
/**
 * A call to `lxml.etree.get_default_parser`.
 *
 * The returned parser is modelled as vulnerable to XXE only.
 *
 * See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.get_default_parser
 */
private class LXMLDefaultParser extends InstanceSource, DataFlow::CallCfgNode {
  LXMLDefaultParser() {
    this =
      API::moduleImport("lxml").getMember("etree").getMember("get_default_parser").getACall()
  }

  override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) {
    // As highlighted by
    // https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser
    // XXE is allowed by default, so as long as the default parser has not been
    // overridden, the result is also vulnerable to XXE.
    kind.isXxe()
    // TODO: take into account that you can override the default parser with `lxml.etree.set_default_parser`.
  }
}
/**
 * Gets a reference to an `lxml.etree` parser instance, with origin in `origin`,
 * tracked with type tracker `t`.
 *
 * This is the recursive helper behind the public `instance(InstanceSource)` predicate.
 */
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t, InstanceSource origin) {
  // base case: tracking starts at the source itself
  t.start() and
  result = origin
  or
  // recursive case: extend an already tracked reference by one type-tracking step
  exists(DataFlow::TypeTracker t2 | result = instance(t2, origin).track(t2, t))
}

/** Gets a reference to an `lxml.etree` parser instance, with origin in `origin`. */
DataFlow::Node instance(InstanceSource origin) {
  instance(DataFlow::TypeTracker::end(), origin).flowsTo(result)
}

/** Gets a reference to an `lxml.etree` parser instance, that is vulnerable to `kind`. */
DataFlow::Node instanceVulnerableTo(XML::XMLVulnerabilityKind kind) {
  exists(InstanceSource origin | result = instance(origin) and origin.vulnerableTo(kind))
}
/**
 * A call to the `feed` method of an `lxml` parser.
 *
 * The call is vulnerable to exactly the kinds that the parser instance it is called on
 * is vulnerable to.
 */
private class LXMLParserFeedCall extends DataFlow::MethodCallNode, XML::XMLParsing::Range {
  // any tracked parser instance qualifies, whatever its origin
  LXMLParserFeedCall() { this.calls(instance(_), "feed") }

  override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("data")] }

  override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) {
    this.calls(instanceVulnerableTo(kind), "feed")
  }
}
}
/**
 * A call to one of the `lxml.etree` parsing functions:
 * - `lxml.etree.fromstring`
 * - `lxml.etree.fromstringlist`
 * - `lxml.etree.XML`
 * - `lxml.etree.parse`
 * - `lxml.etree.parseid`
 *
 * See https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.fromstring
 */
private class LXMLParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range {
  LXMLParsing() {
    exists(string name | name in ["fromstring", "fromstringlist", "XML", "parse", "parseid"] |
      this = API::moduleImport("lxml").getMember("etree").getMember(name).getACall()
    )
  }

  override DataFlow::Node getAnInput() {
    // first positional argument of any of the modelled functions
    result = this.getArg(0)
    or
    // keyword used by `fromstring` / `XML`
    result = this.getArgByName("text")
    or
    // keyword used by `fromstringlist`
    result = this.getArgByName("strings")
    or
    // keyword used by `parse` / `parseid`
    result = this.getArgByName("source")
  }

  /** Gets the argument that supplies the parser, given positionally or as `parser`. */
  DataFlow::Node getParserArg() {
    result = this.getArg(1)
    or
    result = this.getArgByName("parser")
  }

  override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) {
    // no explicit parser argument: only XXE is reported
    not exists(this.getParserArg()) and
    kind.isXxe()
    or
    // explicit parser: inherit whatever the tracked parser instance is vulnerable to
    this.getParserArg() = XMLParser::instanceVulnerableTo(kind)
  }
}
}
/** Provides models for the `xmltodict` package. */
private module Xmltodict {
  /**
   * A call to `xmltodict.parse`.
   *
   * Reported as vulnerable to entity-expansion attacks only when the call passes
   * `disable_entities=False`.
   */
  private class XMLtoDictParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range {
    XMLtoDictParsing() { this = API::moduleImport("xmltodict").getMember("parse").getACall() }

    override DataFlow::Node getAnInput() {
      result = this.getArg(0)
      or
      result = this.getArgByName("xml_input")
    }

    override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) {
      this.getArgByName("disable_entities").getALocalSource().asExpr() instanceof False and
      (kind.isBillionLaughs() or kind.isQuadraticBlowup())
    }
  }
}

View File

@@ -0,0 +1,28 @@
import python
import experimental.semmle.python.Concepts
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import semmle.python.dataflow.new.RemoteFlowSources
import semmle.python.dataflow.new.BarrierGuards
/**
 * Provides a taint-tracking configuration for detecting "xml injection"
 * vulnerabilities.
 *
 * The sources, sinks, sanitizer guards and additional taint steps are taken from
 * `XmlEntityInjectionCustomizations::XmlEntityInjection`.
 */
module XmlEntityInjection {
  import XmlEntityInjectionCustomizations::XmlEntityInjection

  /**
   * A taint-tracking configuration for detecting "xml injection" vulnerabilities.
   */
  class XmlEntityInjectionConfiguration extends TaintTracking::Configuration {
    XmlEntityInjectionConfiguration() { this = "XmlEntityInjectionConfiguration" }

    // Use the abstract `Source` extension point rather than the concrete
    // `RemoteFlowSourceAsSource`, so that sources added by extending `Source` are
    // honoured. `RemoteFlowSourceAsSource` extends `Source`, so it is still included.
    override predicate isSource(DataFlow::Node source) { source instanceof Source }

    override predicate isSink(DataFlow::Node sink) { sink instanceof Sink }

    override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
      guard instanceof SanitizerGuard
    }

    override predicate isAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
      any(AdditionalTaintStep s).step(nodeFrom, nodeTo)
    }
  }
}

View File

@@ -0,0 +1,86 @@
/**
 * Provides default sources, sinks and sanitizers for detecting
 * "xml injection"
 * vulnerabilities, as well as extension points for adding your own.
 */
private import python
private import semmle.python.dataflow.new.DataFlow
private import experimental.semmle.python.Concepts
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.BarrierGuards
private import semmle.python.ApiGraphs
/**
 * Provides default sources, sinks and sanitizers for detecting "xml injection"
 * vulnerabilities, as well as extension points for adding your own.
 */
module XmlEntityInjection {
  /** A data flow source for "xml injection" vulnerabilities. */
  abstract class Source extends DataFlow::Node { }

  /** A data flow sink for "xml injection" vulnerabilities. */
  abstract class Sink extends DataFlow::Node {
    /** Gets the kind of XML injection that this sink is vulnerable to. */
    abstract string getVulnerableKind();
  }

  /** A sanitizer guard for "xml injection" vulnerabilities. */
  abstract class SanitizerGuard extends DataFlow::BarrierGuard { }

  /**
   * A unit class for adding additional taint steps.
   *
   * Extend this class to add additional taint steps that should apply to the
   * `XmlEntityInjection` taint configuration.
   */
  class AdditionalTaintStep extends Unit {
    /**
     * Holds if the step from `nodeFrom` to `nodeTo` should be considered a taint
     * step for the `XmlEntityInjection` configuration.
     */
    abstract predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo);
  }

  /**
   * An input to a direct XML parsing function, considered as a flow sink.
   *
   * See `ExperimentalXML::XMLParsing`.
   */
  class XMLParsingInputAsSink extends Sink {
    ExperimentalXML::XMLParsing xmlParsing;

    XMLParsingInputAsSink() { this = xmlParsing.getAnInput() }

    // the kinds are those that the parsing operation consuming this input reports
    override string getVulnerableKind() { xmlParsing.vulnerableTo(result) }
  }

  /** A source of remote user input, considered as a flow source. */
  class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { }

  /** A comparison with a constant string, considered as a sanitizer-guard. */
  class StringConstCompareAsSanitizerGuard extends SanitizerGuard, StringConstCompare { }

  /**
   * A taint step from the first argument of `io.StringIO` / `io.BytesIO` to the
   * resulting call.
   */
  class IoAdditionalTaintStep extends AdditionalTaintStep {
    override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
      nodeTo = API::moduleImport("io").getMember(["StringIO", "BytesIO"]).getACall() and
      nodeFrom = nodeTo.(DataFlow::CallCfgNode).getArg(0)
    }
  }
}

View File

@@ -19,13 +19,13 @@ module NoSqlInjection {
state instanceof ConvertedToDict
}
override predicate isBarrier(DataFlow::Node node, DataFlow::FlowState state) {
override predicate isSanitizer(DataFlow::Node node, DataFlow::FlowState state) {
// Block `RemoteInput` paths here, since they change state to `ConvertedToDict`
exists(Decoding decoding | decoding.getFormat() = "JSON" and node = decoding.getOutput()) and
state instanceof RemoteInput
}
override predicate isAdditionalFlowStep(
override predicate isAdditionalTaintStep(
DataFlow::Node nodeFrom, DataFlow::FlowState stateFrom, DataFlow::Node nodeTo,
DataFlow::FlowState stateTo
) {

View File

@@ -1,5 +1,5 @@
name: codeql/python-queries
version: 0.0.11-dev
version: 0.0.12-dev
groups:
- python
- queries

View File

@@ -0,0 +1,33 @@
import python
import experimental.semmle.python.Concepts
import experimental.semmle.python.frameworks.Xml
import semmle.python.dataflow.new.DataFlow
import TestUtilities.InlineExpectationsTest
private import semmle.python.dataflow.new.internal.PrintNode
/**
 * An inline-expectations test for `XML::XMLParsing`.
 *
 * It reports each input of a parsing operation under the `input` tag, and each kind
 * the operation is vulnerable to under the `vuln` tag.
 */
class XmlParsingTest extends InlineExpectationsTest {
  XmlParsingTest() { this = "XmlParsingTest" }

  override string getARelevantTag() { result = "input" or result = "vuln" }

  override predicate hasActualResult(Location location, string element, string tag, string value) {
    // only report results located in files that have a relative path
    exists(location.getFile().getRelativePath()) and
    exists(XML::XMLParsing parsing |
      // `input=<pretty-printed node>`, located at the input node
      tag = "input" and
      exists(DataFlow::Node input | input = parsing.getAnInput() |
        location = input.getLocation() and
        element = input.toString() and
        value = prettyNodeForInlineTest(input)
      )
      or
      // `vuln='<kind>'`, located at the parsing call
      tag = "vuln" and
      exists(XML::XMLVulnerabilityKind kind | parsing.vulnerableTo(kind) |
        location = parsing.getLocation() and
        element = parsing.toString() and
        value = "'" + kind + "'"
      )
    )
  }
}

View File

@@ -0,0 +1,54 @@
from io import StringIO
import lxml.etree
x = "some xml"
# different parsing methods
lxml.etree.fromstring(x) # $ input=x vuln='XXE'
lxml.etree.fromstring(text=x) # $ input=x vuln='XXE'
lxml.etree.fromstringlist([x]) # $ input=List vuln='XXE'
lxml.etree.fromstringlist(strings=[x]) # $ input=List vuln='XXE'
lxml.etree.XML(x) # $ input=x vuln='XXE'
lxml.etree.XML(text=x) # $ input=x vuln='XXE'
lxml.etree.parse(StringIO(x)) # $ input=StringIO(..) vuln='XXE'
lxml.etree.parse(source=StringIO(x)) # $ input=StringIO(..) vuln='XXE'
lxml.etree.parseid(StringIO(x)) # $ input=StringIO(..) vuln='XXE'
lxml.etree.parseid(source=StringIO(x)) # $ input=StringIO(..) vuln='XXE'
# With default parsers (nothing changed)
parser = lxml.etree.XMLParser()
lxml.etree.fromstring(x, parser=parser) # $ input=x vuln='XXE'
parser = lxml.etree.get_default_parser()
lxml.etree.fromstring(x, parser=parser) # $ input=x vuln='XXE'
# manual use of feed method
parser = lxml.etree.XMLParser()
parser.feed(x) # $ input=x vuln='XXE'
parser.feed(data=x) # $ input=x vuln='XXE'
parser.close()
# XXE-safe
parser = lxml.etree.XMLParser(resolve_entities=False)
lxml.etree.fromstring(x, parser) # $ input=x
lxml.etree.fromstring(x, parser=parser) # $ input=x
# XXE-vuln
parser = lxml.etree.XMLParser(resolve_entities=True)
lxml.etree.fromstring(x, parser=parser) # $ input=x vuln='XXE'
# Billion laughs vuln (also XXE)
parser = lxml.etree.XMLParser(huge_tree=True)
lxml.etree.fromstring(x, parser=parser) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' vuln='XXE'
# Safe for both Billion laughs and XXE
parser = lxml.etree.XMLParser(resolve_entities=False, huge_tree=True)
lxml.etree.fromstring(x, parser=parser) # $ input=x
# DTD retrieval vuln (also XXE)
parser = lxml.etree.XMLParser(load_dtd=True, no_network=False)
lxml.etree.fromstring(x, parser=parser) # $ input=x vuln='DTD retrieval' vuln='XXE'

View File

@@ -0,0 +1,677 @@
#!/usr/bin/env python3
# this file doesn't have a .py extension so the extractor doesn't pick it up, so it
# doesn't have to be annotated
# This file shows ways to exploit vulnerable XML parsing
# see
# https://pypi.org/project/defusedxml/#python-xml-libraries
# https://docs.python.org/3.10/library/xml.html#xml-vulnerabilities
import pathlib
from flask import Flask
import threading
import multiprocessing
import time
from io import StringIO
import pytest
HOST = "localhost"
PORT = 8080
FLAG_PATH = pathlib.Path(__file__).with_name("flag")
# ==============================================================================
# xml samples
ok_xml = f"""<?xml version="1.0"?>
<test>hello world</test>
"""
local_xxe = f"""<?xml version="1.0"?>
<!DOCTYPE dt [
<!ENTITY xxe SYSTEM "file://{FLAG_PATH}">
]>
<test>&xxe;</test>
"""
remote_xxe = f"""<?xml version="1.0"?>
<!DOCTYPE dt [
<!ENTITY remote_xxe SYSTEM "http://{HOST}:{PORT}/xxe">
]>
<test>&remote_xxe;</test>
"""
billion_laughs = """<?xml version="1.0"?>
<!DOCTYPE lolz [
<!ENTITY lol "lol">
<!ELEMENT lolz (#PCDATA)>
<!ENTITY lol1 "&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;">
<!ENTITY lol2 "&lol1;&lol1;&lol1;&lol1;&lol1;&lol1;&lol1;&lol1;&lol1;&lol1;">
<!ENTITY lol3 "&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;">
<!ENTITY lol4 "&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;">
<!ENTITY lol5 "&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;">
<!ENTITY lol6 "&lol5;&lol5;&lol5;&lol5;&lol5;&lol5;&lol5;&lol5;&lol5;&lol5;">
<!ENTITY lol7 "&lol6;&lol6;&lol6;&lol6;&lol6;&lol6;&lol6;&lol6;&lol6;&lol6;">
<!ENTITY lol8 "&lol7;&lol7;&lol7;&lol7;&lol7;&lol7;&lol7;&lol7;&lol7;&lol7;">
<!ENTITY lol9 "&lol8;&lol8;&lol8;&lol8;&lol8;&lol8;&lol8;&lol8;&lol8;&lol8;">
]>
<lolz>&lol9;</lolz>"""
quadratic_blowup = f"""<?xml version="1.0"?>
<!DOCTYPE wolo [
<!ENTITY oops "{"a" * 100000}">
]>
<foo>{"&oops;"*20000}</foo>"""
dtd_retrieval = f"""<?xml version="1.0"?>
<!DOCTYPE dt PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://{HOST}:{PORT}/test.dtd">
<foo>bar</foo>
"""
# ==============================================================================
# other setup
# We set up a local Flask application so we can test whether loading external
# resources works (such as SSRF via DTD retrieval).
app = Flask(__name__)


@app.route("/alive")
def alive():
    """Liveness endpoint; the session fixture requests it after starting the server."""
    return "ok"


# Set to True by the `/test.dtd` handler; tests reset it to False before parsing.
hit_dtd = False


# NOTE(review): the `test_` prefix means pytest may also collect this view function as a
# test case -- confirm that is harmless.
@app.route("/test.dtd")
def test_dtd():
    """Record in `hit_dtd` that the DTD was requested and return a stub document."""
    global hit_dtd
    hit_dtd = True
    return """<?xml version="1.0" encoding="UTF-8"?>"""


# Set to True by the `/xxe` handler; tests reset it to False before parsing.
hit_xxe = False


# NOTE(review): same `test_` prefix remark as for `test_dtd`.
@app.route("/xxe")
def test_xxe():
    """Record in `hit_xxe` that the external entity was requested and return "ok"."""
    global hit_xxe
    hit_xxe = True
    return "ok"


def run_app():
    """Run the Flask app on `HOST`:`PORT`; used as the target of the server thread."""
    app.run(host=HOST, port=PORT)
@pytest.fixture(scope="session", autouse=True)
def flask_app_running():
    """Start the local Flask app in a daemon thread for the whole test session.

    The fixture only yields once `/alive` has answered successfully, so tests never
    race against server start-up.

    Raises:
        requests.exceptions.ConnectionError: if the server is still not reachable
            after the start-up deadline.
        requests.exceptions.HTTPError: if `/alive` answers with an error status.
    """
    # run flask in other thread
    flask_thread = threading.Thread(target=run_app, daemon=True)
    flask_thread.start()

    import requests

    # Poll for readiness instead of sleeping a fixed amount of time: a fixed sleep
    # is flaky on slow machines and needlessly slow on fast ones.
    deadline = time.monotonic() + 5
    while True:
        try:
            # ensure that the server works
            requests.get(f"http://{HOST}:{PORT}/alive", timeout=1).raise_for_status()
            break
        except requests.exceptions.ConnectionError:
            if time.monotonic() >= deadline:
                raise
            time.sleep(0.05)

    yield
def expects_timeout(func):
    """Decorate `func` so that the test passes only if `func` is still running after 0.1s.

    `func` is run in a separate process. The wrapper waits 0.1 seconds and asserts
    that the process has not exited yet (its `exitcode` is still `None`). The child
    process is always killed and joined afterwards, also when the assertion fails.

    Args:
        func: zero-argument callable expected to run for (much) longer than 0.1s.

    Returns:
        A zero-argument wrapper that raises `AssertionError` if `func` finished
        within the waiting window.
    """

    def inner():
        proc = multiprocessing.Process(target=func)
        proc.start()
        try:
            time.sleep(0.1)
            assert proc.exitcode is None
        finally:
            # always reap the child, even when the assertion above fails
            proc.kill()
            proc.join()

    return inner
class TestExpectsTimeout:
    "test that expects_timeout works as expected"

    @staticmethod
    @expects_timeout
    def test_slow():
        # sleeps far longer than the 0.1s that `expects_timeout` waits, so the
        # wrapper still sees a running process
        time.sleep(1000)

    @staticmethod
    def test_fast():
        @expects_timeout
        def fast_func():
            return "done!"

        # the child exits almost immediately, so the wrapper's exitcode assertion
        # is expected to fail
        with pytest.raises(AssertionError):
            fast_func()
# ==============================================================================
import xml.sax
import xml.sax.handler
class SimpleHandler(xml.sax.ContentHandler):
def __init__(self):
self.result = []
def characters(self, data):
self.result.append(data)
class TestSax:
    """Behaviour of parsers created with `xml.sax.make_parser()`."""

    # always vuln to billion laughs, quadratic
    @staticmethod
    @expects_timeout
    def test_billion_laughs_allowed_by_default():
        parser = xml.sax.make_parser()
        parser.parse(StringIO(billion_laughs))

    @staticmethod
    @expects_timeout
    def test_quardratic_blowup_allowed_by_default():
        parser = xml.sax.make_parser()
        parser.parse(StringIO(quadratic_blowup))

    @staticmethod
    def test_ok_xml():
        handler = SimpleHandler()
        parser = xml.sax.make_parser()
        parser.setContentHandler(handler)
        parser.parse(StringIO(ok_xml))
        assert handler.result == ["hello world"], handler.result

    @staticmethod
    def test_xxe_disabled_by_default():
        handler = SimpleHandler()
        parser = xml.sax.make_parser()
        parser.setContentHandler(handler)
        parser.parse(StringIO(local_xxe))
        # the external entity is not expanded, so no character data is reported
        assert handler.result == [], handler.result

    @staticmethod
    def test_local_xxe_manually_enabled():
        handler = SimpleHandler()
        parser = xml.sax.make_parser()
        parser.setContentHandler(handler)
        parser.setFeature(xml.sax.handler.feature_external_ges, True)
        parser.parse(StringIO(local_xxe))
        assert handler.result[0] == "SECRET_FLAG", handler.result

    @staticmethod
    def test_remote_xxe_manually_enabled():
        global hit_xxe
        hit_xxe = False
        handler = SimpleHandler()
        parser = xml.sax.make_parser()
        parser.setContentHandler(handler)
        parser.setFeature(xml.sax.handler.feature_external_ges, True)
        parser.parse(StringIO(remote_xxe))
        assert handler.result == ["ok"], handler.result
        assert hit_xxe

    @staticmethod
    def test_dtd_disabled_by_default():
        global hit_dtd
        hit_dtd = False
        parser = xml.sax.make_parser()
        parser.parse(StringIO(dtd_retrieval))
        assert not hit_dtd

    @staticmethod
    def test_dtd_manually_enabled():
        global hit_dtd
        hit_dtd = False
        parser = xml.sax.make_parser()
        parser.setFeature(xml.sax.handler.feature_external_ges, True)
        parser.parse(StringIO(dtd_retrieval))
        assert hit_dtd
# ==============================================================================
import xml.etree.ElementTree
class TestEtree:
    """Behaviour of `xml.etree.ElementTree.fromstring` with different parser objects."""

    # always vuln to billion laughs, quadratic
    @staticmethod
    @expects_timeout
    def test_billion_laughs_allowed_by_default():
        parser = xml.etree.ElementTree.XMLParser()
        _root = xml.etree.ElementTree.fromstring(billion_laughs, parser=parser)

    @staticmethod
    @expects_timeout
    def test_quardratic_blowup_allowed_by_default():
        parser = xml.etree.ElementTree.XMLParser()
        _root = xml.etree.ElementTree.fromstring(quadratic_blowup, parser=parser)

    @staticmethod
    def test_ok_xml():
        parser = xml.etree.ElementTree.XMLParser()
        root = xml.etree.ElementTree.fromstring(ok_xml, parser=parser)
        assert root.tag == "test"
        assert root.text == "hello world"

    @staticmethod
    def test_ok_xml_sax_parser():
        # you _can_ pass a SAX parser to xml.etree... but it doesn't give you the output :|
        parser = xml.sax.make_parser()
        root = xml.etree.ElementTree.fromstring(ok_xml, parser=parser)
        assert root is None

    @staticmethod
    def test_ok_xml_lxml_parser():
        # this is technically possible, since parsers follow the same API, and the
        # `fromstring` function is just a thin wrapper... seems very unlikely that
        # anyone would do this though :|
        parser = lxml.etree.XMLParser()
        root = xml.etree.ElementTree.fromstring(ok_xml, parser=parser)
        assert root.tag == "test"
        assert root.text == "hello world"

    @staticmethod
    def test_xxe_not_possible():
        parser = xml.etree.ElementTree.XMLParser()
        # parsing must fail: the external entity is reported as undefined
        with pytest.raises(xml.etree.ElementTree.ParseError) as excinfo:
            xml.etree.ElementTree.fromstring(local_xxe, parser=parser)
        assert "undefined entity &xxe" in str(excinfo.value)

    @staticmethod
    def test_dtd_not_possible():
        global hit_dtd
        hit_dtd = False
        parser = xml.etree.ElementTree.XMLParser()
        _root = xml.etree.ElementTree.fromstring(dtd_retrieval, parser=parser)
        assert not hit_dtd
# ==============================================================================
import lxml.etree
class TestLxml:
    """Behaviour of `lxml.etree.XMLParser` with default and non-default settings."""

    # see https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser
    @staticmethod
    def test_billion_laughs_disabled_by_default():
        parser = lxml.etree.XMLParser()
        with pytest.raises(lxml.etree.XMLSyntaxError) as excinfo:
            lxml.etree.fromstring(billion_laughs, parser=parser)
        assert "Detected an entity reference loop" in str(excinfo.value)

    @staticmethod
    def test_quardratic_blowup_disabled_by_default():
        parser = lxml.etree.XMLParser()
        with pytest.raises(lxml.etree.XMLSyntaxError) as excinfo:
            lxml.etree.fromstring(quadratic_blowup, parser=parser)
        assert "Detected an entity reference loop" in str(excinfo.value)

    @staticmethod
    @expects_timeout
    def test_billion_laughs_manually_enabled():
        parser = lxml.etree.XMLParser(huge_tree=True)
        root = lxml.etree.fromstring(billion_laughs, parser=parser)

    @staticmethod
    @expects_timeout
    def test_quadratic_blowup_manually_enabled():
        parser = lxml.etree.XMLParser(huge_tree=True)
        root = lxml.etree.fromstring(quadratic_blowup, parser=parser)

    @staticmethod
    def test_billion_laughs_huge_tree_not_enough():
        parser = lxml.etree.XMLParser(huge_tree=True, resolve_entities=False)
        root = lxml.etree.fromstring(billion_laughs, parser=parser)
        assert root.tag == "lolz"
        assert root.text is None

    @staticmethod
    def test_quadratic_blowup_huge_tree_not_enough():
        parser = lxml.etree.XMLParser(huge_tree=True, resolve_entities=False)
        root = lxml.etree.fromstring(quadratic_blowup, parser=parser)
        assert root.tag == "foo"
        assert root.text is None

    @staticmethod
    def test_ok_xml():
        parser = lxml.etree.XMLParser()
        root = lxml.etree.fromstring(ok_xml, parser=parser)
        assert root.tag == "test"
        assert root.text == "hello world"

    @staticmethod
    def test_local_xxe_enabled_by_default():
        parser = lxml.etree.XMLParser()
        root = lxml.etree.fromstring(local_xxe, parser=parser)
        assert root.tag == "test"
        assert root.text == "SECRET_FLAG\n", root.text

    @staticmethod
    def test_local_xxe_disabled():
        parser = lxml.etree.XMLParser(resolve_entities=False)
        root = lxml.etree.fromstring(local_xxe, parser=parser)
        assert root.tag == "test"
        assert root.text is None

    @staticmethod
    def test_remote_xxe_disabled_by_default():
        global hit_xxe
        hit_xxe = False
        parser = lxml.etree.XMLParser()
        with pytest.raises(lxml.etree.XMLSyntaxError) as excinfo:
            lxml.etree.fromstring(remote_xxe, parser=parser)
        assert "Failure to process entity remote_xxe" in str(excinfo.value)
        assert not hit_xxe

    @staticmethod
    def test_remote_xxe_manually_enabled():
        global hit_xxe
        hit_xxe = False
        parser = lxml.etree.XMLParser(no_network=False)
        root = lxml.etree.fromstring(remote_xxe, parser=parser)
        assert root.tag == "test"
        assert root.text == "ok"
        assert hit_xxe

    @staticmethod
    def test_dtd_disabled_by_default():
        global hit_dtd
        hit_dtd = False
        parser = lxml.etree.XMLParser()
        root = lxml.etree.fromstring(dtd_retrieval, parser=parser)
        assert not hit_dtd

    @staticmethod
    def test_dtd_manually_enabled():
        global hit_dtd
        hit_dtd = False

        # Need to set BOTH load_dtd and no_network
        parser = lxml.etree.XMLParser(load_dtd=True)
        root = lxml.etree.fromstring(dtd_retrieval, parser=parser)
        assert not hit_dtd

        parser = lxml.etree.XMLParser(no_network=False)
        root = lxml.etree.fromstring(dtd_retrieval, parser=parser)
        assert not hit_dtd

        parser = lxml.etree.XMLParser(load_dtd=True, no_network=False)
        root = lxml.etree.fromstring(dtd_retrieval, parser=parser)
        assert hit_dtd

        hit_dtd = False

        # Setting dtd_validation also does not allow the remote access
        parser = lxml.etree.XMLParser(dtd_validation=True, load_dtd=True)
        try:
            root = lxml.etree.fromstring(dtd_retrieval, parser=parser)
        except lxml.etree.XMLSyntaxError:
            # a syntax error is tolerated here; only the absence of a network hit matters
            pass
        assert not hit_dtd
# ==============================================================================
import xmltodict
class TestXmltodict:
    """Behaviour of `xmltodict.parse` with and without `disable_entities=False`."""

    @staticmethod
    def test_billion_laughs_disabled_by_default():
        d = xmltodict.parse(billion_laughs)
        assert d == {"lolz": None}, d

    @staticmethod
    def test_quardratic_blowup_disabled_by_default():
        d = xmltodict.parse(quadratic_blowup)
        assert d == {"foo": None}, d

    @staticmethod
    @expects_timeout
    def test_billion_laughs_manually_enabled():
        xmltodict.parse(billion_laughs, disable_entities=False)

    @staticmethod
    @expects_timeout
    def test_quardratic_blowup_manually_enabled():
        xmltodict.parse(quadratic_blowup, disable_entities=False)

    @staticmethod
    def test_ok_xml():
        d = xmltodict.parse(ok_xml)
        assert d == {"test": "hello world"}, d

    @staticmethod
    def test_local_xxe_not_possible():
        d = xmltodict.parse(local_xxe)
        assert d == {"test": None}

        d = xmltodict.parse(local_xxe, disable_entities=False)
        assert d == {"test": None}

    @staticmethod
    def test_remote_xxe_not_possible():
        global hit_xxe
        hit_xxe = False

        d = xmltodict.parse(remote_xxe)
        assert d == {"test": None}
        assert not hit_xxe

        d = xmltodict.parse(remote_xxe, disable_entities=False)
        assert d == {"test": None}
        assert not hit_xxe

    @staticmethod
    def test_dtd_not_possible():
        global hit_dtd
        hit_dtd = False
        d = xmltodict.parse(dtd_retrieval)
        assert not hit_dtd
# ==============================================================================
import xml.dom.minidom
class TestMinidom:
    """Behaviour of `xml.dom.minidom.parseString`, with and without a custom SAX parser."""

    @staticmethod
    @expects_timeout
    def test_billion_laughs():
        xml.dom.minidom.parseString(billion_laughs)

    @staticmethod
    @expects_timeout
    def test_quardratic_blowup():
        xml.dom.minidom.parseString(quadratic_blowup)

    @staticmethod
    def test_ok_xml():
        doc = xml.dom.minidom.parseString(ok_xml)
        assert doc.documentElement.tagName == "test"
        assert doc.documentElement.childNodes[0].data == "hello world"

    @staticmethod
    def test_xxe():
        # disabled by default
        doc = xml.dom.minidom.parseString(local_xxe)
        assert doc.documentElement.tagName == "test"
        assert doc.documentElement.childNodes == []

        # but can be turned on
        parser = xml.sax.make_parser()
        parser.setFeature(xml.sax.handler.feature_external_ges, True)
        doc = xml.dom.minidom.parseString(local_xxe, parser=parser)
        assert doc.documentElement.tagName == "test"
        assert doc.documentElement.childNodes[0].data == "SECRET_FLAG"

        # which also works remotely
        global hit_xxe
        hit_xxe = False
        parser = xml.sax.make_parser()
        parser.setFeature(xml.sax.handler.feature_external_ges, True)
        _doc = xml.dom.minidom.parseString(remote_xxe, parser=parser)
        assert hit_xxe

    @staticmethod
    def test_dtd():
        # not possible by default
        global hit_dtd
        hit_dtd = False
        _doc = xml.dom.minidom.parseString(dtd_retrieval)
        assert not hit_dtd

        # but can be turned on
        parser = xml.sax.make_parser()
        parser.setFeature(xml.sax.handler.feature_external_ges, True)
        _doc = xml.dom.minidom.parseString(dtd_retrieval, parser=parser)
        assert hit_dtd
# ==============================================================================
import xml.dom.pulldom
class TestPulldom:
    """Behaviour of `xml.dom.pulldom.parseString`, with and without a custom SAX parser.

    The returned event stream has to be iterated for the document to be processed.
    """

    @staticmethod
    @expects_timeout
    def test_billion_laughs():
        doc = xml.dom.pulldom.parseString(billion_laughs)
        # you NEED to iterate over the items for it to take long
        for event, node in doc:
            pass

    @staticmethod
    @expects_timeout
    def test_quardratic_blowup():
        doc = xml.dom.pulldom.parseString(quadratic_blowup)
        for event, node in doc:
            pass

    @staticmethod
    def test_ok_xml():
        doc = xml.dom.pulldom.parseString(ok_xml)
        for event, node in doc:
            if event == xml.dom.pulldom.START_ELEMENT:
                assert node.tagName == "test"
            elif event == xml.dom.pulldom.CHARACTERS:
                assert node.data == "hello world"

    @staticmethod
    def test_xxe():
        # disabled by default
        doc = xml.dom.pulldom.parseString(local_xxe)
        found_flag = False
        for event, node in doc:
            if event == xml.dom.pulldom.START_ELEMENT:
                assert node.tagName == "test"
            elif event == xml.dom.pulldom.CHARACTERS:
                if node.data == "SECRET_FLAG":
                    found_flag = True
        assert not found_flag

        # but can be turned on
        parser = xml.sax.make_parser()
        parser.setFeature(xml.sax.handler.feature_external_ges, True)
        doc = xml.dom.pulldom.parseString(local_xxe, parser=parser)
        found_flag = False
        for event, node in doc:
            if event == xml.dom.pulldom.START_ELEMENT:
                assert node.tagName == "test"
            elif event == xml.dom.pulldom.CHARACTERS:
                if node.data == "SECRET_FLAG":
                    found_flag = True
        assert found_flag

        # which also works remotely
        global hit_xxe
        hit_xxe = False
        parser = xml.sax.make_parser()
        parser.setFeature(xml.sax.handler.feature_external_ges, True)
        doc = xml.dom.pulldom.parseString(remote_xxe, parser=parser)
        # nothing is fetched until the event stream is consumed
        assert not hit_xxe
        for event, node in doc:
            pass
        assert hit_xxe

    @staticmethod
    def test_dtd():
        # not possible by default
        global hit_dtd
        hit_dtd = False
        doc = xml.dom.pulldom.parseString(dtd_retrieval)
        for event, node in doc:
            pass
        assert not hit_dtd

        # but can be turned on
        parser = xml.sax.make_parser()
        parser.setFeature(xml.sax.handler.feature_external_ges, True)
        doc = xml.dom.pulldom.parseString(dtd_retrieval, parser=parser)
        for event, node in doc:
            pass
        assert hit_dtd
# ==============================================================================
import xml.parsers.expat
class TestExpat:
    """Behaviour of parsers created with `xml.parsers.expat.ParserCreate()`."""

    # this is the underlying parser implementation used by the rest of the Python
    # standard library. But people are probably not using this directly.

    @staticmethod
    @expects_timeout
    def test_billion_laughs():
        parser = xml.parsers.expat.ParserCreate()
        parser.Parse(billion_laughs, True)

    @staticmethod
    @expects_timeout
    def test_quardratic_blowup():
        parser = xml.parsers.expat.ParserCreate()
        parser.Parse(quadratic_blowup, True)

    @staticmethod
    def test_ok_xml():
        char_data_recv = []

        def char_data_handler(data):
            char_data_recv.append(data)

        parser = xml.parsers.expat.ParserCreate()
        parser.CharacterDataHandler = char_data_handler
        parser.Parse(ok_xml, True)

        assert char_data_recv == ["hello world"]

    @staticmethod
    def test_xxe():
        # not vuln by default
        char_data_recv = []

        def char_data_handler(data):
            char_data_recv.append(data)

        parser = xml.parsers.expat.ParserCreate()
        parser.CharacterDataHandler = char_data_handler
        parser.Parse(local_xxe, True)

        assert char_data_recv == []

        # there might be ways to make it vuln, but I did not investigate further.

    @staticmethod
    def test_dtd():
        # not vuln by default
        global hit_dtd
        hit_dtd = False

        parser = xml.parsers.expat.ParserCreate()
        parser.Parse(dtd_retrieval, True)
        assert not hit_dtd

        # there might be ways to make it vuln, but I did not investigate further.

View File

@@ -0,0 +1 @@
SECRET_FLAG

View File

@@ -0,0 +1,31 @@
from io import StringIO
import xml.dom.minidom
import xml.dom.pulldom
import xml.sax
x = "some xml"
# minidom
xml.dom.minidom.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup'
xml.dom.minidom.parse(file=StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup'
xml.dom.minidom.parseString(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
xml.dom.minidom.parseString(string=x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
# pulldom
xml.dom.pulldom.parse(StringIO(x))['START_DOCUMENT'][1] # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup'
xml.dom.pulldom.parse(stream_or_string=StringIO(x))['START_DOCUMENT'][1] # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup'
xml.dom.pulldom.parseString(x)['START_DOCUMENT'][1] # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
xml.dom.pulldom.parseString(string=x)['START_DOCUMENT'][1] # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
# These are based on SAX parsers, and you can specify your own, so you can expose yourself to XXE (yay/)
parser = xml.sax.make_parser()
parser.setFeature(xml.sax.handler.feature_external_ges, True)
xml.dom.minidom.parse(StringIO(x), parser) # $ input=StringIO(..) vuln='Billion Laughs' vuln='DTD retrieval' vuln='Quadratic Blowup' vuln='XXE'
xml.dom.minidom.parse(StringIO(x), parser=parser) # $ input=StringIO(..) vuln='Billion Laughs' vuln='DTD retrieval' vuln='Quadratic Blowup' vuln='XXE'
xml.dom.pulldom.parse(StringIO(x), parser) # $ input=StringIO(..) vuln='Billion Laughs' vuln='DTD retrieval' vuln='Quadratic Blowup' vuln='XXE'
xml.dom.pulldom.parse(StringIO(x), parser=parser) # $ input=StringIO(..) vuln='Billion Laughs' vuln='DTD retrieval' vuln='Quadratic Blowup' vuln='XXE'

View File

@@ -0,0 +1,45 @@
from io import StringIO
import xml.etree.ElementTree
x = "some xml"
# Parsing in different ways
xml.etree.ElementTree.fromstring(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
xml.etree.ElementTree.fromstring(text=x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
xml.etree.ElementTree.fromstringlist([x]) # $ input=List vuln='Billion Laughs' vuln='Quadratic Blowup'
xml.etree.ElementTree.fromstringlist(sequence=[x]) # $ input=List vuln='Billion Laughs' vuln='Quadratic Blowup'
xml.etree.ElementTree.XML(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
xml.etree.ElementTree.XML(text=x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
xml.etree.ElementTree.XMLID(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
xml.etree.ElementTree.XMLID(text=x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
xml.etree.ElementTree.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup'
xml.etree.ElementTree.parse(source=StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup'
xml.etree.ElementTree.iterparse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup'
xml.etree.ElementTree.iterparse(source=StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup'
# With parsers (no options available to disable/enable security features)
parser = xml.etree.ElementTree.XMLParser()
xml.etree.ElementTree.fromstring(x, parser=parser) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
# manual use of feed method
parser = xml.etree.ElementTree.XMLParser()
parser.feed(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
parser.feed(data=x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
parser.close()
# manual use of feed method on XMLPullParser
parser = xml.etree.ElementTree.XMLPullParser()
parser.feed(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
parser.feed(data=x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
parser.close()
# note: it's technically possible to use the thin wrapper func `fromstring` with an
# `lxml` parser, and thereby change what vulnerabilities you are exposed to.. but it
# seems very unlikely that anyone would do this, so we have intentionally not added any
# tests for this.

View File

@@ -0,0 +1,64 @@
from io import StringIO
import xml.sax
x = "some xml"
# SAX content handler that stores every piece of character data it receives.
# It is not referenced by any of the parse calls visible in this test file.
class MainHandler(xml.sax.ContentHandler):
def __init__(self):
# Collected strings, in the order characters() was called.
self._result = []
def characters(self, data):
# Append the received data to the collected results.
self._result.append(data)
xml.sax.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup'
xml.sax.parse(source=StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup'
xml.sax.parseString(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
xml.sax.parseString(string=x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'
parser = xml.sax.make_parser()
parser.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup'
parser.parse(source=StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup'
# You can make it vuln to both XXE and DTD retrieval by setting this flag
# see https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges
parser = xml.sax.make_parser()
parser.setFeature(xml.sax.handler.feature_external_ges, True)
parser.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='DTD retrieval' vuln='Quadratic Blowup' vuln='XXE'
parser = xml.sax.make_parser()
parser.setFeature(xml.sax.handler.feature_external_ges, False)
parser.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup'
# Forward Type Tracking test
# Branch check: one parser is created, then parsed either after enabling
# feature_external_ges (the `cond` branch) or without setting any feature.
# The trailing annotation comments on the two parse calls differ accordingly
# (the first lists 'DTD retrieval' and 'XXE', the second does not); they
# presumably are the expected analysis results, so keep them byte-identical.
def func(cond):
parser = xml.sax.make_parser()
if cond:
parser.setFeature(xml.sax.handler.feature_external_ges, True)
parser.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='DTD retrieval' vuln='Quadratic Blowup' vuln='XXE'
else:
parser.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup'
# make it vuln, then make it safe
# a bit of an edge-case, but is nice to be able to handle.
parser = xml.sax.make_parser()
parser.setFeature(xml.sax.handler.feature_external_ges, True)
parser.setFeature(xml.sax.handler.feature_external_ges, False)
parser.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup'
# feature_external_ges is set to True on one branch and to False on the other.
# The annotation on the parse call still lists 'DTD retrieval' and 'XXE'.
# NOTE(review): indentation is not visible in this view; the parse call
# presumably follows the if/else rather than sitting inside the else branch.
def check_conditional_assignment(cond):
parser = xml.sax.make_parser()
if cond:
parser.setFeature(xml.sax.handler.feature_external_ges, True)
else:
parser.setFeature(xml.sax.handler.feature_external_ges, False)
parser.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='DTD retrieval' vuln='Quadratic Blowup' vuln='XXE'
# Variant of the conditional case where the boolean is first stored in a local
# (`flag_value`, True or False depending on `cond`) and then passed to a single
# setFeature call. The annotation on the parse call lists 'DTD retrieval' and 'XXE'.
# NOTE(review): indentation is not visible in this view; setFeature and parse
# presumably run after the if/else, not inside the else branch.
def check_conditional_assignment2(cond):
parser = xml.sax.make_parser()
if cond:
flag_value = True
else:
flag_value = False
parser.setFeature(xml.sax.handler.feature_external_ges, flag_value)
parser.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='DTD retrieval' vuln='Quadratic Blowup' vuln='XXE'

View File

@@ -0,0 +1,8 @@
import xmltodict
x = "some xml"
xmltodict.parse(x) # $ input=x
xmltodict.parse(xml_input=x) # $ input=x
xmltodict.parse(x, disable_entities=False) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup'

View File

@@ -0,0 +1 @@
| xmlrpc_server.py:7:10:7:48 | ControlFlowNode for SimpleXMLRPCServer() | SimpleXMLRPCServer is vulnerable to: Billion Laughs, Quadratic Blowup. |

View File

@@ -0,0 +1 @@
experimental/Security/CWE-611/SimpleXmlRpcServer.ql

View File

@@ -0,0 +1,27 @@
edges
| test.py:8:19:8:25 | ControlFlowNode for request | test.py:8:19:8:30 | ControlFlowNode for Attribute |
| test.py:8:19:8:30 | ControlFlowNode for Attribute | test.py:8:19:8:45 | ControlFlowNode for Subscript |
| test.py:8:19:8:45 | ControlFlowNode for Subscript | test.py:9:34:9:44 | ControlFlowNode for xml_content |
| test.py:13:19:13:25 | ControlFlowNode for request | test.py:13:19:13:30 | ControlFlowNode for Attribute |
| test.py:13:19:13:30 | ControlFlowNode for Attribute | test.py:13:19:13:45 | ControlFlowNode for Subscript |
| test.py:13:19:13:45 | ControlFlowNode for Subscript | test.py:15:34:15:44 | ControlFlowNode for xml_content |
| test.py:19:19:19:25 | ControlFlowNode for request | test.py:19:19:19:30 | ControlFlowNode for Attribute |
| test.py:19:19:19:30 | ControlFlowNode for Attribute | test.py:19:19:19:45 | ControlFlowNode for Subscript |
| test.py:19:19:19:45 | ControlFlowNode for Subscript | test.py:30:34:30:44 | ControlFlowNode for xml_content |
nodes
| test.py:8:19:8:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
| test.py:8:19:8:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| test.py:8:19:8:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
| test.py:9:34:9:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content |
| test.py:13:19:13:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
| test.py:13:19:13:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| test.py:13:19:13:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
| test.py:15:34:15:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content |
| test.py:19:19:19:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
| test.py:19:19:19:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| test.py:19:19:19:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
| test.py:30:34:30:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content |
subpaths
#select
| test.py:9:34:9:44 | ControlFlowNode for xml_content | test.py:8:19:8:25 | ControlFlowNode for request | test.py:9:34:9:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | test.py:9:34:9:44 | ControlFlowNode for xml_content | This | test.py:8:19:8:25 | ControlFlowNode for request | user-provided value |
| test.py:30:34:30:44 | ControlFlowNode for xml_content | test.py:19:19:19:25 | ControlFlowNode for request | test.py:30:34:30:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. | test.py:30:34:30:44 | ControlFlowNode for xml_content | This | test.py:19:19:19:25 | ControlFlowNode for request | user-provided value |

View File

@@ -0,0 +1 @@
experimental/Security/CWE-611/XmlEntityInjection.ql

View File

@@ -0,0 +1,30 @@
from flask import Flask, request
import lxml.etree
app = Flask(__name__)
@app.route("/vuln-handler")
def vuln_handler():
xml_content = request.args['xml_content']
return lxml.etree.fromstring(xml_content).text
@app.route("/safe-handler")
def safe_handler():
xml_content = request.args['xml_content']
parser = lxml.etree.XMLParser(resolve_entities=False)
return lxml.etree.fromstring(xml_content, parser=parser).text
@app.route("/super-vuln-handler")
def super_vuln_handler():
xml_content = request.args['xml_content']
parser = lxml.etree.XMLParser(
# allows XXE
resolve_entities=True,
# allows remote XXE
no_network=False,
# together with `no_network=False`, allows DTD-retrieval
load_dtd=True,
# allows DoS attacks
huge_tree=True,
)
return lxml.etree.fromstring(xml_content, parser=parser).text

View File

@@ -0,0 +1,12 @@
from xmlrpc.server import SimpleXMLRPCServer
def foo(n: str):
print("foo called with arg:", n, type(n))
return "ok"
server = SimpleXMLRPCServer(("127.0.0.1", 8000))
server.register_function(foo, "foo")
server.serve_forever()
# normal: curl 127.0.0.1:8000 --data-raw '<?xml version="1.0"?><methodCall><methodName>foo</methodName><params><param><value>42</value></param></params></methodCall>'
# billion_laughs: curl 127.0.0.1:8000 --data-raw '<?xml version="1.0"?><!DOCTYPE lolz [<!ENTITY lol "lol"><!ELEMENT lolz (#PCDATA)><!ENTITY lol1 "&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;"><!ENTITY lol2 "&lol1;&lol1;&lol1;&lol1;&lol1;&lol1;&lol1;&lol1;&lol1;&lol1;"><!ENTITY lol3 "&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;"><!ENTITY lol4 "&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;"><!ENTITY lol5 "&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;"><!ENTITY lol6 "&lol5;&lol5;&lol5;&lol5;&lol5;&lol5;&lol5;&lol5;&lol5;&lol5;"><!ENTITY lol7 "&lol6;&lol6;&lol6;&lol6;&lol6;&lol6;&lol6;&lol6;&lol6;&lol6;"><!ENTITY lol8 "&lol7;&lol7;&lol7;&lol7;&lol7;&lol7;&lol7;&lol7;&lol7;&lol7;"><!ENTITY lol9 "&lol8;&lol8;&lol8;&lol8;&lol8;&lol8;&lol8;&lol8;&lol8;&lol8;">]><methodCall><methodName>foo</methodName><params><param><value>&lol9;</value></param></params></methodCall>'