Python: More Flask modeling kinda works

It "kinda" works now, but it really is not a pretty solution. Adding all these
"tracked" objects is SUPER annoying... it _would_ be possible to skip them, but
that seems like it will give the wrong edges for dataflow/taintflow queries :|

A good chunk of it should be able to be removed with access-paths like C# does
for library modeling. Some of it could be solved by better type-tracking API
like API Graphs... but it seems like we generally are just lacking the
nice-to-have features like `.getAMemberCall` and the like. See
https://github.com/github/codeql/pull/4082/files#diff-9aa94c4d713ef9d8da73918ff53db774L33
This commit is contained in:
Rasmus Wriedt Larsen
2020-09-21 20:55:53 +02:00
parent 3c08590ee4
commit 00ea0cebc3
4 changed files with 212 additions and 45 deletions

View File

@@ -65,3 +65,28 @@ EssaNode importMember(string moduleName, string memberName) {
result.getVar().(AssignmentDefinition).getSourceVariable() = var
)
}
abstract class ListLike extends Node {
/** Gets a Node that is an access of an element of this list-like object */
Node getElementAccess() {
// subscript
result.asCfgNode().(SubscriptNode).getObject() = this.asCfgNode()
or
// get
// NOTE: will not track bound method, `f = obj.func; f()`
result.asCfgNode().(CallNode).getFunction().(AttrNode).getObject("pop") = this.asCfgNode()
}
}
/** Class of dictionary-like objects */
abstract class DictLike extends Node {
/** Gets a Node that is an access of an element of this dictionary-like object */
Node getElementAccess() {
// subscript
result.asCfgNode().(SubscriptNode).getObject() = this.asCfgNode()
or
// get
// NOTE: will not track bound method, `f = obj.func; f()`
result.asCfgNode().(CallNode).getFunction().(AttrNode).getObject("get") = this.asCfgNode()
}
}

View File

@@ -6,6 +6,7 @@ private import python
private import experimental.dataflow.DataFlow
private import experimental.dataflow.RemoteFlowSources
private import experimental.semmle.python.Concepts
private import experimental.semmle.python.frameworks.Werkzeug
private module Flask {
/** Gets a reference to the `flask` module. */
@@ -42,42 +43,39 @@ private module Flask {
override string getSourceType() { result = "flask.request" }
}
// https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.MultiDict
/** Gets a reference to the MultiDict attributes of `flask.request`. */
DataFlow::Node requestMultiDictAttribute(DataFlow::TypeTracker t) {
t.start() and
result.asCfgNode().(AttrNode).getObject(["args", "values", "form"]) =
flask::request().asCfgNode()
or
exists(DataFlow::TypeTracker t2 | result = requestMultiDictAttribute(t2).track(t2, t))
}
/** Gets a reference to the MultiDict attributes of `flask.request`. */
DataFlow::Node requestMultiDictAttribute() {
result = requestMultiDictAttribute(DataFlow::TypeTracker::end())
}
/**
* A source of remote flow from attributes from a flask request.
*
* See https://flask.palletsprojects.com/en/1.1.x/api/#flask.Request
*/
private class RequestInputAccess extends RemoteFlowSource::Range {
string attr_name;
RequestInputAccess() {
// attributes
exists(AttrNode attr, string name |
this.asCfgNode() = attr and attr.getObject(name) = flask::request().asCfgNode()
exists(AttrNode attr |
this.asCfgNode() = attr and attr.getObject(attr_name) = flask::request().asCfgNode()
|
name in ["path",
// string
attr_name in ["path",
// str
"full_path", "base_url", "url", "access_control_request_method", "content_encoding",
"content_md5", "content_type", "data", "method", "mimetype", "origin", "query_string",
"referrer", "remote_addr", "remote_user", "user_agent",
// dict
"environ", "cookies", "mimetype_params", "view_args",
//
"args", "values", "form",
// json
"json",
// List[str]
"access_route",
// file-like
"stream", "input_stream",
// MultiDict[str, str]
// https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.MultiDict
"args", "values", "form",
// MultiDict[str, FileStorage]
// https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.FileStorage
// TODO: FileStorage needs extra taint steps
"files",
// https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.HeaderSet
"access_control_request_headers", "pragma",
// https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.Accept
@@ -89,33 +87,37 @@ private module Flask {
// https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.RequestCacheControl
// TODO: has attributes like `no_cache`, and `to_header` method (actually, many of these models do)
"cache_control",
// TODO: MultiDict[FileStorage]
// https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.FileStorage
"files",
// https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.Headers
// TODO: dict-like with wsgiref.headers.Header compatibility methods
"headers"]
)
or
// methods
exists(CallNode call, string name | this.asCfgNode() = call |
// NOTE: will not track bound method, `f = func; f()`
name in ["get_data", "get_json"] and
call.getFunction().(AttrNode).getObject(name) = flask::request().asCfgNode()
)
or
// multi dict special handling
(
this = requestMultiDictAttribute()
or
exists(CallNode call | this.asCfgNode() = call |
// NOTE: will not track bound method, `f = func; f()`
call.getFunction().(AttrNode).getObject("getlist") =
requestMultiDictAttribute().asCfgNode()
)
exists(CallNode call | this.asCfgNode() = call |
// NOTE: will not track bound method, `f = obj.func; f()`
attr_name in ["get_data", "get_json"] and
call.getFunction().(AttrNode).getObject(attr_name) = flask::request().asCfgNode()
)
}
override string getSourceType() { result = "flask.request input" }
}
private class RequestInputMultiDict extends RequestInputAccess,
Werkzeug::Datastructures::MultiDict {
RequestInputMultiDict() { attr_name in ["args", "values", "form", "files"] }
}
private class RequestInputFiles extends RequestInputMultiDict {
RequestInputFiles() { attr_name = "files" }
}
private class RequestInputFileStorage extends Werkzeug::Datastructures::FileStorage {
RequestInputFileStorage() {
exists(RequestInputFiles files, Werkzeug::Datastructures::MultiDictTracked filesTracked |
filesTracked.getMultiDict() = files and
this = filesTracked.getElementAccess()
)
}
}
}

View File

@@ -0,0 +1,140 @@
/**
* Provides classes modeling security-relevant aspects of the `flask` package.
*/
private import python
private import experimental.dataflow.DataFlow
private import experimental.dataflow.TaintTracking
module Werkzeug {
module Datastructures {
// ---------------------------------------------------------------------- //
// MultiDict //
// ---------------------------------------------------------------------- //
/**
* A Node representing an instance of a werkzeug.datastructures.MultiDict
*
* See https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.MultiDict
*/
abstract class MultiDict extends DataFlow::Node { }
private DataFlow::Node multiDictTrack(MultiDict multiDict, DataFlow::TypeTracker t) {
t.start() and
result instanceof MultiDict
or
exists(DataFlow::TypeTracker t2 | result = multiDictTrack(multiDict, t2).track(t2, t))
}
/** Gets a reference to the MultiDict attributes of `flask.request`. */
private DataFlow::Node multiDictTrack(MultiDict multiDict) {
result = multiDictTrack(multiDict, DataFlow::TypeTracker::end())
}
class MultiDictTracked extends DataFlow::Node, DataFlow::DictLike {
MultiDict multiDict;
MultiDictTracked() { this = multiDictTrack(multiDict) }
MultiDict getMultiDict() { result = multiDict }
override DataFlow::Node getElementAccess() {
result = DataFlow::DictLike.super.getElementAccess()
or
exists(MultiDictGetListCallResultTracked tracked_call_result |
tracked_call_result.getCall().getMultiDict() = this and
result = tracked_call_result.getElementAccess()
)
}
}
private DataFlow::Node multiDictGetListTrack(MultiDictTracked multiDict, DataFlow::TypeTracker t) {
/*
* using t.startInAttr("getlist") was not good solution
* ```py
* a = request.args
* b = a
* a.getlist("key")
* b.getlist("key")
* ```
* would give `request.args` -> `b.getlist` -- this is correct, but not helpful in a taint-path explanation,
* we REALLY WANT it to be `request.args -> a -> b -> b.getlist`
* This requirement means that we do need the predicate `multiDictTrack`, which could be spared otherwise.
*/
t.start() and
result.asCfgNode().(AttrNode).getObject("getlist") = multiDict.asCfgNode()
or
exists(DataFlow::TypeTracker t2 | result = multiDictGetListTrack(multiDict, t2).track(t2, t))
}
private DataFlow::Node multiDictGetListTrack(MultiDictTracked multiDict) {
result = multiDictGetListTrack(multiDict, DataFlow::TypeTracker::end())
}
private class MultiDictGetListCall extends DataFlow::Node {
MultiDictTracked multiDict;
MultiDictGetListCall() {
this.asCfgNode().(CallNode).getFunction() = multiDictGetListTrack(multiDict).asCfgNode()
}
MultiDictTracked getMultiDict() { result = multiDict }
}
private DataFlow::Node multiDictGetListCallTrack(
MultiDictGetListCall call, DataFlow::TypeTracker t
) {
t.start() and
result = call
or
exists(DataFlow::TypeTracker t2 | result = multiDictGetListCallTrack(call, t2).track(t2, t))
}
/** Gets a reference to the MultiDict attributes of `flask.request`. */
private DataFlow::Node multiDictGetListCallTrack(MultiDictGetListCall call) {
result = multiDictGetListCallTrack(call, DataFlow::TypeTracker::end())
}
private class MultiDictGetListCallResultTracked extends DataFlow::Node, DataFlow::ListLike {
MultiDictGetListCall call;
MultiDictGetListCallResultTracked() { this = multiDictGetListCallTrack(call) }
MultiDictGetListCall getCall() { result = call }
}
private class MultiDictAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
nodeTo.(MultiDictGetListCall).getMultiDict() = nodeFrom.(MultiDictTracked)
}
}
// ---------------------------------------------------------------------- //
// FileStorage //
// ---------------------------------------------------------------------- //
/**
* A Node representing an instance of a werkzeug.datastructures.FileStorage
*
* See https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.FileStorage
*/
abstract class FileStorage extends DataFlow::Node { }
private class FileStorageAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
nodeFrom instanceof FileStorage and
exists(string name |
name in ["filename",
// str
"name", "content_type", "mimetype",
// file-like
"stream",
// TODO: werkzeug.datastructures.Headers
"headers",
// dict[str, str]
"mimetype_params"] and
nodeTo.asCfgNode().(AttrNode).getObject(name) = nodeFrom.asCfgNode()
)
}
}
}
}