mirror of
https://github.com/github/codeql.git
synced 2025-12-20 10:46:30 +01:00
Python: More Flask modeling kinda works
It "kinda" works now, but it really is not a pretty solution. Adding all these "tracked" objects is SUPER annoying... it _would_ be possible to skip them, but that seems like it will give the wrong edges for dataflow/taintflow queries :| A good chunk of it should be able to be removed with access-paths like C# does for library modeling. Some of it could be solved by better type-tracking API like API Graphs... but it seems like we generally are just lacking the nice-to-have features like `.getAMemberCall` and the like. See https://github.com/github/codeql/pull/4082/files#diff-9aa94c4d713ef9d8da73918ff53db774L33
This commit is contained in:
@@ -65,3 +65,28 @@ EssaNode importMember(string moduleName, string memberName) {
|
||||
result.getVar().(AssignmentDefinition).getSourceVariable() = var
|
||||
)
|
||||
}
|
||||
|
||||
abstract class ListLike extends Node {
|
||||
/** Gets a Node that is an access of an element of this list-like object */
|
||||
Node getElementAccess() {
|
||||
// subscript
|
||||
result.asCfgNode().(SubscriptNode).getObject() = this.asCfgNode()
|
||||
or
|
||||
// get
|
||||
// NOTE: will not track bound method, `f = obj.func; f()`
|
||||
result.asCfgNode().(CallNode).getFunction().(AttrNode).getObject("pop") = this.asCfgNode()
|
||||
}
|
||||
}
|
||||
|
||||
/** Class of dictionary-like objects */
|
||||
abstract class DictLike extends Node {
|
||||
/** Gets a Node that is an access of an element of this dictionary-like object */
|
||||
Node getElementAccess() {
|
||||
// subscript
|
||||
result.asCfgNode().(SubscriptNode).getObject() = this.asCfgNode()
|
||||
or
|
||||
// get
|
||||
// NOTE: will not track bound method, `f = obj.func; f()`
|
||||
result.asCfgNode().(CallNode).getFunction().(AttrNode).getObject("get") = this.asCfgNode()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -6,6 +6,7 @@ private import python
|
||||
private import experimental.dataflow.DataFlow
|
||||
private import experimental.dataflow.RemoteFlowSources
|
||||
private import experimental.semmle.python.Concepts
|
||||
private import experimental.semmle.python.frameworks.Werkzeug
|
||||
|
||||
private module Flask {
|
||||
/** Gets a reference to the `flask` module. */
|
||||
@@ -42,42 +43,39 @@ private module Flask {
|
||||
override string getSourceType() { result = "flask.request" }
|
||||
}
|
||||
|
||||
// https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.MultiDict
|
||||
/** Gets a reference to the MultiDict attributes of `flask.request`. */
|
||||
DataFlow::Node requestMultiDictAttribute(DataFlow::TypeTracker t) {
|
||||
t.start() and
|
||||
result.asCfgNode().(AttrNode).getObject(["args", "values", "form"]) =
|
||||
flask::request().asCfgNode()
|
||||
or
|
||||
exists(DataFlow::TypeTracker t2 | result = requestMultiDictAttribute(t2).track(t2, t))
|
||||
}
|
||||
|
||||
/** Gets a reference to the MultiDict attributes of `flask.request`. */
|
||||
DataFlow::Node requestMultiDictAttribute() {
|
||||
result = requestMultiDictAttribute(DataFlow::TypeTracker::end())
|
||||
}
|
||||
|
||||
/**
|
||||
* A source of remote flow from attributes from a flask request.
|
||||
*
|
||||
* See https://flask.palletsprojects.com/en/1.1.x/api/#flask.Request
|
||||
*/
|
||||
private class RequestInputAccess extends RemoteFlowSource::Range {
|
||||
string attr_name;
|
||||
|
||||
RequestInputAccess() {
|
||||
// attributes
|
||||
exists(AttrNode attr, string name |
|
||||
this.asCfgNode() = attr and attr.getObject(name) = flask::request().asCfgNode()
|
||||
exists(AttrNode attr |
|
||||
this.asCfgNode() = attr and attr.getObject(attr_name) = flask::request().asCfgNode()
|
||||
|
|
||||
name in ["path",
|
||||
// string
|
||||
attr_name in ["path",
|
||||
// str
|
||||
"full_path", "base_url", "url", "access_control_request_method", "content_encoding",
|
||||
"content_md5", "content_type", "data", "method", "mimetype", "origin", "query_string",
|
||||
"referrer", "remote_addr", "remote_user", "user_agent",
|
||||
// dict
|
||||
"environ", "cookies", "mimetype_params", "view_args",
|
||||
//
|
||||
"args", "values", "form",
|
||||
// json
|
||||
"json",
|
||||
// List[str]
|
||||
"access_route",
|
||||
// file-like
|
||||
"stream", "input_stream",
|
||||
// MultiDict[str, str]
|
||||
// https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.MultiDict
|
||||
"args", "values", "form",
|
||||
// MultiDict[str, FileStorage]
|
||||
// https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.FileStorage
|
||||
// TODO: FileStorage needs extra taint steps
|
||||
"files",
|
||||
// https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.HeaderSet
|
||||
"access_control_request_headers", "pragma",
|
||||
// https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.Accept
|
||||
@@ -89,33 +87,37 @@ private module Flask {
|
||||
// https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.RequestCacheControl
|
||||
// TODO: has attributes like `no_cache`, and `to_header` method (actually, many of these models do)
|
||||
"cache_control",
|
||||
// TODO: MultiDict[FileStorage]
|
||||
// https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.FileStorage
|
||||
"files",
|
||||
// https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.Headers
|
||||
// TODO: dict-like with wsgiref.headers.Header compatibility methods
|
||||
"headers"]
|
||||
)
|
||||
or
|
||||
// methods
|
||||
exists(CallNode call, string name | this.asCfgNode() = call |
|
||||
// NOTE: will not track bound method, `f = func; f()`
|
||||
name in ["get_data", "get_json"] and
|
||||
call.getFunction().(AttrNode).getObject(name) = flask::request().asCfgNode()
|
||||
)
|
||||
or
|
||||
// multi dict special handling
|
||||
(
|
||||
this = requestMultiDictAttribute()
|
||||
or
|
||||
exists(CallNode call | this.asCfgNode() = call |
|
||||
// NOTE: will not track bound method, `f = func; f()`
|
||||
call.getFunction().(AttrNode).getObject("getlist") =
|
||||
requestMultiDictAttribute().asCfgNode()
|
||||
)
|
||||
exists(CallNode call | this.asCfgNode() = call |
|
||||
// NOTE: will not track bound method, `f = obj.func; f()`
|
||||
attr_name in ["get_data", "get_json"] and
|
||||
call.getFunction().(AttrNode).getObject(attr_name) = flask::request().asCfgNode()
|
||||
)
|
||||
}
|
||||
|
||||
override string getSourceType() { result = "flask.request input" }
|
||||
}
|
||||
|
||||
private class RequestInputMultiDict extends RequestInputAccess,
|
||||
Werkzeug::Datastructures::MultiDict {
|
||||
RequestInputMultiDict() { attr_name in ["args", "values", "form", "files"] }
|
||||
}
|
||||
|
||||
private class RequestInputFiles extends RequestInputMultiDict {
|
||||
RequestInputFiles() { attr_name = "files" }
|
||||
}
|
||||
|
||||
private class RequestInputFileStorage extends Werkzeug::Datastructures::FileStorage {
|
||||
RequestInputFileStorage() {
|
||||
exists(RequestInputFiles files, Werkzeug::Datastructures::MultiDictTracked filesTracked |
|
||||
filesTracked.getMultiDict() = files and
|
||||
this = filesTracked.getElementAccess()
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
140
python/ql/src/experimental/semmle/python/frameworks/Werkzeug.qll
Normal file
140
python/ql/src/experimental/semmle/python/frameworks/Werkzeug.qll
Normal file
@@ -0,0 +1,140 @@
|
||||
/**
|
||||
* Provides classes modeling security-relevant aspects of the `flask` package.
|
||||
*/
|
||||
|
||||
private import python
|
||||
private import experimental.dataflow.DataFlow
|
||||
private import experimental.dataflow.TaintTracking
|
||||
|
||||
module Werkzeug {
|
||||
module Datastructures {
|
||||
// ---------------------------------------------------------------------- //
|
||||
// MultiDict //
|
||||
// ---------------------------------------------------------------------- //
|
||||
/**
|
||||
* A Node representing an instance of a werkzeug.datastructures.MultiDict
|
||||
*
|
||||
* See https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.MultiDict
|
||||
*/
|
||||
abstract class MultiDict extends DataFlow::Node { }
|
||||
|
||||
private DataFlow::Node multiDictTrack(MultiDict multiDict, DataFlow::TypeTracker t) {
|
||||
t.start() and
|
||||
result instanceof MultiDict
|
||||
or
|
||||
exists(DataFlow::TypeTracker t2 | result = multiDictTrack(multiDict, t2).track(t2, t))
|
||||
}
|
||||
|
||||
/** Gets a reference to the MultiDict attributes of `flask.request`. */
|
||||
private DataFlow::Node multiDictTrack(MultiDict multiDict) {
|
||||
result = multiDictTrack(multiDict, DataFlow::TypeTracker::end())
|
||||
}
|
||||
|
||||
class MultiDictTracked extends DataFlow::Node, DataFlow::DictLike {
|
||||
MultiDict multiDict;
|
||||
|
||||
MultiDictTracked() { this = multiDictTrack(multiDict) }
|
||||
|
||||
MultiDict getMultiDict() { result = multiDict }
|
||||
|
||||
override DataFlow::Node getElementAccess() {
|
||||
result = DataFlow::DictLike.super.getElementAccess()
|
||||
or
|
||||
exists(MultiDictGetListCallResultTracked tracked_call_result |
|
||||
tracked_call_result.getCall().getMultiDict() = this and
|
||||
result = tracked_call_result.getElementAccess()
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
private DataFlow::Node multiDictGetListTrack(MultiDictTracked multiDict, DataFlow::TypeTracker t) {
|
||||
/*
|
||||
* using t.startInAttr("getlist") was not good solution
|
||||
* ```py
|
||||
* a = request.args
|
||||
* b = a
|
||||
* a.getlist("key")
|
||||
* b.getlist("key")
|
||||
* ```
|
||||
* would give `request.args` -> `b.getlist` -- this is correct, but not helpful in a taint-path explanation,
|
||||
* we REALLY WANT it to be `request.args -> a -> b -> b.getlist`
|
||||
* This requirement means that we do need the predicate `multiDictTrack`, which could be spared otherwise.
|
||||
*/
|
||||
|
||||
t.start() and
|
||||
result.asCfgNode().(AttrNode).getObject("getlist") = multiDict.asCfgNode()
|
||||
or
|
||||
exists(DataFlow::TypeTracker t2 | result = multiDictGetListTrack(multiDict, t2).track(t2, t))
|
||||
}
|
||||
|
||||
private DataFlow::Node multiDictGetListTrack(MultiDictTracked multiDict) {
|
||||
result = multiDictGetListTrack(multiDict, DataFlow::TypeTracker::end())
|
||||
}
|
||||
|
||||
private class MultiDictGetListCall extends DataFlow::Node {
|
||||
MultiDictTracked multiDict;
|
||||
|
||||
MultiDictGetListCall() {
|
||||
this.asCfgNode().(CallNode).getFunction() = multiDictGetListTrack(multiDict).asCfgNode()
|
||||
}
|
||||
|
||||
MultiDictTracked getMultiDict() { result = multiDict }
|
||||
}
|
||||
|
||||
private DataFlow::Node multiDictGetListCallTrack(
|
||||
MultiDictGetListCall call, DataFlow::TypeTracker t
|
||||
) {
|
||||
t.start() and
|
||||
result = call
|
||||
or
|
||||
exists(DataFlow::TypeTracker t2 | result = multiDictGetListCallTrack(call, t2).track(t2, t))
|
||||
}
|
||||
|
||||
/** Gets a reference to the MultiDict attributes of `flask.request`. */
|
||||
private DataFlow::Node multiDictGetListCallTrack(MultiDictGetListCall call) {
|
||||
result = multiDictGetListCallTrack(call, DataFlow::TypeTracker::end())
|
||||
}
|
||||
|
||||
private class MultiDictGetListCallResultTracked extends DataFlow::Node, DataFlow::ListLike {
|
||||
MultiDictGetListCall call;
|
||||
|
||||
MultiDictGetListCallResultTracked() { this = multiDictGetListCallTrack(call) }
|
||||
|
||||
MultiDictGetListCall getCall() { result = call }
|
||||
}
|
||||
|
||||
private class MultiDictAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
|
||||
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
|
||||
nodeTo.(MultiDictGetListCall).getMultiDict() = nodeFrom.(MultiDictTracked)
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------- //
|
||||
// FileStorage //
|
||||
// ---------------------------------------------------------------------- //
|
||||
/**
|
||||
* A Node representing an instance of a werkzeug.datastructures.FileStorage
|
||||
*
|
||||
* See https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.FileStorage
|
||||
*/
|
||||
abstract class FileStorage extends DataFlow::Node { }
|
||||
|
||||
private class FileStorageAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
|
||||
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
|
||||
nodeFrom instanceof FileStorage and
|
||||
exists(string name |
|
||||
name in ["filename",
|
||||
// str
|
||||
"name", "content_type", "mimetype",
|
||||
// file-like
|
||||
"stream",
|
||||
// TODO: werkzeug.datastructures.Headers
|
||||
"headers",
|
||||
// dict[str, str]
|
||||
"mimetype_params"] and
|
||||
nodeTo.asCfgNode().(AttrNode).getObject(name) = nodeFrom.asCfgNode()
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user