Python: Adopt new approach in flask modeling

Removed all the dict-like stuff, not sure that is how we should do things.
This commit is contained in:
Rasmus Wriedt Larsen
2020-09-22 16:09:22 +02:00
parent a82fa04d8a
commit e614365963
4 changed files with 32 additions and 120 deletions

View File

@@ -65,28 +65,3 @@ EssaNode importMember(string moduleName, string memberName) {
result.getVar().(AssignmentDefinition).getSourceVariable() = var
)
}
abstract class ListLike extends Node {
/** Gets a Node that is an access of an element of this list-like object */
Node getElementAccess() {
// subscript
result.asCfgNode().(SubscriptNode).getObject() = this.asCfgNode()
or
// get
// NOTE: will not track bound method, `f = obj.func; f()`
result.asCfgNode().(CallNode).getFunction().(AttrNode).getObject("pop") = this.asCfgNode()
}
}
/** Class of dictionary-like objects */
abstract class DictLike extends Node {
/** Gets a Node that is an access of an element of this dictionary-like object */
Node getElementAccess() {
// subscript
result.asCfgNode().(SubscriptNode).getObject() = this.asCfgNode()
or
// get
// NOTE: will not track bound method, `f = obj.func; f()`
result.asCfgNode().(CallNode).getFunction().(AttrNode).getObject("get") = this.asCfgNode()
}
}

View File

@@ -8,6 +8,8 @@ private import experimental.dataflow.RemoteFlowSources
private import experimental.semmle.python.Concepts
private import experimental.semmle.python.frameworks.Werkzeug
// for old improved impl see
// https://github.com/github/codeql/blob/9f95212e103c68d0c1dfa4b6f30fb5d53954ccef/python/ql/src/semmle/python/web/flask/Request.qll
private module Flask {
/** Gets a reference to the `flask` module. */
DataFlow::Node flask(DataFlow::TypeTracker t) {
@@ -36,7 +38,12 @@ private module Flask {
DataFlow::Node request() { result = flask::request(DataFlow::TypeTracker::end()) }
}
// TODO: Do we even need this class then? :|
// TODO: Do we even need this class? :|
/**
* A source of remote flow from a flask request.
*
* See https://flask.palletsprojects.com/en/1.1.x/api/#flask.Request
*/
private class RequestSource extends RemoteFlowSource::Range {
RequestSource() { this = flask::request() }
@@ -111,13 +118,8 @@ private module Flask {
private class RequestInputFiles extends RequestInputMultiDict {
RequestInputFiles() { attr_name = "files" }
}
private class RequestInputFileStorage extends Werkzeug::Datastructures::FileStorage {
RequestInputFileStorage() {
exists(RequestInputFiles files, Werkzeug::Datastructures::MultiDictTracked filesTracked |
filesTracked.getMultiDict() = files and
this = filesTracked.getElementAccess()
)
}
}
// TODO: Somehow specify that elements of `RequestInputFiles` are
// Werkzeug::Datastructures::FileStorage and should have those additional taint steps
// AND that the 0-indexed argument to its' save method is a sink for path-injection.
// https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.FileStorage.save
}

View File

@@ -6,6 +6,8 @@ private import python
private import experimental.dataflow.DataFlow
private import experimental.dataflow.TaintTracking
// for old impl see
// https://github.com/github/codeql/blob/9f95212e103c68d0c1dfa4b6f30fb5d53954ccef/python/ql/src/semmle/python/libraries/Werkzeug.qll
module Werkzeug {
module Datastructures {
// ---------------------------------------------------------------------- //
@@ -18,94 +20,26 @@ module Werkzeug {
*/
abstract class MultiDict extends DataFlow::Node { }
private DataFlow::Node multiDictTrack(MultiDict multiDict, DataFlow::TypeTracker t) {
t.start() and
result instanceof MultiDict
or
exists(DataFlow::TypeTracker t2 | result = multiDictTrack(multiDict, t2).track(t2, t))
}
/** Gets a reference to the MultiDict attributes of `flask.request`. */
private DataFlow::Node multiDictTrack(MultiDict multiDict) {
result = multiDictTrack(multiDict, DataFlow::TypeTracker::end())
}
class MultiDictTracked extends DataFlow::Node, DataFlow::DictLike {
MultiDict multiDict;
MultiDictTracked() { this = multiDictTrack(multiDict) }
MultiDict getMultiDict() { result = multiDict }
override DataFlow::Node getElementAccess() {
result = DataFlow::DictLike.super.getElementAccess()
private module MultiDictTracking {
private DataFlow::Node getlist(DataFlow::TypeTracker t) {
t.startInAttr("getlist") and
result instanceof MultiDict
or
exists(MultiDictGetListCallResultTracked tracked_call_result |
tracked_call_result.getCall().getMultiDict() = this and
result = tracked_call_result.getElementAccess()
)
}
}
private DataFlow::Node multiDictGetListTrack(MultiDictTracked multiDict, DataFlow::TypeTracker t) {
/*
* using t.startInAttr("getlist") was not good solution
* ```py
* a = request.args
* b = a
* a.getlist("key")
* b.getlist("key")
* ```
* would give `request.args` -> `b.getlist` -- this is correct, but not helpful in a taint-path explanation,
* we REALLY WANT it to be `request.args -> a -> b -> b.getlist`
* This requirement means that we do need the predicate `multiDictTrack`, which could be spared otherwise.
*/
t.start() and
result.asCfgNode().(AttrNode).getObject("getlist") = multiDict.asCfgNode()
or
exists(DataFlow::TypeTracker t2 | result = multiDictGetListTrack(multiDict, t2).track(t2, t))
}
private DataFlow::Node multiDictGetListTrack(MultiDictTracked multiDict) {
result = multiDictGetListTrack(multiDict, DataFlow::TypeTracker::end())
}
private class MultiDictGetListCall extends DataFlow::Node {
MultiDictTracked multiDict;
MultiDictGetListCall() {
this.asCfgNode().(CallNode).getFunction() = multiDictGetListTrack(multiDict).asCfgNode()
exists(DataFlow::TypeTracker t2 | result = getlist(t2).track(t2, t))
}
MultiDictTracked getMultiDict() { result = multiDict }
}
private DataFlow::Node multiDictGetListCallTrack(
MultiDictGetListCall call, DataFlow::TypeTracker t
) {
t.start() and
result = call
or
exists(DataFlow::TypeTracker t2 | result = multiDictGetListCallTrack(call, t2).track(t2, t))
}
/** Gets a reference to the MultiDict attributes of `flask.request`. */
private DataFlow::Node multiDictGetListCallTrack(MultiDictGetListCall call) {
result = multiDictGetListCallTrack(call, DataFlow::TypeTracker::end())
}
private class MultiDictGetListCallResultTracked extends DataFlow::Node, DataFlow::ListLike {
MultiDictGetListCall call;
MultiDictGetListCallResultTracked() { this = multiDictGetListCallTrack(call) }
MultiDictGetListCall getCall() { result = call }
DataFlow::Node getlist() { result = getlist(DataFlow::TypeTracker::end()) }
}
private class MultiDictAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
nodeTo.(MultiDictGetListCall).getMultiDict() = nodeFrom.(MultiDictTracked)
// obj -> obj.getlist
nodeTo.asCfgNode().(AttrNode).getObject("getlist") = nodeFrom.asCfgNode() and
nodeTo = MultiDictTracking::getlist()
or
// getlist -> getlist()
nodeFrom = MultiDictTracking::getlist() and
nodeTo.asCfgNode().(CallNode).getFunction() = nodeFrom.asCfgNode()
}
}
@@ -121,6 +55,7 @@ module Werkzeug {
private class FileStorageAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
// TODO: should be `nodeFrom = tracked(any(FileStorage fs))`
nodeFrom instanceof FileStorage and
exists(string name |
name in ["filename",

View File

@@ -35,11 +35,11 @@
| test.py:65 | ok | test_taint | request.data |
| test.py:68 | ok | test_taint | request.files |
| test.py:69 | ok | test_taint | request.files['key'] |
| test.py:70 | ok | test_taint | request.files['key'].filename |
| test.py:71 | ok | test_taint | request.files['key'].stream |
| test.py:70 | fail | test_taint | request.files['key'].filename |
| test.py:71 | fail | test_taint | request.files['key'].stream |
| test.py:72 | ok | test_taint | request.files.getlist(..) |
| test.py:73 | ok | test_taint | request.files.getlist(..)[0].filename |
| test.py:74 | ok | test_taint | request.files.getlist(..)[0].stream |
| test.py:73 | fail | test_taint | request.files.getlist(..)[0].filename |
| test.py:74 | fail | test_taint | request.files.getlist(..)[0].stream |
| test.py:77 | ok | test_taint | request.form |
| test.py:78 | ok | test_taint | request.form['key'] |
| test.py:79 | ok | test_taint | request.form.getlist(..) |