Python: First attempt at modeling Flask

This commit is contained in:
Rasmus Wriedt Larsen
2020-09-21 14:39:15 +02:00
parent cdc5ca7aec
commit 7c205dd3fc
5 changed files with 436 additions and 1 deletions

View File

@@ -7,4 +7,115 @@ private import experimental.dataflow.DataFlow
private import experimental.dataflow.RemoteFlowSources
private import experimental.semmle.python.Concepts
private module Flask { }
private module Flask {
  /**
   * Gets a reference to the `flask` module, as seen by type tracker `t`.
   *
   * A reference either is an import of the `flask` module itself, or is reached
   * from such a reference through a type-tracking step.
   */
  DataFlow::Node flask(DataFlow::TypeTracker t) {
    result = DataFlow::importModule("flask") and
    t.start()
    or
    exists(DataFlow::TypeTracker prev | result = flask(prev).track(prev, t))
  }

  /** Gets a reference to the `flask` module (type tracking completed). */
  DataFlow::Node flask() { result = flask(DataFlow::TypeTracker::end()) }

  /** Provides references to members of the `flask` module. */
  module flask {
    /**
     * Gets a reference to the `flask.request` object, as seen by type tracker `t`.
     *
     * Tracking begins either at a direct import of the `request` member of `flask`,
     * or at a reference to the `flask` module from which the `request` attribute
     * is subsequently read.
     */
    DataFlow::Node request(DataFlow::TypeTracker t) {
      result = DataFlow::importMember("flask", "request") and
      t.start()
      or
      result = flask() and
      t.startInAttr("request")
      or
      exists(DataFlow::TypeTracker prev | result = flask::request(prev).track(prev, t))
    }

    /** Gets a reference to the `flask.request` object (type tracking completed). */
    DataFlow::Node request() { result = flask::request(DataFlow::TypeTracker::end()) }
  }

  // TODO: Do we even need this class then? :|
  /** A reference to the `flask.request` object itself, exposed as a remote flow source. */
  private class RequestSource extends RemoteFlowSource::Range {
    RequestSource() { flask::request() = this }

    override string getSourceType() { result = "flask.request" }
  }

  // https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.MultiDict
  /**
   * Gets a reference to one of the MultiDict attributes (`args`, `values`, `form`)
   * of `flask.request`, as seen by type tracker `t`.
   */
  DataFlow::Node requestMultiDictAttribute(DataFlow::TypeTracker t) {
    exists(AttrNode attr |
      attr = result.asCfgNode() and
      attr.getObject(["args", "values", "form"]) = flask::request().asCfgNode()
    ) and
    t.start()
    or
    exists(DataFlow::TypeTracker prev | result = requestMultiDictAttribute(prev).track(prev, t))
  }

  /** Gets a reference to the MultiDict attributes of `flask.request` (type tracking completed). */
  DataFlow::Node requestMultiDictAttribute() {
    result = requestMultiDictAttribute(DataFlow::TypeTracker::end())
  }

  /** Gets the name of an attribute of `flask.request` whose value is modeled as remote input. */
  private string requestInputAttributeName() {
    result =
      [
        // string
        "path", "full_path", "base_url", "url", "access_control_request_method",
        "content_encoding", "content_md5", "content_type", "data", "method", "mimetype", "origin",
        "query_string", "referrer", "remote_addr", "remote_user", "user_agent",
        // dict
        "environ", "cookies", "mimetype_params", "view_args",
        // MultiDict (tracked references are additionally covered by `requestMultiDictAttribute`)
        "args", "values", "form",
        // json
        "json",
        // List[str]
        "access_route",
        // file-like
        "stream", "input_stream",
        // https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.HeaderSet
        "access_control_request_headers", "pragma",
        // https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.Accept
        // TODO: Kinda badly modeled for now -- has type List[Tuple[value, quality]], and some extra methods
        "accept_charsets", "accept_encodings", "accept_languages", "accept_mimetypes",
        // https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.Authorization
        // TODO: dict subclass with extra attributes like `username` and `password`
        "authorization",
        // https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.RequestCacheControl
        // TODO: has attributes like `no_cache`, and `to_header` method (actually, many of these models do)
        "cache_control",
        // TODO: MultiDict[FileStorage]
        // https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.FileStorage
        "files",
        // https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.Headers
        // TODO: dict-like with wsgiref.headers.Header compatibility methods
        "headers"
      ]
  }

  /**
   * An access to user-controlled data through `flask.request`: a read of one of the
   * modeled attributes, a call to `get_data`/`get_json`, a reference to one of the
   * MultiDict attributes, or a `getlist` call on such a reference.
   */
  private class RequestInputAccess extends RemoteFlowSource::Range {
    RequestInputAccess() {
      // attributes: `request.<name>`
      this.asCfgNode().(AttrNode).getObject(requestInputAttributeName()) =
        flask::request().asCfgNode()
      or
      // methods: `request.get_data()` / `request.get_json()`
      // NOTE: will not track bound method, `f = func; f()`
      this.asCfgNode().(CallNode).getFunction().(AttrNode).getObject(["get_data", "get_json"]) =
        flask::request().asCfgNode()
      or
      // multi dict special handling: any (tracked) reference to the MultiDict itself ...
      this = requestMultiDictAttribute()
      or
      // ... and the result of calling `getlist` on such a reference
      // NOTE: will not track bound method, `f = func; f()`
      this.asCfgNode().(CallNode).getFunction().(AttrNode).getObject("getlist") =
        requestMultiDictAttribute().asCfgNode()
    }

    override string getSourceType() { result = "flask.request input" }
  }
}

View File

@@ -0,0 +1,84 @@
| test.py:6 | fail | test_taint | name |
| test.py:6 | fail | test_taint | number |
| test.py:7 | ok | test_taint | foo |
| test.py:14 | ok | test_taint | request.environ |
| test.py:15 | ok | test_taint | request.environ.get(..) |
| test.py:17 | ok | test_taint | request.path |
| test.py:18 | ok | test_taint | request.full_path |
| test.py:19 | ok | test_taint | request.base_url |
| test.py:20 | ok | test_taint | request.url |
| test.py:23 | fail | test_taint | request.accept_charsets.best |
| test.py:24 | fail | test_taint | request.accept_charsets.best_match(..) |
| test.py:25 | ok | test_taint | request.accept_charsets[0] |
| test.py:26 | ok | test_taint | request.accept_encodings |
| test.py:27 | ok | test_taint | request.accept_languages |
| test.py:28 | ok | test_taint | request.accept_mimetypes |
| test.py:31 | ok | test_taint | request.access_control_request_headers |
| test.py:33 | ok | test_taint | request.access_control_request_method |
| test.py:35 | ok | test_taint | request.access_route |
| test.py:36 | ok | test_taint | request.access_route[0] |
| test.py:39 | ok | test_taint | request.args |
| test.py:40 | ok | test_taint | request.args['key'] |
| test.py:41 | ok | test_taint | request.args.getlist(..) |
| test.py:44 | ok | test_taint | request.authorization |
| test.py:45 | ok | test_taint | request.authorization['username'] |
| test.py:46 | fail | test_taint | request.authorization.username |
| test.py:49 | ok | test_taint | request.cache_control |
| test.py:51 | fail | test_taint | request.cache_control.max_age |
| test.py:52 | fail | test_taint | request.cache_control.max_stale |
| test.py:53 | fail | test_taint | request.cache_control.min_fresh |
| test.py:55 | ok | test_taint | request.content_encoding |
| test.py:57 | ok | test_taint | request.content_md5 |
| test.py:59 | ok | test_taint | request.content_type |
| test.py:62 | ok | test_taint | request.cookies |
| test.py:63 | ok | test_taint | request.cookies['key'] |
| test.py:65 | ok | test_taint | request.data |
| test.py:68 | ok | test_taint | request.files |
| test.py:69 | ok | test_taint | request.files['key'] |
| test.py:70 | fail | test_taint | request.files['key'].filename |
| test.py:71 | fail | test_taint | request.files['key'].stream |
| test.py:72 | fail | test_taint | request.files.getlist(..) |
| test.py:75 | ok | test_taint | request.form |
| test.py:76 | ok | test_taint | request.form['key'] |
| test.py:77 | ok | test_taint | request.form.getlist(..) |
| test.py:79 | ok | test_taint | request.get_data() |
| test.py:81 | ok | test_taint | request.get_json() |
| test.py:82 | ok | test_taint | request.get_json()['foo'] |
| test.py:83 | ok | test_taint | request.get_json()['foo']['bar'] |
| test.py:87 | ok | test_taint | request.headers |
| test.py:88 | ok | test_taint | request.headers['key'] |
| test.py:89 | fail | test_taint | request.headers.get_all(..) |
| test.py:90 | fail | test_taint | request.headers.getlist(..) |
| test.py:91 | ok | test_taint | list(..) |
| test.py:92 | fail | test_taint | request.headers.to_wsgi_list() |
| test.py:94 | ok | test_taint | request.json |
| test.py:95 | ok | test_taint | request.json['foo'] |
| test.py:96 | ok | test_taint | request.json['foo']['bar'] |
| test.py:98 | ok | test_taint | request.method |
| test.py:100 | ok | test_taint | request.mimetype |
| test.py:102 | ok | test_taint | request.mimetype_params |
| test.py:104 | ok | test_taint | request.origin |
| test.py:107 | ok | test_taint | request.pragma |
| test.py:109 | ok | test_taint | request.query_string |
| test.py:111 | ok | test_taint | request.referrer |
| test.py:113 | ok | test_taint | request.remote_addr |
| test.py:115 | ok | test_taint | request.remote_user |
| test.py:118 | ok | test_taint | request.stream |
| test.py:119 | ok | test_taint | request.input_stream |
| test.py:121 | ok | test_taint | request.url |
| test.py:123 | ok | test_taint | request.user_agent |
| test.py:126 | ok | test_taint | request.values |
| test.py:127 | ok | test_taint | request.values['key'] |
| test.py:128 | ok | test_taint | request.values.getlist(..) |
| test.py:131 | ok | test_taint | request.view_args |
| test.py:132 | ok | test_taint | request.view_args['key'] |
| test.py:136 | ok | test_taint | request.script_root |
| test.py:137 | ok | test_taint | request.url_root |
| test.py:141 | ok | test_taint | request.charset |
| test.py:142 | ok | test_taint | request.url_charset |
| test.py:146 | ok | test_taint | request.date |
| test.py:149 | ok | test_taint | request.endpoint |
| test.py:154 | ok | test_taint | request.host |
| test.py:155 | ok | test_taint | request.host_url |
| test.py:157 | ok | test_taint | request.scheme |
| test.py:159 | ok | test_taint | request.script_root |

View File

@@ -0,0 +1,6 @@
import experimental.dataflow.tainttracking.TestTaintLib
import experimental.dataflow.RemoteFlowSources
/**
 * A taint-tracking test configuration in which every `RemoteFlowSource`
 * is considered a source.
 */
class RemoteFlowTestTaintConfiguration extends TestTaintTrackingConfiguration {
  override predicate isSource(DataFlow::Node source) {
    exists(RemoteFlowSource remote | source = remote)
  }
}

View File

@@ -0,0 +1,233 @@
from flask import Flask, request
app = Flask(__name__)  # application object; the `@app.route` decorators below register handlers on it
@app.route('/test_taint/<name>/<int:number>')
def test_taint(name = "World!", number="0", foo="foo"):  # NOTE: the .expected output refers to line numbers in this function -- keep line positions stable
ensure_tainted(name, number)
ensure_not_tainted(foo)
# All fields of the Request object were inspected manually, see
# https://flask.palletsprojects.com/en/1.1.x/api/#flask.Request
ensure_tainted(
request.environ,
request.environ.get('HTTP_AUTHORIZATION'),
request.path,
request.full_path,
request.base_url,
request.url,
# These request.accept_* properties are instances of subclasses of werkzeug.datastructures.Accept
request.accept_charsets.best,
request.accept_charsets.best_match(["utf-8", "utf-16"]),
request.accept_charsets[0],
request.accept_encodings,
request.accept_languages,
request.accept_mimetypes,
# werkzeug.datastructures.HeaderSet (subclass of collections_abc.MutableSet)
request.access_control_request_headers,
request.access_control_request_method,
request.access_route,
request.access_route[0],
# werkzeug.datastructures.ImmutableMultiDict by default -- although this can be changed :\
request.args,
request.args['key'],
request.args.getlist('key'),
# werkzeug.datastructures.Authorization (a dict, with some properties)
request.authorization,
request.authorization['username'],
request.authorization.username,
# werkzeug.datastructures.RequestCacheControl
request.cache_control,
# These should be `int`s, but can be strings... see the `cache_control` handler below
request.cache_control.max_age,
request.cache_control.max_stale,
request.cache_control.min_fresh,
request.content_encoding,
request.content_md5,
request.content_type,
# werkzeug.datastructures.ImmutableTypeConversionDict (which is basically just a dict)
request.cookies,
request.cookies['key'],
request.data,
# a werkzeug.datastructures.MultiDict, mapping [str, werkzeug.datastructures.FileStorage]
request.files,
request.files['key'],
request.files['key'].filename,
request.files['key'].stream,
request.files.getlist('key'),
# werkzeug.datastructures.ImmutableMultiDict by default -- although this can be changed :\
request.form,
request.form['key'],
request.form.getlist('key'),
request.get_data(),
request.get_json(),
request.get_json()['foo'],
request.get_json()['foo']['bar'],
# werkzeug.datastructures.EnvironHeaders,
# which has same interface as werkzeug.datastructures.Headers
request.headers,
request.headers['key'],
request.headers.get_all('key'),
request.headers.getlist('key'),
list(request.headers), # (k, v) list
request.headers.to_wsgi_list(), # (k, v) list
request.json,
request.json['foo'],
request.json['foo']['bar'],
request.method,
request.mimetype,
request.mimetype_params,
request.origin,
# werkzeug.datastructures.HeaderSet (subclass of collections_abc.MutableSet)
request.pragma,
request.query_string,
request.referrer,
request.remote_addr,
request.remote_user,
# file-like object
request.stream,
request.input_stream,
request.url,  # NOTE: `request.url` is already checked earlier in this call (duplicate entry)
request.user_agent,
# werkzeug.datastructures.CombinedMultiDict, which is basically just a werkzeug.datastructures.MultiDict
request.values,
request.values['key'],
request.values.getlist('key'),
# dict
request.view_args,
request.view_args['key'],
)
ensure_not_tainted(
request.script_root,
request.url_root,
# The expected charset for parsing request data / urls. Cannot be changed by the client.
# https://github.com/pallets/werkzeug/blob/4dc8d6ab840d4b78cbd5789cef91b01e3bde01d5/src/werkzeug/wrappers/base_request.py#L71-L72
request.charset,
request.url_charset,
# request.date is a parsed `datetime`
# https://github.com/pallets/werkzeug/blob/4dc8d6ab840d4b78cbd5789cef91b01e3bde01d5/src/werkzeug/wrappers/common_descriptors.py#L76-L83
request.date,
# Assuming that endpoints are not created by user-input seems fair
request.endpoint,
# In some rare circumstances a client could spoof the host, but by default they
# should not be able to. See
# https://werkzeug.palletsprojects.com/en/1.0.x/wrappers/#werkzeug.wrappers.BaseRequest.trusted_hosts
request.host,
request.host_url,
request.scheme,
request.script_root,  # NOTE: `request.script_root` is already the first entry of this call (duplicate entry)
)
@app.route('/debug/<foo>/<bar>', methods=['GET'])
def debug(foo, bar):
    """Print the view args, headers, `accept` header and pragma of the current request.

    Manual-inspection helper; always responds with 'ok'.
    """
    view_args = request.view_args
    print("request.view_args", view_args)

    headers = request.headers
    print("request.headers {!r}".format(headers))

    accept_header = headers['accept']
    print("request.headers['accept'] {!r}".format(accept_header))

    pragma = request.pragma
    print("request.pragma {!r}".format(pragma))

    return 'ok'
@app.route('/stream', methods=['POST'])
def stream():
    """Print the request path, the `request.stream` object and everything read from it."""
    print(request.path)

    body_stream = request.stream
    print(body_stream)

    # just works :)
    content = body_stream.read()
    print(content)

    return 'ok'
@app.route('/input_stream', methods=['POST'])
def input_stream():
    """Print the request path, the `request.input_stream` object and everything read from it."""
    print(request.path)

    raw_stream = request.input_stream
    print(raw_stream)

    # hangs until client stops connection, since max number of bytes to read must
    # be handled manually
    content = raw_stream.read()
    print(content)

    return 'ok'
@app.route('/form', methods=['POST'])
def form():
    """Print the request path and the submitted form data; responds with 'ok'."""
    path = request.path
    print(path)

    form_data = request.form
    print("request.form", form_data)

    return 'ok'
@app.route('/cache_control', methods=['POST'])
def cache_control():
    """Print value and runtime type of the `max_age`, `max_stale` and `min_fresh` cache-control fields."""
    print(request.path)

    max_age = request.cache_control.max_age
    print("request.cache_control.max_age", max_age, type(max_age))

    max_stale = request.cache_control.max_stale
    print("request.cache_control.max_stale", max_stale, type(max_stale))

    min_fresh = request.cache_control.min_fresh
    print("request.cache_control.min_fresh", min_fresh, type(min_fresh))

    return 'ok'
@app.route('/file_upload', methods=['POST'])
def file_upload():
    """Print key, object, name, filename and stream of every entry in `request.files`."""
    print(request.path)

    for field, upload in request.files.items():
        details = (field, upload, upload.name, upload.filename, upload.stream)
        print(*details)

    return 'ok'
# Example requests for manually exercising the handlers above against a server on localhost:5000:
# curl --header "My-Header: some-value" http://localhost:5000/debug/fooval/barval
# curl --header "Pragma: foo, bar" --header "Pragma: stuff, foo" http://localhost:5000/debug/fooval/barval
# curl -X POST --data 'wat' http://localhost:5000/stream
# curl -X POST --data 'wat' http://localhost:5000/input_stream
# curl --form foo=foo --form foo=123 http://localhost:5000/form
# curl --header "Cache-Control: max-age=foo, max-stale=bar, min-fresh=baz" http://localhost:5000/cache_control
# curl --header "Cache-Control: max-age=1, max-stale=2, min-fresh=3" http://localhost:5000/cache_control
# curl -F myfile=@<some-file> localhost:5000/file_upload
if __name__ == "__main__":
app.run(debug=True)  # starts the app in debug mode when run as a script -- NOTE(review): presumably for local experimentation only

View File

@@ -0,0 +1 @@
semmle-extractor-options: --max-import-depth=1