mirror of
https://github.com/github/codeql.git
synced 2026-05-01 11:45:14 +02:00
Python: First attempt at modeling Flask
This commit is contained in:
@@ -7,4 +7,115 @@ private import experimental.dataflow.DataFlow
|
||||
private import experimental.dataflow.RemoteFlowSources
|
||||
private import experimental.semmle.python.Concepts
|
||||
|
||||
private module Flask { }
|
||||
private module Flask {
  /**
   * Gets a node that refers to the `flask` module, as seen by type tracker `t`.
   *
   * The node is either the import of `flask` itself (with `t` in its start state), or it is
   * obtained through `track` from a node found with an earlier tracker.
   */
  DataFlow::Node flask(DataFlow::TypeTracker t) {
    exists(DataFlow::TypeTracker prev | result = flask(prev).track(prev, t))
    or
    result = DataFlow::importModule("flask") and
    t.start()
  }

  /** Gets a node that refers to the `flask` module (type tracking run to its end state). */
  DataFlow::Node flask() { result = flask(DataFlow::TypeTracker::end()) }

  module flask {
    /**
     * Gets a node that refers to the `flask.request` object, as seen by type tracker `t`.
     *
     * Covers `from flask import request`, reading the `request` attribute of a reference to
     * the `flask` module, and anything obtained through `track` from such a node.
     */
    DataFlow::Node request(DataFlow::TypeTracker t) {
      exists(DataFlow::TypeTracker prev | result = flask::request(prev).track(prev, t))
      or
      result = DataFlow::importMember("flask", "request") and
      t.start()
      or
      result = flask() and
      t.startInAttr("request")
    }

    /** Gets a node that refers to the `flask.request` object (type tracking run to its end state). */
    DataFlow::Node request() { result = flask::request(DataFlow::TypeTracker::end()) }
  }

  // TODO: Do we even need this class then?
  /** A reference to the `flask.request` object itself, reported as a remote flow source. */
  private class RequestSource extends RemoteFlowSource::Range {
    RequestSource() { flask::request() = this }

    override string getSourceType() { result = "flask.request" }
  }

  /**
   * Gets a reference to one of the MultiDict attributes (`args`, `values`, `form`) of
   * `flask.request`, as seen by type tracker `t`.
   *
   * See https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.MultiDict
   */
  DataFlow::Node requestMultiDictAttribute(DataFlow::TypeTracker t) {
    exists(DataFlow::TypeTracker prev |
      result = requestMultiDictAttribute(prev).track(prev, t)
    )
    or
    flask::request().asCfgNode() =
      result.asCfgNode().(AttrNode).getObject(["args", "values", "form"]) and
    t.start()
  }

  /**
   * Gets a reference to one of the MultiDict attributes of `flask.request`
   * (type tracking run to its end state).
   */
  DataFlow::Node requestMultiDictAttribute() {
    result = requestMultiDictAttribute(DataFlow::TypeTracker::end())
  }

  /**
   * Gets the name of an attribute of `flask.request` whose value is modeled as
   * remote user input.
   */
  private string requestInputAttributeName() {
    result in [
        // string
        "path", "full_path", "base_url", "url", "access_control_request_method",
        "content_encoding", "content_md5", "content_type", "data", "method", "mimetype",
        "origin", "query_string", "referrer", "remote_addr", "remote_user", "user_agent",
        // dict
        "environ", "cookies", "mimetype_params", "view_args",
        // the attributes that `requestMultiDictAttribute` also handles
        "args", "values", "form",
        // json
        "json",
        // List[str]
        "access_route",
        // file-like
        "stream", "input_stream",
        // https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.HeaderSet
        "access_control_request_headers", "pragma",
        // https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.Accept
        // TODO: Kinda badly modeled for now -- has type List[Tuple[value, quality]], and some extra methods
        "accept_charsets", "accept_encodings", "accept_languages", "accept_mimetypes",
        // https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.Authorization
        // TODO: dict subclass with extra attributes like `username` and `password`
        "authorization",
        // https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.RequestCacheControl
        // TODO: has attributes like `no_cache`, and `to_header` method (actually, many of these models do)
        "cache_control",
        // TODO: MultiDict[FileStorage]
        // https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.FileStorage
        "files",
        // https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.Headers
        // TODO: dict-like with wsgiref.headers.Header compatibility methods
        "headers"
      ]
  }

  /**
   * A read of user-controlled data from `flask.request`: one of the attributes named by
   * `requestInputAttributeName`, a call to `get_data`/`get_json`, a reference to a MultiDict
   * attribute, or a `getlist` call on such a MultiDict attribute.
   */
  private class RequestInputAccess extends RemoteFlowSource::Range {
    RequestInputAccess() {
      // plain attribute reads: `request.<name>`
      this.asCfgNode().(AttrNode).getObject(requestInputAttributeName()) =
        flask::request().asCfgNode()
      or
      // method calls: `request.get_data()`, `request.get_json()`
      // NOTE: will not track bound method, `f = func; f()`
      exists(CallNode invocation | invocation = this.asCfgNode() |
        invocation.getFunction().(AttrNode).getObject(["get_data", "get_json"]) =
          flask::request().asCfgNode()
      )
      or
      // MultiDict attributes, wherever a reference to them ends up
      this = requestMultiDictAttribute()
      or
      // `<multidict>.getlist(..)`
      // NOTE: will not track bound method, `f = func; f()`
      exists(CallNode invocation | invocation = this.asCfgNode() |
        invocation.getFunction().(AttrNode).getObject("getlist") =
          requestMultiDictAttribute().asCfgNode()
      )
    }

    override string getSourceType() { result = "flask.request input" }
  }
}
|
||||
|
||||
@@ -0,0 +1,84 @@
|
||||
| test.py:6 | fail | test_taint | name |
|
||||
| test.py:6 | fail | test_taint | number |
|
||||
| test.py:7 | ok | test_taint | foo |
|
||||
| test.py:14 | ok | test_taint | request.environ |
|
||||
| test.py:15 | ok | test_taint | request.environ.get(..) |
|
||||
| test.py:17 | ok | test_taint | request.path |
|
||||
| test.py:18 | ok | test_taint | request.full_path |
|
||||
| test.py:19 | ok | test_taint | request.base_url |
|
||||
| test.py:20 | ok | test_taint | request.url |
|
||||
| test.py:23 | fail | test_taint | request.accept_charsets.best |
|
||||
| test.py:24 | fail | test_taint | request.accept_charsets.best_match(..) |
|
||||
| test.py:25 | ok | test_taint | request.accept_charsets[0] |
|
||||
| test.py:26 | ok | test_taint | request.accept_encodings |
|
||||
| test.py:27 | ok | test_taint | request.accept_languages |
|
||||
| test.py:28 | ok | test_taint | request.accept_mimetypes |
|
||||
| test.py:31 | ok | test_taint | request.access_control_request_headers |
|
||||
| test.py:33 | ok | test_taint | request.access_control_request_method |
|
||||
| test.py:35 | ok | test_taint | request.access_route |
|
||||
| test.py:36 | ok | test_taint | request.access_route[0] |
|
||||
| test.py:39 | ok | test_taint | request.args |
|
||||
| test.py:40 | ok | test_taint | request.args['key'] |
|
||||
| test.py:41 | ok | test_taint | request.args.getlist(..) |
|
||||
| test.py:44 | ok | test_taint | request.authorization |
|
||||
| test.py:45 | ok | test_taint | request.authorization['username'] |
|
||||
| test.py:46 | fail | test_taint | request.authorization.username |
|
||||
| test.py:49 | ok | test_taint | request.cache_control |
|
||||
| test.py:51 | fail | test_taint | request.cache_control.max_age |
|
||||
| test.py:52 | fail | test_taint | request.cache_control.max_stale |
|
||||
| test.py:53 | fail | test_taint | request.cache_control.min_fresh |
|
||||
| test.py:55 | ok | test_taint | request.content_encoding |
|
||||
| test.py:57 | ok | test_taint | request.content_md5 |
|
||||
| test.py:59 | ok | test_taint | request.content_type |
|
||||
| test.py:62 | ok | test_taint | request.cookies |
|
||||
| test.py:63 | ok | test_taint | request.cookies['key'] |
|
||||
| test.py:65 | ok | test_taint | request.data |
|
||||
| test.py:68 | ok | test_taint | request.files |
|
||||
| test.py:69 | ok | test_taint | request.files['key'] |
|
||||
| test.py:70 | fail | test_taint | request.files['key'].filename |
|
||||
| test.py:71 | fail | test_taint | request.files['key'].stream |
|
||||
| test.py:72 | fail | test_taint | request.files.getlist(..) |
|
||||
| test.py:75 | ok | test_taint | request.form |
|
||||
| test.py:76 | ok | test_taint | request.form['key'] |
|
||||
| test.py:77 | ok | test_taint | request.form.getlist(..) |
|
||||
| test.py:79 | ok | test_taint | request.get_data() |
|
||||
| test.py:81 | ok | test_taint | request.get_json() |
|
||||
| test.py:82 | ok | test_taint | request.get_json()['foo'] |
|
||||
| test.py:83 | ok | test_taint | request.get_json()['foo']['bar'] |
|
||||
| test.py:87 | ok | test_taint | request.headers |
|
||||
| test.py:88 | ok | test_taint | request.headers['key'] |
|
||||
| test.py:89 | fail | test_taint | request.headers.get_all(..) |
|
||||
| test.py:90 | fail | test_taint | request.headers.getlist(..) |
|
||||
| test.py:91 | ok | test_taint | list(..) |
|
||||
| test.py:92 | fail | test_taint | request.headers.to_wsgi_list() |
|
||||
| test.py:94 | ok | test_taint | request.json |
|
||||
| test.py:95 | ok | test_taint | request.json['foo'] |
|
||||
| test.py:96 | ok | test_taint | request.json['foo']['bar'] |
|
||||
| test.py:98 | ok | test_taint | request.method |
|
||||
| test.py:100 | ok | test_taint | request.mimetype |
|
||||
| test.py:102 | ok | test_taint | request.mimetype_params |
|
||||
| test.py:104 | ok | test_taint | request.origin |
|
||||
| test.py:107 | ok | test_taint | request.pragma |
|
||||
| test.py:109 | ok | test_taint | request.query_string |
|
||||
| test.py:111 | ok | test_taint | request.referrer |
|
||||
| test.py:113 | ok | test_taint | request.remote_addr |
|
||||
| test.py:115 | ok | test_taint | request.remote_user |
|
||||
| test.py:118 | ok | test_taint | request.stream |
|
||||
| test.py:119 | ok | test_taint | request.input_stream |
|
||||
| test.py:121 | ok | test_taint | request.url |
|
||||
| test.py:123 | ok | test_taint | request.user_agent |
|
||||
| test.py:126 | ok | test_taint | request.values |
|
||||
| test.py:127 | ok | test_taint | request.values['key'] |
|
||||
| test.py:128 | ok | test_taint | request.values.getlist(..) |
|
||||
| test.py:131 | ok | test_taint | request.view_args |
|
||||
| test.py:132 | ok | test_taint | request.view_args['key'] |
|
||||
| test.py:136 | ok | test_taint | request.script_root |
|
||||
| test.py:137 | ok | test_taint | request.url_root |
|
||||
| test.py:141 | ok | test_taint | request.charset |
|
||||
| test.py:142 | ok | test_taint | request.url_charset |
|
||||
| test.py:146 | ok | test_taint | request.date |
|
||||
| test.py:149 | ok | test_taint | request.endpoint |
|
||||
| test.py:154 | ok | test_taint | request.host |
|
||||
| test.py:155 | ok | test_taint | request.host_url |
|
||||
| test.py:157 | ok | test_taint | request.scheme |
|
||||
| test.py:159 | ok | test_taint | request.script_root |
|
||||
@@ -0,0 +1,6 @@
|
||||
import experimental.dataflow.tainttracking.TestTaintLib
|
||||
import experimental.dataflow.RemoteFlowSources
|
||||
|
||||
/** A test taint-tracking configuration whose sources are exactly the `RemoteFlowSource` nodes. */
class RemoteFlowTestTaintConfiguration extends TestTaintTrackingConfiguration {
  override predicate isSource(DataFlow::Node node) { node = any(RemoteFlowSource src) }
}
|
||||
@@ -0,0 +1,233 @@
|
||||
from flask import Flask, request
|
||||
app = Flask(__name__)  # application object the @app.route views below are registered on
|
||||
|
||||
@app.route('/test_taint/<name>/<int:number>')
def test_taint(name = "World!", number="0", foo="foo"):  # NOTE(review): expected output appears keyed by line number; keep this line layout
    ensure_tainted(name, number)  # both parameters are filled from the URL rule above
    ensure_not_tainted(foo)  # not part of the URL rule

    # Taint expectations for the fields of the Request object (all fields were inspected manually)
    # https://flask.palletsprojects.com/en/1.1.x/api/#flask.Request

    ensure_tainted(

        request.environ,
        request.environ.get('HTTP_AUTHORIZATION'),

        request.path,
        request.full_path,
        request.base_url,
        request.url,

        # The request.accept_* properties are instances of subclasses of werkzeug.datastructures.Accept
        request.accept_charsets.best,
        request.accept_charsets.best_match(["utf-8", "utf-16"]),
        request.accept_charsets[0],
        request.accept_encodings,
        request.accept_languages,
        request.accept_mimetypes,

        # werkzeug.datastructures.HeaderSet (subclass of collections.abc.MutableSet)
        request.access_control_request_headers,

        request.access_control_request_method,

        request.access_route,
        request.access_route[0],

        # werkzeug.datastructures.ImmutableMultiDict by default -- although this can be changed
        request.args,
        request.args['key'],
        request.args.getlist('key'),

        # werkzeug.datastructures.Authorization (a dict, with some extra properties)
        request.authorization,
        request.authorization['username'],
        request.authorization.username,

        # werkzeug.datastructures.RequestCacheControl
        request.cache_control,
        # These should be `int`s, but can be strings... see the `cache_control` view below
        request.cache_control.max_age,
        request.cache_control.max_stale,
        request.cache_control.min_fresh,

        request.content_encoding,

        request.content_md5,

        request.content_type,

        # werkzeug.datastructures.ImmutableTypeConversionDict (which is basically just a dict)
        request.cookies,
        request.cookies['key'],

        request.data,

        # a werkzeug.datastructures.MultiDict, mapping str to werkzeug.datastructures.FileStorage
        request.files,
        request.files['key'],
        request.files['key'].filename,
        request.files['key'].stream,
        request.files.getlist('key'),

        # werkzeug.datastructures.ImmutableMultiDict by default -- although this can be changed
        request.form,
        request.form['key'],
        request.form.getlist('key'),

        request.get_data(),

        request.get_json(),
        request.get_json()['foo'],
        request.get_json()['foo']['bar'],

        # werkzeug.datastructures.EnvironHeaders,
        # which has the same interface as werkzeug.datastructures.Headers
        request.headers,
        request.headers['key'],
        request.headers.get_all('key'),
        request.headers.getlist('key'),
        list(request.headers),  # (k, v) list
        request.headers.to_wsgi_list(),  # (k, v) list

        request.json,
        request.json['foo'],
        request.json['foo']['bar'],

        request.method,

        request.mimetype,

        request.mimetype_params,

        request.origin,

        # werkzeug.datastructures.HeaderSet (subclass of collections.abc.MutableSet)
        request.pragma,

        request.query_string,

        request.referrer,

        request.remote_addr,

        request.remote_user,

        # file-like objects
        request.stream,
        request.input_stream,

        request.url,  # NOTE: also listed earlier in this call

        request.user_agent,

        # werkzeug.datastructures.CombinedMultiDict, which is basically just a werkzeug.datastructures.MultiDict
        request.values,
        request.values['key'],
        request.values.getlist('key'),

        # dict
        request.view_args,
        request.view_args['key'],
    )

    ensure_not_tainted(
        request.script_root,
        request.url_root,

        # The expected charset for parsing request data / urls. Cannot be changed by the client.
        # https://github.com/pallets/werkzeug/blob/4dc8d6ab840d4b78cbd5789cef91b01e3bde01d5/src/werkzeug/wrappers/base_request.py#L71-L72
        request.charset,
        request.url_charset,

        # request.date is a parsed `datetime`
        # https://github.com/pallets/werkzeug/blob/4dc8d6ab840d4b78cbd5789cef91b01e3bde01d5/src/werkzeug/wrappers/common_descriptors.py#L76-L83
        request.date,

        # Assuming that endpoints are not created from user input seems fair
        request.endpoint,

        # In some rare circumstances a client could spoof the host, but by default they
        # should not be able to. See
        # https://werkzeug.palletsprojects.com/en/1.0.x/wrappers/#werkzeug.wrappers.BaseRequest.trusted_hosts
        request.host,
        request.host_url,

        request.scheme,

        request.script_root,  # NOTE: also listed at the top of this call
    )
|
||||
|
||||
|
||||
@app.route('/debug/<foo>/<bar>', methods=['GET'])
def debug(foo, bar):
    """Print a few request fields for manual inspection and return ``'ok'``.

    Prints ``request.view_args``, the repr of ``request.headers``, the repr of
    the ``accept`` header, and the repr of ``request.pragma``. The ``foo`` and
    ``bar`` URL parameters are accepted but not used in the body.
    """
    view_args = request.view_args
    print("request.view_args", view_args)

    headers = request.headers
    print("request.headers {!r}".format(headers))
    accept_header = headers['accept']
    print("request.headers['accept'] {!r}".format(accept_header))

    pragma = request.pragma
    print("request.pragma {!r}".format(pragma))

    return 'ok'
|
||||
|
||||
@app.route('/stream', methods=['POST'])
def stream():
    """Print the request path, the ``request.stream`` object and everything read from it.

    Returns ``'ok'``.
    """
    print(request.path)

    body = request.stream
    print(body)
    # Original author's note: reading this stream "just works"
    # (compare with the `input_stream` view).
    print(body.read())

    return 'ok'
|
||||
|
||||
@app.route('/input_stream', methods=['POST'])
def input_stream():
    """Print the request path, the ``request.input_stream`` object and everything read from it.

    Returns ``'ok'``.
    """
    print(request.path)

    raw_body = request.input_stream
    print(raw_body)
    # Original author's note: this read hangs until the client stops the
    # connection, since the max number of bytes to read must be handled manually.
    print(raw_body.read())

    return 'ok'
|
||||
|
||||
@app.route('/form', methods=['POST'])
def form():
    """Print the request path and ``request.form``, then return ``'ok'``."""
    print(request.path)

    form_data = request.form
    print("request.form", form_data)

    return 'ok'
|
||||
|
||||
@app.route('/cache_control', methods=['POST'])
def cache_control():
    """Print ``max_age``, ``max_stale`` and ``min_fresh`` of ``request.cache_control``.

    Each value is printed together with its runtime type, after the request
    path. Returns ``'ok'``.
    """
    print(request.path)

    max_age = request.cache_control.max_age
    print("request.cache_control.max_age", max_age, type(max_age))

    max_stale = request.cache_control.max_stale
    print("request.cache_control.max_stale", max_stale, type(max_stale))

    min_fresh = request.cache_control.min_fresh
    print("request.cache_control.min_fresh", min_fresh, type(min_fresh))

    return 'ok'
|
||||
|
||||
@app.route('/file_upload', methods=['POST'])
def file_upload():
    """Print the request path and one line per entry of ``request.files``.

    Each line shows the key, the value, and the value's ``name``, ``filename``
    and ``stream`` attributes. Returns ``'ok'``.
    """
    print(request.path)

    for field, upload in request.files.items():
        print(field, upload, upload.name, upload.filename, upload.stream)

    return 'ok'
|
||||
|
||||
# curl --header "My-Header: some-value" http://localhost:5000/debug/fooval/barval
|
||||
# curl --header "Pragma: foo, bar" --header "Pragma: stuff, foo" http://localhost:5000/debug/fooval/barval
|
||||
|
||||
# curl -X POST --data 'wat' http://localhost:5000/stream
|
||||
# curl -X POST --data 'wat' http://localhost:5000/input_stream
|
||||
|
||||
# curl --form foo=foo --form foo=123 http://localhost:5000/form
|
||||
|
||||
# curl --header "Cache-Control: max-age=foo, max-stale=bar, min-fresh=baz" http://localhost:5000/cache_control
|
||||
# curl --header "Cache-Control: max-age=1, max-stale=2, min-fresh=3" http://localhost:5000/cache_control
|
||||
|
||||
# curl -F myfile=@<some-file> localhost:5000/file_upload
|
||||
|
||||
# When executed as a script, start the app so the views can be tried by hand
# (see the curl examples above); debug=True turns on Flask's debug mode.
if __name__ == "__main__":
    app.run(debug=True)
|
||||
1
python/ql/test/experimental/library-tests/options
Normal file
1
python/ql/test/experimental/library-tests/options
Normal file
@@ -0,0 +1 @@
|
||||
semmle-extractor-options: --max-import-depth=1
|
||||
Reference in New Issue
Block a user