Merge pull request #3314 from RasmusWL/python-model-stdlib-http.server

Approved by tausbn
This commit is contained in:
semmle-qlci
2020-04-24 15:27:21 +01:00
committed by GitHub
11 changed files with 393 additions and 0 deletions

View File

@@ -7,3 +7,4 @@ import semmle.python.web.bottle.Request
import semmle.python.web.turbogears.Request
import semmle.python.web.falcon.Request
import semmle.python.web.cherrypy.Request
import semmle.python.web.stdlib.Request

View File

@@ -7,3 +7,4 @@ import semmle.python.web.bottle.Response
import semmle.python.web.turbogears.Response
import semmle.python.web.falcon.Response
import semmle.python.web.cherrypy.Response
import semmle.python.web.stdlib.Response

View File

@@ -0,0 +1,124 @@
/**
* Provides the sources and taint-flow for HTTP servers defined using the standard library (stdlib).
* Specifically, we model `HttpRequestTaintSource`s from instances of `BaseHTTPRequestHandler`
* (or subclasses) and form parsing using `cgi.FieldStorage`.
*/
import python
import semmle.python.security.TaintTracking
import semmle.python.web.Http
/** Source of BaseHTTPRequestHandler instances. */
class StdLibRequestSource extends HttpRequestTaintSource {
StdLibRequestSource() {
exists(ClassValue cls |
cls.getABaseType+() = Value::named("BaseHTTPServer.BaseHTTPRequestHandler")
or
cls.getABaseType+() = Value::named("http.server.BaseHTTPRequestHandler")
|
this.(ControlFlowNode).pointsTo().getClass() = cls
)
}
override predicate isSourceOf(TaintKind kind) { kind instanceof BaseHTTPRequestHandlerKind }
}
/** TaintKind for an instance of BaseHTTPRequestHandler. */
class BaseHTTPRequestHandlerKind extends TaintKind {
BaseHTTPRequestHandlerKind() { this = "BaseHTTPRequestHandlerKind" }
override TaintKind getTaintOfAttribute(string name) {
name in ["requestline", "path"] and
result instanceof ExternalStringKind
or
name = "headers" and
result instanceof HTTPMessageKind
or
name = "rfile" and
result instanceof ExternalFileObject
}
}
/** TaintKind for headers (instance of HTTPMessage). */
class HTTPMessageKind extends ExternalStringDictKind {
override TaintKind getTaintOfMethodResult(string name) {
result = super.getTaintOfMethodResult(name)
or
name = "get_all" and
result.(SequenceKind).getItem() = this.getValue()
or
name in ["as_bytes", "as_string"] and
result instanceof ExternalStringKind
}
override TaintKind getTaintForFlowStep(ControlFlowNode fromnode, ControlFlowNode tonode) {
result = super.getTaintForFlowStep(fromnode, tonode)
or
exists(ClassValue cls | cls = ClassValue::unicode() or cls = ClassValue::bytes() |
tonode = cls.getACall() and
tonode.(CallNode).getArg(0) = fromnode and
result instanceof ExternalStringKind
)
}
}
/** Source of parsed HTTP forms (by using the `cgi` module). */
class CgiFieldStorageSource extends HttpRequestTaintSource {
CgiFieldStorageSource() { this = Value::named("cgi.FieldStorage").getACall() }
override predicate isSourceOf(TaintKind kind) { kind instanceof CgiFieldStorageFormKind }
}
/** TaintKind for a parsed HTTP form. */
class CgiFieldStorageFormKind extends TaintKind {
/*
* There is a slight difference between how we model form/fields and how it is handled by the code.
* In the code
* ```
* form = cgi.FieldStorage()
* field = form['myfield']
* ```
* both `form` and `field` have the type `cgi.FieldStorage`. This allows the code to represent
* nested forms as `form['nested_form']['myfield']`. However, since HTML forms can't be nested
* we ignore that detail since it allows for a more clean modeling.
*/
CgiFieldStorageFormKind() { this = "CgiFieldStorageFormKind" }
override TaintKind getTaintOfAttribute(string name) {
name = "value" and result.(SequenceKind).getItem() instanceof CgiFieldStorageFieldKind
}
override TaintKind getTaintOfMethodResult(string name) {
name = "getvalue" and
(
result instanceof ExternalStringKind
or
result.(SequenceKind).getItem() instanceof ExternalStringKind
)
or
name = "getfirst" and
result instanceof ExternalStringKind
or
name = "getlist" and
result.(SequenceKind).getItem() instanceof ExternalStringKind
}
override TaintKind getTaintForFlowStep(ControlFlowNode fromnode, ControlFlowNode tonode) {
tonode.(SubscriptNode).getObject() = fromnode and
(
result instanceof CgiFieldStorageFieldKind
or
result.(SequenceKind).getItem() instanceof CgiFieldStorageFieldKind
)
}
}
/** TaintKind for the field of a parsed HTTP form. */
class CgiFieldStorageFieldKind extends TaintKind {
CgiFieldStorageFieldKind() { this = "CgiFieldStorageFieldKind" }
override TaintKind getTaintOfAttribute(string name) {
name in ["filename", "value"] and result instanceof ExternalStringKind
or
name = "file" and result instanceof ExternalFileObject
}
}

View File

@@ -0,0 +1,43 @@
/**
* Provides the sinks for HTTP servers defined with standard library (stdlib).
*/
import python
import semmle.python.security.TaintTracking
import semmle.python.web.Http
private predicate is_wfile(AttrNode wfile) {
exists(ClassValue cls |
// Python 2
cls.getABaseType+() = Value::named("BaseHTTPServer.BaseHTTPRequestHandler")
or
// Python 3
cls.getABaseType+() = Value::named("http.server.BaseHTTPRequestHandler")
|
wfile.getObject("wfile").pointsTo().getClass() = cls
)
}
/** Sink for `h.wfile.write` where `h` is an instance of BaseHTTPRequestHandler. */
class StdLibWFileWriteSink extends HttpResponseTaintSink {
StdLibWFileWriteSink() {
exists(CallNode call |
is_wfile(call.getFunction().(AttrNode).getObject("write")) and
call.getArg(0) = this
)
}
override predicate sinks(TaintKind kind) { kind instanceof ExternalStringKind }
}
/** Sink for `h.wfile.writelines` where `h` is an instance of BaseHTTPRequestHandler. */
class StdLibWFileWritelinesSink extends HttpResponseTaintSink {
StdLibWFileWritelinesSink() {
exists(CallNode call |
is_wfile(call.getFunction().(AttrNode).getObject("writelines")) and
call.getArg(0) = this
)
}
override predicate sinks(TaintKind kind) { kind instanceof ExternalStringSequenceKind }
}

View File

@@ -0,0 +1,2 @@
| test.py:72:26:72:58 | Taint sink | externally controlled string |
| test.py:73:31:73:54 | Taint sink | [externally controlled string] |

View File

@@ -0,0 +1,7 @@
import python
import semmle.python.web.HttpResponse
import semmle.python.security.strings.Untrusted
from HttpResponseTaintSink sink, TaintKind kind
where sink.sinks(kind)
select sink, kind

View File

@@ -0,0 +1,34 @@
| test.py:18:13:18:16 | self | BaseHTTPRequestHandlerKind |
| test.py:20:13:20:16 | self | BaseHTTPRequestHandlerKind |
| test.py:22:13:22:16 | self | BaseHTTPRequestHandlerKind |
| test.py:24:13:24:16 | self | BaseHTTPRequestHandlerKind |
| test.py:25:13:25:16 | self | BaseHTTPRequestHandlerKind |
| test.py:26:13:26:16 | self | BaseHTTPRequestHandlerKind |
| test.py:27:13:27:16 | self | BaseHTTPRequestHandlerKind |
| test.py:28:13:28:16 | self | BaseHTTPRequestHandlerKind |
| test.py:29:13:29:16 | self | BaseHTTPRequestHandlerKind |
| test.py:30:13:30:16 | self | BaseHTTPRequestHandlerKind |
| test.py:31:13:31:16 | self | BaseHTTPRequestHandlerKind |
| test.py:32:13:32:16 | self | BaseHTTPRequestHandlerKind |
| test.py:33:17:33:20 | self | BaseHTTPRequestHandlerKind |
| test.py:34:19:34:22 | self | BaseHTTPRequestHandlerKind |
| test.py:36:13:36:16 | self | BaseHTTPRequestHandlerKind |
| test.py:37:13:37:16 | self | BaseHTTPRequestHandlerKind |
| test.py:40:16:44:9 | Attribute() | CgiFieldStorageFormKind |
| test.py:41:13:41:16 | self | BaseHTTPRequestHandlerKind |
| test.py:42:13:42:16 | self | BaseHTTPRequestHandlerKind |
| test.py:43:64:43:67 | self | BaseHTTPRequestHandlerKind |
| test.py:69:9:69:12 | self | BaseHTTPRequestHandlerKind |
| test.py:70:9:70:12 | self | BaseHTTPRequestHandlerKind |
| test.py:71:9:71:12 | self | BaseHTTPRequestHandlerKind |
| test.py:72:9:72:12 | self | BaseHTTPRequestHandlerKind |
| test.py:73:9:73:12 | self | BaseHTTPRequestHandlerKind |
| test.py:74:15:74:18 | self | BaseHTTPRequestHandlerKind |
| test.py:78:16:82:9 | Attribute() | CgiFieldStorageFormKind |
| test.py:79:13:79:16 | self | BaseHTTPRequestHandlerKind |
| test.py:80:13:80:16 | self | BaseHTTPRequestHandlerKind |
| test.py:81:64:81:67 | self | BaseHTTPRequestHandlerKind |
| test.py:85:13:85:16 | self | BaseHTTPRequestHandlerKind |
| test.py:86:13:86:16 | self | BaseHTTPRequestHandlerKind |
| test.py:96:9:96:12 | self | BaseHTTPRequestHandlerKind |
| test.py:97:9:97:12 | self | BaseHTTPRequestHandlerKind |

View File

@@ -0,0 +1,9 @@
import python
import semmle.python.web.HttpRequest
import semmle.python.security.strings.Untrusted
from HttpRequestTaintSource source, TaintKind kind
where
source.isSourceOf(kind) and
source.getLocation().getFile().getShortName() != "cgi.py"
select source.(ControlFlowNode).getNode(), kind

View File

@@ -0,0 +1,32 @@
| test.py:18 | ok | taint_sources | self | BaseHTTPRequestHandlerKind |
| test.py:20 | ok | taint_sources | Attribute | externally controlled string |
| test.py:22 | ok | taint_sources | Attribute | externally controlled string |
| test.py:24 | ok | taint_sources | Attribute | {externally controlled string} |
| test.py:25 | ok | taint_sources | Subscript | externally controlled string |
| test.py:26 | ok | taint_sources | Attribute() | externally controlled string |
| test.py:27 | ok | taint_sources | Attribute() | [externally controlled string] |
| test.py:28 | fail | taint_sources | Attribute() | <NO TAINT> |
| test.py:29 | ok | taint_sources | Attribute() | [externally controlled string] |
| test.py:30 | fail | taint_sources | Attribute() | <NO TAINT> |
| test.py:31 | ok | taint_sources | Attribute() | externally controlled string |
| test.py:32 | ok | taint_sources | Attribute() | externally controlled string |
| test.py:33 | ok | taint_sources | str() | externally controlled string |
| test.py:34 | ok | taint_sources | bytes() | externally controlled string |
| test.py:36 | ok | taint_sources | Attribute | file[externally controlled string] |
| test.py:37 | ok | taint_sources | Attribute() | externally controlled string |
| test.py:47 | ok | taint_sources | form | CgiFieldStorageFormKind |
| test.py:49 | ok | taint_sources | Subscript | CgiFieldStorageFieldKind |
| test.py:49 | ok | taint_sources | Subscript | [CgiFieldStorageFieldKind] |
| test.py:50 | ok | taint_sources | Attribute | externally controlled string |
| test.py:51 | ok | taint_sources | Attribute | file[externally controlled string] |
| test.py:52 | ok | taint_sources | Attribute | externally controlled string |
| test.py:53 | ok | taint_sources | Subscript | CgiFieldStorageFieldKind |
| test.py:54 | ok | taint_sources | Attribute | externally controlled string |
| test.py:55 | ok | taint_sources | Attribute | file[externally controlled string] |
| test.py:56 | ok | taint_sources | Attribute | externally controlled string |
| test.py:58 | ok | taint_sources | Attribute() | [externally controlled string] |
| test.py:58 | ok | taint_sources | Attribute() | externally controlled string |
| test.py:59 | ok | taint_sources | Subscript | externally controlled string |
| test.py:61 | ok | taint_sources | Attribute() | externally controlled string |
| test.py:63 | ok | taint_sources | Attribute() | [externally controlled string] |
| test.py:64 | ok | taint_sources | Subscript | externally controlled string |

View File

@@ -0,0 +1,32 @@
import python
import semmle.python.security.TaintTracking
import semmle.python.web.HttpRequest
import semmle.python.security.strings.Untrusted
from
Call call, Expr arg, boolean expected_taint, boolean has_taint, string test_res,
string taint_string
where
call.getLocation().getFile().getShortName() = "test.py" and
(
call.getFunc().(Name).getId() = "ensure_tainted" and
expected_taint = true
or
call.getFunc().(Name).getId() = "ensure_not_tainted" and
expected_taint = false
) and
arg = call.getAnArg() and
(
not exists(TaintedNode tainted | tainted.getAstNode() = arg) and
taint_string = "<NO TAINT>" and
has_taint = false
or
exists(TaintedNode tainted | tainted.getAstNode() = arg |
taint_string = tainted.getTaintKind().toString()
) and
has_taint = true
) and
if expected_taint = has_taint then test_res = "ok " else test_res = "fail"
// if expected_taint = has_taint then test_res = "✓" else test_res = "✕"
select arg.getLocation().toString(), test_res, call.getScope().(Function).getName(), arg.toString(),
taint_string

View File

@@ -0,0 +1,108 @@
import sys
import os
import cgi
if sys.version_info[0] == 2:
from BaseHTTPServer import BaseHTTPRequestHandler
from BaseHTTPServer import HTTPServer
if sys.version_info[0] == 3:
from http.server import HTTPServer, BaseHTTPRequestHandler
class MyHandler(BaseHTTPRequestHandler):
def taint_sources(self):
ensure_tainted(
self,
self.requestline,
self.path,
self.headers,
self.headers['Foo'],
self.headers.get('Foo'),
self.headers.get_all('Foo'),
self.headers.keys(),
self.headers.values(),
self.headers.items(),
self.headers.as_bytes(),
self.headers.as_string(),
str(self.headers),
bytes(self.headers),
self.rfile,
self.rfile.read(),
)
form = cgi.FieldStorage(
self.rfile,
self.headers,
environ={'REQUEST_METHOD': 'POST', 'CONTENT_TYPE': self.headers.get('content-type')},
)
ensure_tainted(
form,
form['key'],
form['key'].value,
form['key'].file,
form['key'].filename,
form['key'][0], # will be a list, if multiple fields named "key" are provided
form['key'][0].value,
form['key'][0].file,
form['key'][0].filename,
form.getvalue('key'),
form.getvalue('key')[0], # will be a list, if multiple fields named "key" are provided
form.getfirst('key'),
form.getlist('key'),
form.getlist('key')[0],
)
def do_GET(self):
# send_response will log a line to stderr
self.send_response(200)
self.send_header("Content-type", "text/plain; charset=utf-8")
self.end_headers()
self.wfile.write(b"Hello BaseHTTPRequestHandler\n")
self.wfile.writelines([b"1\n", b"2\n", b"3\n"])
print(self.headers)
def do_POST(self):
form = cgi.FieldStorage(
self.rfile,
self.headers,
environ={'REQUEST_METHOD': 'POST', 'CONTENT_TYPE': self.headers.get('content-type')},
)
if 'myfile' not in form:
self.send_response(422)
self.end_headers()
return
field = form['myfile']
field.file.seek(0, os.SEEK_END)
filesize = field.file.tell()
print("Uploaded {!r} with {} bytes".format(field.filename, filesize))
self.send_response(200)
self.end_headers()
if __name__ == "__main__":
server = HTTPServer(("127.0.0.1", 8080), MyHandler)
server.serve_forever()
# Headers works case insensitvely, so self.headers['foo'] == self.headers['FOO']
# curl localhost:8080 --header "Foo: 1" --header "foo: 2"
# To test file submission through forms, use
# curl -F myfile=@<yourfile> localhost:8080