Merge pull request #7131 from RasmusWL/wsgiref.simple_server

Python: Model `wsgiref.simple_server` applications
This commit is contained in:
Rasmus Wriedt Larsen
2021-11-24 14:22:23 +01:00
committed by GitHub
7 changed files with 315 additions and 0 deletions

View File

@@ -0,0 +1,2 @@
lgtm,codescanning
* Added modeling of `wsgiref.simple_server` applications, leading to new remote flow sources.

View File

@@ -10,6 +10,8 @@ private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
private import semmle.python.frameworks.PEP249
private import semmle.python.frameworks.internal.PoorMansFunctionResolution
private import semmle.python.frameworks.internal.SelfRefMixin
private import semmle.python.frameworks.internal.InstanceTaintStepsHelper
/** Provides models for the Python standard library. */
@@ -1349,6 +1351,161 @@ private module StdlibPrivate {
}
}
// ---------------------------------------------------------------------------
// wsgiref.simple_server
// ---------------------------------------------------------------------------
/** Provides models for the `wsgiref.simple_server` module. */
private module WsgirefSimpleServer {
/**
 * A class definition that has a base-class expression referring to
 * `wsgiref.simple_server.WSGIServer` or to one of its subclasses.
 */
class WsgiServerSubclass extends Class, SelfRefMixin {
  WsgiServerSubclass() {
    exists(API::Node serverClass |
      serverClass =
        API::moduleImport("wsgiref")
            .getMember("simple_server")
            .getMember("WSGIServer")
            .getASubclass*() and
      serverClass.getAUse().asExpr() = this.getABase()
    )
  }
}
/**
 * A function that is used as the WSGI application of a
 * `wsgiref.simple_server.WSGIServer`. This covers functions that are
 *
 * - passed to the `set_app` method of a `WSGIServer` instance (or an instance of a
 *   subclass), or
 * - passed as the `app` argument of `wsgiref.simple_server.make_server`, which
 *   passes it on to `set_app` of the server it creates.
 *
 * See
 * - https://docs.python.org/3.10/library/wsgiref.html#wsgiref.simple_server.WSGIServer.set_app
 * - https://docs.python.org/3.10/library/wsgiref.html#wsgiref.simple_server.make_server
 *
 * See https://github.com/python/cpython/blob/b567b9d74bd9e476a3027335873bb0508d6e450f/Lib/wsgiref/handlers.py#L137
 * for how a request is processed and given to an application.
 */
class WsgirefSimpleServerApplication extends HTTP::Server::RequestHandler::Range {
  WsgirefSimpleServerApplication() {
    exists(DataFlow::Node appArg, DataFlow::CallCfgNode registrationCall |
      (
        // `server.set_app(app)` where `server` is an instance of `WSGIServer` or of
        // one of its subclasses
        registrationCall =
          API::moduleImport("wsgiref")
              .getMember("simple_server")
              .getMember("WSGIServer")
              .getASubclass*()
              .getReturn()
              .getMember("set_app")
              .getACall()
        or
        // `set_app` called on a self-reference of a `WSGIServer` subclass, as in
        // `self.set_app(app)`
        registrationCall
            .(DataFlow::MethodCallNode)
            .calls(any(WsgiServerSubclass cls).getASelfRef(), "set_app")
      ) and
      appArg in [registrationCall.getArg(0), registrationCall.getArgByName("application")]
      or
      // `make_server(host, port, app, ...)`, which calls `set_app(app)` on the server
      // it creates
      registrationCall =
        API::moduleImport("wsgiref")
            .getMember("simple_server")
            .getMember("make_server")
            .getACall() and
      appArg in [registrationCall.getArg(2), registrationCall.getArgByName("app")]
    |
      // the registered argument must be a reference to this function
      appArg = poorMansFunctionTracker(this)
    )
  }

  // no routed parameters are modeled for WSGI applications
  override Parameter getARoutedParameter() { none() }

  override string getFramework() { result = "Stdlib: wsgiref.simple_server application" }
}
/**
 * The parameter of a `WsgirefSimpleServerApplication` that receives the WSGI
 * environment when a request is processed. It is modeled as a remote flow source.
 *
 * This is the parameter at index 0 for a plain function, and at index 1 when the
 * application is a method.
 *
 * See https://docs.python.org/3.10/library/wsgiref.html#wsgiref.simple_server.WSGIRequestHandler.get_environ
 */
class WSGIEnvirontParameter extends RemoteFlowSource::Range, DataFlow::ParameterNode {
  WSGIEnvirontParameter() {
    exists(WsgirefSimpleServerApplication app, int environIndex |
      // methods have one extra leading parameter before the environment
      (if app.isMethod() then environIndex = 1 else environIndex = 0) and
      this.getParameter() = app.getArg(environIndex)
    )
  }

  override string getSourceType() {
    result = "Stdlib: wsgiref.simple_server application: WSGI environment parameter"
  }
}
/**
 * Type-tracking helper: gets a node, tracked with type tracker `t`, that refers to
 * the `start_response` parameter of a `WsgirefSimpleServerApplication`.
 *
 * The `start_response` parameter is at index 1 for a plain function, and at index 2
 * when the application is a method.
 *
 * See https://github.com/python/cpython/blob/b567b9d74bd9e476a3027335873bb0508d6e450f/Lib/wsgiref/handlers.py#L225-L252
 */
private DataFlow::TypeTrackingNode startResponse(DataFlow::TypeTracker t) {
  // base case: the parameter itself
  exists(WsgirefSimpleServerApplication app, int startResponseIndex |
    t.start() and
    (if app.isMethod() then startResponseIndex = 2 else startResponseIndex = 1) and
    result.(DataFlow::ParameterNode).getParameter() = app.getArg(startResponseIndex)
  )
  or
  // recursive case: one more type-tracking step from an already tracked node
  exists(DataFlow::TypeTracker prev | result = startResponse(prev).track(prev, t))
}
/**
 * Gets a node that the `start_response` parameter of a
 * `WsgirefSimpleServerApplication` flows to, as determined by type tracking.
 *
 * See https://github.com/python/cpython/blob/b567b9d74bd9e476a3027335873bb0508d6e450f/Lib/wsgiref/handlers.py#L225-L252
 */
DataFlow::Node startResponse() {
  exists(DataFlow::TypeTrackingNode tracked |
    tracked = startResponse(DataFlow::TypeTracker::end()) and
    tracked.flowsTo(result)
  )
}
/**
 * Type-tracking helper: gets a node, tracked with type tracker `t`, that refers to
 * the `write` function (that will write data to the response). The `write`
 * function is the value returned by a call to the `start_response` function.
 *
 * See https://github.com/python/cpython/blob/b567b9d74bd9e476a3027335873bb0508d6e450f/Lib/wsgiref/handlers.py#L225-L252
 */
private DataFlow::TypeTrackingNode writeFunction(DataFlow::TypeTracker t) {
  // base case: a call whose callee is a reference to `start_response`
  exists(DataFlow::CallCfgNode startResponseCall |
    t.start() and
    startResponseCall.getFunction() = startResponse() and
    result = startResponseCall
  )
  or
  // recursive case: one more type-tracking step from an already tracked node
  exists(DataFlow::TypeTracker prev | result = writeFunction(prev).track(prev, t))
}
/**
 * Gets a node that the `write` function (that will write data to the response)
 * flows to, as determined by type tracking. The `write` function is the value
 * returned by a call to the `start_response` function.
 *
 * See https://github.com/python/cpython/blob/b567b9d74bd9e476a3027335873bb0508d6e450f/Lib/wsgiref/handlers.py#L225-L252
 */
DataFlow::Node writeFunction() {
  exists(DataFlow::TypeTrackingNode tracked |
    tracked = writeFunction(DataFlow::TypeTracker::end()) and
    tracked.flowsTo(result)
  )
}
/**
 * A call to the `write` function, modeled as an HTTP response whose body is the
 * first positional argument or the `data` keyword argument.
 *
 * See https://github.com/python/cpython/blob/b567b9d74bd9e476a3027335873bb0508d6e450f/Lib/wsgiref/handlers.py#L276
 */
class WsgirefSimpleServerApplicationWriteCall extends HTTP::Server::HttpResponse::Range,
  DataFlow::CallCfgNode {
  WsgirefSimpleServerApplicationWriteCall() { writeFunction() = this.getFunction() }

  override DataFlow::Node getBody() {
    result = this.getArg(0)
    or
    result = this.getArgByName("data")
  }

  // no content type argument is modeled for `write` calls
  override DataFlow::Node getMimetypeOrContentTypeArg() { none() }

  // no default mimetype is modeled for `write` calls
  override string getMimetypeDefault() { none() }
}
/**
 * A value returned from a `WsgirefSimpleServerApplication`. The returned value is
 * included in the response body, so the node is modeled as its own response body.
 */
class WsgirefSimpleServerApplicationReturn extends HTTP::Server::HttpResponse::Range,
  DataFlow::CfgNode {
  WsgirefSimpleServerApplicationReturn() {
    node = any(WsgirefSimpleServerApplication app).getAReturnValueFlowNode()
  }

  override DataFlow::Node getBody() { this = result }

  // no content type argument is modeled for returned values
  override DataFlow::Node getMimetypeOrContentTypeArg() { none() }

  // no default mimetype is modeled for returned values
  override string getMimetypeDefault() { none() }
}
}
// ---------------------------------------------------------------------------
// sqlite3
// ---------------------------------------------------------------------------

View File

@@ -62,6 +62,22 @@ private DataFlow::TypeTrackingNode poorMansFunctionTracker(DataFlow::TypeTracker
exists(DataFlow::TypeTracker t2 | result = poorMansFunctionTracker(t2, func).track(t2, t))
}
/**
* Gets a reference to `func` that is an attribute read, using the name of `func`, on
* a node whose local source is the first parameter of a method of the class that
* defines `func` (for example `self.my_method`).
*
* `func` must be defined inside a class, and the reference will be inside a method
* of the same class. Nothing here requires that method to be different from `func`,
* so a method that refers to itself through its first parameter is also matched.
*
* Only methods defined directly on the same class are considered; inherited methods
* are not found (see the TODO below).
*/
private DataFlow::Node getSimpleMethodReferenceWithinClass(Function func) {
// TODO: Should take MRO into account
exists(Class cls, Function otherFunc, DataFlow::Node selfRefOtherFunc |
// `func` and `otherFunc` are both methods of the same class `cls`
pragma[only_bind_into](cls).getAMethod() = func and
pragma[only_bind_into](cls).getAMethod() = otherFunc
|
// `selfRefOtherFunc` has the first parameter of `otherFunc` as a local source
selfRefOtherFunc.getALocalSource().(DataFlow::ParameterNode).getParameter() =
otherFunc.getArg(0) and
// the result reads the attribute named like `func` from that node
result.(DataFlow::AttrRead).accesses(selfRefOtherFunc, func.getName())
)
}
/**
* INTERNAL: Do not use.
*
@@ -80,7 +96,20 @@ private DataFlow::TypeTrackingNode poorMansFunctionTracker(DataFlow::TypeTracker
* inst = MyClass()
* print(inst.my_method)
* ```
*
* But it is able to handle simple method calls within a class (but does not take MRO into
* account).
* ```py
* class MyClass:
* def method1(self):
* pass
*
* def method2(self):
* self.method1()
* ```
*/
DataFlow::Node poorMansFunctionTracker(Function func) {
  // a reference made through the first parameter of a method in the class that
  // defines `func`
  result = getSimpleMethodReferenceWithinClass(func)
  or
  // a reference found by type tracking
  exists(DataFlow::TypeTrackingNode tracked |
    tracked = poorMansFunctionTracker(DataFlow::TypeTracker::end(), func) and
    tracked.flowsTo(result)
  )
}

View File

@@ -0,0 +1,28 @@
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.frameworks.internal.PoorMansFunctionResolution
import TestUtilities.InlineExpectationsTest
/**
 * An inline expectations test for `poorMansFunctionTracker`: every reference to a
 * function is reported with the tag `resolved` and the function name as its value.
 */
class InlinePoorMansFunctionResolutionTest extends InlineExpectationsTest {
  InlinePoorMansFunctionResolutionTest() { this = "InlinePoorMansFunctionResolutionTest" }

  override string getARelevantTag() { result = "resolved" }

  override predicate hasActualResult(Location location, string element, string tag, string value) {
    tag = "resolved" and
    // only report results in files that have a relative path
    exists(location.getFile().getRelativePath()) and
    exists(Function func, DataFlow::Node ref |
      ref = poorMansFunctionTracker(func) and
      // exclude things like `GSSA variable func`
      exists(ref.asExpr()) and
      // exclude the function definition itself
      not ref.asExpr() instanceof FunctionExpr and
      // exclude decorator calls (which with our extractor rewrites does reference the
      // function)
      not ref.asExpr() = func.getDefinition().(FunctionExpr).getADecoratorCall() and
      location = ref.getLocation() and
      element = ref.toString() and
      value = func.getName()
    )
  }
}

View File

@@ -0,0 +1,42 @@
# A plain module-level function. The direct call below is annotated as resolving
# to it.
def func():
print("func")
func() # $ resolved=func
# Base class of `MyClass`. `base_method` is defined only here, and none of the calls
# to it in this file carry a `resolved` annotation.
class MyBase:
def base_method(self):
print("base_method", self)
# Class with an instance method, a classmethod and a staticmethod, plus `method2`
# which refers to all of them (and to the inherited `base_method`) through `self`.
class MyClass(MyBase):
def method1(self):
print("method1", self)
@classmethod
def cls_method(cls):
print("cls_method", cls)
@staticmethod
def static():
print("static")
def method2(self):
print("method2", self)
# method defined on the same class: annotated as resolved
self.method1() # $ resolved=method1
# method defined on the base class `MyBase`: no `resolved` annotation
self.base_method()
# classmethod and staticmethod of the same class accessed through `self`:
# annotated as resolved
self.cls_method() # $ resolved=cls_method
self.static() # $ resolved=static
# Accesses through the class object: annotated as resolved.
MyClass.cls_method() # $ resolved=cls_method
MyClass.static() # $ resolved=static
# Accesses through an instance: none of these carry a `resolved` annotation.
x = MyClass()
x.base_method()
x.method1()
x.cls_method()
x.static()
x.method2()

View File

@@ -0,0 +1,57 @@
# This test file demonstrates how to use an application with a wsgiref.simple_server
# see https://docs.python.org/3/library/wsgiref.html#wsgiref.simple_server.WSGIServer
import sys
import wsgiref.simple_server
# No-op that accepts any arguments, so calls to `ensure_tainted` /
# `ensure_not_tainted` do nothing when this file is actually run.
# NOTE(review): these names are presumably recognized by the taint test harness;
# confirm against the test query.
def ignore(*arg, **kwargs): pass
ensure_tainted = ensure_not_tainted = ignore
# (host, port) pair used both to construct the servers and in the startup message.
ADDRESS = ("localhost", 8000)
# I wanted to showcase that we handle both functions and bound-methods, so it's possible
# to run this test-file in 3 different ways, selected by the first command-line
# argument ("1", "2" or "3").
# WSGI application written as a plain function; it is registered with
# `server.set_app(func)` in case "1" below.
def func(environ, start_response): # $ requestHandler
ensure_tainted(
environ, # $ tainted
environ["PATH_INFO"], # $ tainted
)
# `start_response` returns the `write` callable; data can be passed to it
# positionally or through the `data` keyword.
write = start_response("200 OK", [("Content-Type", "text/plain")])
write(b"hello") # $ HttpResponse responseBody=b"hello"
write(data=b" ") # $ HttpResponse responseBody=b" "
# function return value should be an iterable that will also be written to the
# response.
return [b"world", b"!"] # $ HttpResponse responseBody=List
# `WSGIServer` subclass that registers one of its own bound methods as the WSGI
# application from within `__init__`.
class MyServer(wsgiref.simple_server.WSGIServer):
def __init__(self):
super().__init__(ADDRESS, wsgiref.simple_server.WSGIRequestHandler)
# register the bound method as the application
self.set_app(self.my_method)
# As a method, the application takes `self` before the environment and
# `start_response` parameters.
def my_method(self, _env, start_response): # $ requestHandler
start_response("200 OK", [])
return [b"my_method"] # $ HttpResponse responseBody=List
# Select the scenario from the first command-line argument (the script fails with an
# IndexError if no argument is given).
case = sys.argv[1]
if case == "1":
# plain `WSGIServer` with the function `func` as the application
server = wsgiref.simple_server.WSGIServer(ADDRESS, wsgiref.simple_server.WSGIRequestHandler)
server.set_app(func)
elif case == "2":
# `MyServer` registers its own bound method `my_method` in `__init__`
server = MyServer()
elif case == "3":
# `MyServer` instance on which `set_app` is called again with a plain function
server = MyServer()
def func3(_env, start_response): # $ requestHandler
start_response("200 OK", [])
return [b"foo"] # $ HttpResponse responseBody=List
server.set_app(func3)
else:
sys.exit("wrong case")
print(f"Running on http://{ADDRESS[0]}:{ADDRESS[1]}")
# blocks and serves requests until the process is interrupted
server.serve_forever()