Python: Model outgoing http client requests

This commit is contained in:
Rasmus Wriedt Larsen
2020-02-24 14:55:52 +01:00
parent db33c360bc
commit cd5399d43e
18 changed files with 314 additions and 2 deletions

View File

@@ -0,0 +1,2 @@
import semmle.python.web.client.StdLib
import semmle.python.web.client.Requests

View File

@@ -89,7 +89,7 @@ abstract class CookieSet extends CookieOperation {}
/** Generic taint sink in a http response */
abstract class HttpResponseTaintSink extends TaintSink {
override predicate sinks(TaintKind kind) {
override predicate sinks(TaintKind kind) {
kind instanceof ExternalStringKind
}
@@ -97,9 +97,51 @@ abstract class HttpResponseTaintSink extends TaintSink {
abstract class HttpRedirectTaintSink extends TaintSink {
override predicate sinks(TaintKind kind) {
override predicate sinks(TaintKind kind) {
kind instanceof ExternalStringKind
}
}
module Client {
    // TODO: model user input that ends up somewhere other than the URL:
    // - `data`, `json` for `requests.post`
    // - `body` for `HTTPConnection.request`
    // - headers?
    // TODO: Add support for more client libraries
    // - urllib3 https://github.com/urllib3/urllib3
    // - httpx https://github.com/encode/httpx
    /**
     * A call that performs an outgoing HTTP request.
     *
     * For example, the `conn.request(...)` call in:
     *
     *     conn = HTTPConnection('example.com')
     *     conn.request('GET', '/path')
     */
    abstract class HttpRequest extends CallNode {
        /**
         * Gets a control-flow node that contributes to the URL being requested.
         *
         * In the `HTTPConnection` example, both `'example.com'` and `'/path'` are results.
         */
        abstract ControlFlowNode getAUrlPart();

        /**
         * Gets the HTTP method of this request, in upper case.
         *
         * Implementations may have no result when the method cannot be determined.
         */
        abstract string getMethodUpper();
    }

    /** A taint sink for any URL part of an outgoing HTTP request. */
    class HttpRequestUrlTaintSink extends TaintSink {
        HttpRequestUrlTaintSink() { exists(HttpRequest request | this = request.getAUrlPart()) }

        override predicate sinks(TaintKind kind) { kind instanceof ExternalStringKind }
    }
}

View File

@@ -0,0 +1,22 @@
/**
* Modeling outgoing HTTP requests using the `requests` package
* https://pypi.org/project/requests/
*/
import python
private import semmle.python.web.Http
/**
 * A call to one of the per-verb functions of the `requests` module,
 * for example `requests.get(...)` or `requests.post(...)`.
 */
class RequestsHttpRequest extends Client::HttpRequest {
    CallableValue func;
    string method;

    RequestsHttpRequest() {
        exists(Value requestsModule |
            requestsModule = Module::named("requests") and
            // the attribute looked up on the module is the lower-case HTTP verb
            method = httpVerbLower() and
            func = requestsModule.attr(method) and
            this = func.getACall()
        )
    }

    override ControlFlowNode getAUrlPart() {
        // resolved by parameter name, so positional and keyword use are both covered
        result = func.getNamedArgumentForCall(this, "url")
    }

    override string getMethodUpper() {
        // `method` is stored lower-case; the interface wants it upper-case
        result = method.toUpperCase()
    }
}

View File

@@ -0,0 +1,52 @@
import python
private import semmle.python.web.Http
/**
 * Gets the class value for `HTTPConnection` or `HTTPSConnection`, as exposed by
 * `httplib` (Python 2), `http.client` (Python 3) or `six.moves.http_client` (six).
 */
ClassValue httpConnectionClass() {
    exists(string moduleName, string className |
        (
            // Python 2
            moduleName = "httplib"
            or
            // Python 3
            moduleName = "http.client"
            or
            // six
            moduleName = "six.moves.http_client"
        ) and
        (className = "HTTPConnection" or className = "HTTPSConnection") and
        result = Value::named(moduleName + "." + className)
    )
}
/**
 * A call to the `request` method of an object constructed from one of the
 * classes returned by `httpConnectionClass()`.
 */
class HttpConnectionHttpRequest extends Client::HttpRequest {
    CallNode constructor_call;
    CallableValue func;

    HttpConnectionHttpRequest() {
        exists(ClassValue connClass, AttrNode requestAttr, Value connValue |
            connClass = httpConnectionClass() and
            func = connClass.lookup("request") and
            this = func.getACall() and
            // The called function need not be written as `conn.request` at the call site
            // (e.g. `r = conn.request; r('GET', path)`), so follow points-to back to the
            // attribute access the callee originates from.
            this.getFunction().pointsTo(_, _, requestAttr) and
            // The object the attribute was read from, and the call that created it.
            requestAttr.getObject().pointsTo(_, connValue, constructor_call) and
            // Pin `connClass` to the class of the constructed object and require that
            // `constructor_call` is a call to that class.
            // NOTE(review): presumably this keeps a request from being reported once per
            // class sharing the same `request` method - confirm.
            connClass = connValue.getClass() and
            constructor_call = connClass.getACall()
        )
    }

    override ControlFlowNode getAUrlPart() {
        // host handed to the constructor, positionally or by keyword
        result = constructor_call.getArg(0)
        or
        result = constructor_call.getArgByName("host")
        or
        // `url` argument of the `request` call itself
        result = func.getNamedArgumentForCall(this, "url")
    }

    override string getMethodUpper() {
        // Only has a result when the `method` argument points to a known string value.
        exists(ControlFlowNode methodArg, string method |
            methodArg = func.getNamedArgumentForCall(this, "method") and
            methodArg.pointsTo(Value::forString(method)) and
            result = method.toUpperCase()
        )
    }
}

View File

@@ -0,0 +1,10 @@
| test.py:6:5:6:32 | ControlFlowNode for Attribute() | test.py:5:27:5:39 | ControlFlowNode for Str | GET |
| test.py:6:5:6:32 | ControlFlowNode for Attribute() | test.py:6:25:6:31 | ControlFlowNode for Str | GET |
| test.py:15:5:15:33 | ControlFlowNode for Attribute() | test.py:10:28:10:40 | ControlFlowNode for Str | POST |
| test.py:15:5:15:33 | ControlFlowNode for Attribute() | test.py:15:26:15:32 | ControlFlowNode for Str | POST |
| test.py:20:5:20:33 | ControlFlowNode for Attribute() | test.py:19:27:19:39 | ControlFlowNode for Str | <NO METHOD> |
| test.py:20:5:20:33 | ControlFlowNode for Attribute() | test.py:20:26:20:32 | ControlFlowNode for Str | <NO METHOD> |
| test.py:30:5:30:32 | ControlFlowNode for Attribute() | test.py:28:27:28:30 | ControlFlowNode for fake | GET |
| test.py:30:5:30:32 | ControlFlowNode for Attribute() | test.py:30:25:30:31 | ControlFlowNode for Str | GET |
| test.py:37:5:37:29 | ControlFlowNode for req_meth() | test.py:35:27:35:39 | ControlFlowNode for Str | HEAD |
| test.py:37:5:37:29 | ControlFlowNode for req_meth() | test.py:37:22:37:28 | ControlFlowNode for Str | HEAD |

View File

@@ -0,0 +1,11 @@
import python
import semmle.python.web.Http
import semmle.python.web.ClientHttpRequest
// One row per (request, URL part). The last column is the upper-cased HTTP
// method, or the "<NO METHOD>" placeholder when the model has no method.
from Client::HttpRequest req, string method
where
    method = req.getMethodUpper()
    or
    (not exists(req.getMethodUpper()) and method = "<NO METHOD>")
select req, req.getAUrlPart(), method

View File

@@ -0,0 +1,37 @@
from httplib import HTTPConnection, HTTPSConnection
def basic():  # baseline: connection is built and used in the same function
    conn = HTTPConnection('example.com')
    conn.request('GET', '/path')
def indirect_caller():  # builds the connection; the request is issued in indirect_callee
    conn = HTTPSConnection('example.com')
    indirect_callee(conn)
def indirect_callee(conn):  # issues the request on a connection received as a parameter
    conn.request('POST', '/path')
def method_not_known(method):  # HTTP method comes from a parameter, not a string literal
    conn = HTTPConnection('example.com')
    conn.request(method, '/path')
def sneaky_setting_host():  # host attribute is reassigned between construction and request
    # We don't handle that the host is overwritten directly.
    # A contrived example; you're not supposed to do this, but you certainly can.
    fake = 'fakehost.com'
    real = 'realhost.com'
    conn = HTTPConnection(fake)
    conn.host = real  # the direct overwrite that the model does not track
    conn.request('GET', '/path')
def tricky_not_attribute_node():  # bound method is called through a local alias
    # A contrived example; you're not supposed to do this, but you certainly can.
    conn = HTTPConnection('example.com')
    req_meth = conn.request
    req_meth('HEAD', '/path')

View File

@@ -0,0 +1,10 @@
| test.py:6:5:6:32 | ControlFlowNode for Attribute() | test.py:5:27:5:39 | ControlFlowNode for Str | GET |
| test.py:6:5:6:32 | ControlFlowNode for Attribute() | test.py:6:25:6:31 | ControlFlowNode for Str | GET |
| test.py:15:5:15:33 | ControlFlowNode for Attribute() | test.py:10:28:10:40 | ControlFlowNode for Str | POST |
| test.py:15:5:15:33 | ControlFlowNode for Attribute() | test.py:15:26:15:32 | ControlFlowNode for Str | POST |
| test.py:20:5:20:33 | ControlFlowNode for Attribute() | test.py:19:27:19:39 | ControlFlowNode for Str | <NO METHOD> |
| test.py:20:5:20:33 | ControlFlowNode for Attribute() | test.py:20:26:20:32 | ControlFlowNode for Str | <NO METHOD> |
| test.py:30:5:30:32 | ControlFlowNode for Attribute() | test.py:28:27:28:30 | ControlFlowNode for fake | GET |
| test.py:30:5:30:32 | ControlFlowNode for Attribute() | test.py:30:25:30:31 | ControlFlowNode for Str | GET |
| test.py:37:5:37:29 | ControlFlowNode for req_meth() | test.py:35:27:35:39 | ControlFlowNode for Str | HEAD |
| test.py:37:5:37:29 | ControlFlowNode for req_meth() | test.py:37:22:37:28 | ControlFlowNode for Str | HEAD |

View File

@@ -0,0 +1,11 @@
import python
import semmle.python.web.Http
import semmle.python.web.ClientHttpRequest
// One row per (request, URL part). The last column is the upper-cased HTTP
// method, or the "<NO METHOD>" placeholder when the model has no method.
from Client::HttpRequest req, string method
where
    method = req.getMethodUpper()
    or
    (not exists(req.getMethodUpper()) and method = "<NO METHOD>")
select req, req.getAUrlPart(), method

View File

@@ -0,0 +1,37 @@
from http.client import HTTPConnection, HTTPSConnection
def basic():  # baseline: connection is built and used in the same function
    conn = HTTPConnection('example.com')
    conn.request('GET', '/path')
def indirect_caller():  # builds the connection; the request is issued in indirect_callee
    conn = HTTPSConnection('example.com')
    indirect_callee(conn)
def indirect_callee(conn):  # issues the request on a connection received as a parameter
    conn.request('POST', '/path')
def method_not_known(method):  # HTTP method comes from a parameter, not a string literal
    conn = HTTPConnection('example.com')
    conn.request(method, '/path')
def sneaky_setting_host():  # host attribute is reassigned between construction and request
    # We don't handle that the host is overwritten directly.
    # A contrived example; you're not supposed to do this, but you certainly can.
    fake = 'fakehost.com'
    real = 'realhost.com'
    conn = HTTPConnection(fake)
    conn.host = real  # the direct overwrite that the model does not track
    conn.request('GET', '/path')
def tricky_not_attribute_node():  # bound method is called through a local alias
    # A contrived example; you're not supposed to do this, but you certainly can.
    conn = HTTPConnection('example.com')
    req_meth = conn.request
    req_meth('HEAD', '/path')

View File

@@ -0,0 +1,2 @@
| test.py:3:1:3:27 | ControlFlowNode for Attribute() | test.py:3:14:3:26 | ControlFlowNode for Str | GET |
| test.py:4:1:4:28 | ControlFlowNode for Attribute() | test.py:4:15:4:27 | ControlFlowNode for Str | POST |

View File

@@ -0,0 +1,11 @@
import python
import semmle.python.web.Http
import semmle.python.web.ClientHttpRequest
// One row per (request, URL part). The last column is the upper-cased HTTP
// method, or the "<NO METHOD>" placeholder when the model has no method.
from Client::HttpRequest req, string method
where
    method = req.getMethodUpper()
    or
    (not exists(req.getMethodUpper()) and method = "<NO METHOD>")
select req, req.getAUrlPart(), method

View File

@@ -0,0 +1 @@
semmle-extractor-options: -p ../../../../query-tests/Security/lib/ --max-import-depth=1

View File

@@ -0,0 +1,4 @@
import requests
requests.get('example.com')  # GET via the per-verb helper; URL passed positionally
requests.post('example.com')  # POST via the per-verb helper; URL passed positionally

View File

@@ -0,0 +1,10 @@
| test.py:7:5:7:32 | ControlFlowNode for Attribute() | test.py:6:27:6:39 | ControlFlowNode for Str | GET |
| test.py:7:5:7:32 | ControlFlowNode for Attribute() | test.py:7:25:7:31 | ControlFlowNode for Str | GET |
| test.py:16:5:16:33 | ControlFlowNode for Attribute() | test.py:11:28:11:40 | ControlFlowNode for Str | POST |
| test.py:16:5:16:33 | ControlFlowNode for Attribute() | test.py:16:26:16:32 | ControlFlowNode for Str | POST |
| test.py:21:5:21:33 | ControlFlowNode for Attribute() | test.py:20:27:20:39 | ControlFlowNode for Str | <NO METHOD> |
| test.py:21:5:21:33 | ControlFlowNode for Attribute() | test.py:21:26:21:32 | ControlFlowNode for Str | <NO METHOD> |
| test.py:31:5:31:32 | ControlFlowNode for Attribute() | test.py:29:27:29:30 | ControlFlowNode for fake | GET |
| test.py:31:5:31:32 | ControlFlowNode for Attribute() | test.py:31:25:31:31 | ControlFlowNode for Str | GET |
| test.py:38:5:38:29 | ControlFlowNode for req_meth() | test.py:36:27:36:39 | ControlFlowNode for Str | HEAD |
| test.py:38:5:38:29 | ControlFlowNode for req_meth() | test.py:38:22:38:28 | ControlFlowNode for Str | HEAD |

View File

@@ -0,0 +1,11 @@
import python
import semmle.python.web.Http
import semmle.python.web.ClientHttpRequest
// One row per (request, URL part). The last column is the upper-cased HTTP
// method, or the "<NO METHOD>" placeholder when the model has no method.
from Client::HttpRequest req, string method
where
    method = req.getMethodUpper()
    or
    (not exists(req.getMethodUpper()) and method = "<NO METHOD>")
select req, req.getAUrlPart(), method

View File

@@ -0,0 +1 @@
semmle-extractor-options: --max-import-depth=2

View File

@@ -0,0 +1,38 @@
from six.moves.http_client import HTTPConnection, HTTPSConnection
from six.moves.urllib.parse import urlsplit
def basic():  # baseline: connection is built and used in the same function
    conn = HTTPConnection('example.com')
    conn.request('GET', '/path')
def indirect_caller():  # builds the connection; the request is issued in indirect_callee
    conn = HTTPSConnection('example.com')
    indirect_callee(conn)
def indirect_callee(conn):  # issues the request on a connection received as a parameter
    conn.request('POST', '/path')
def method_not_known(method):  # HTTP method comes from a parameter, not a string literal
    conn = HTTPConnection('example.com')
    conn.request(method, '/path')
def sneaky_setting_host():  # host attribute is reassigned between construction and request
    # We don't handle that the host is overwritten directly.
    # A contrived example; you're not supposed to do this, but you certainly can.
    fake = 'fakehost.com'
    real = 'realhost.com'
    conn = HTTPConnection(fake)
    conn.host = real  # the direct overwrite that the model does not track
    conn.request('GET', '/path')
def tricky_not_attribute_node():  # bound method is called through a local alias
    # A contrived example; you're not supposed to do this, but you certainly can.
    conn = HTTPConnection('example.com')
    req_meth = conn.request
    req_meth('HEAD', '/path')