Merge pull request #2904 from RasmusWL/python-http-clients

Python: Model outgoing HTTP client requests
This commit is contained in:
Taus
2020-02-26 15:49:41 +01:00
committed by GitHub
16 changed files with 267 additions and 2 deletions

View File

@@ -0,0 +1,2 @@
import semmle.python.web.client.StdLib
import semmle.python.web.client.Requests

View File

@@ -89,7 +89,7 @@ abstract class CookieSet extends CookieOperation {}
/** Generic taint sink in a http response */
abstract class HttpResponseTaintSink extends TaintSink {
override predicate sinks(TaintKind kind) {
override predicate sinks(TaintKind kind) {
kind instanceof ExternalStringKind
}
@@ -97,9 +97,51 @@ abstract class HttpResponseTaintSink extends TaintSink {
abstract class HttpRedirectTaintSink extends TaintSink {
override predicate sinks(TaintKind kind) {
override predicate sinks(TaintKind kind) {
kind instanceof ExternalStringKind
}
}
module Client {
// TODO: user-input in other than URL:
// - `data`, `json` for `requests.post`
// - `body` for `HTTPConnection.request`
// - headers?
// TODO: Add more library support
// - urllib3 https://github.com/urllib3/urllib3
// - httpx https://github.com/encode/httpx
/**
* An outgoing http request
*
* For example:
* conn = HTTPConnection('example.com')
conn.request('GET', '/path')
*/
abstract class HttpRequest extends ControlFlowNode {
/** Get any ControlFlowNode that is used to construct the final URL.
*
* In the HTTPConnection example, there is a result for both `'example.com'` and for `'/path'`.
*/
abstract ControlFlowNode getAUrlPart();
abstract string getMethodUpper();
}
/** Taint sink for the URL-part of an outgoing http request */
class HttpRequestUrlTaintSink extends TaintSink {
HttpRequestUrlTaintSink() {
this = any(HttpRequest r).getAUrlPart()
}
override predicate sinks(TaintKind kind) {
kind instanceof ExternalStringKind
}
}
}

View File

@@ -0,0 +1,22 @@
/**
* Modeling outgoing HTTP requests using the `requests` package
* https://pypi.org/project/requests/
*/
import python
private import semmle.python.web.Http
class RequestsHttpRequest extends Client::HttpRequest, CallNode {
CallableValue func;
string method;
RequestsHttpRequest() {
method = httpVerbLower() and
func = Module::named("requests").attr(method) and
this = func.getACall()
}
override ControlFlowNode getAUrlPart() { result = func.getNamedArgumentForCall(this, "url") }
override string getMethodUpper() { result = method.toUpperCase() }
}

View File

@@ -0,0 +1,55 @@
import python
private import semmle.python.web.Http
ClassValue httpConnectionClass() {
// Python 2
result = Value::named("httplib.HTTPConnection")
or
result = Value::named("httplib.HTTPSConnection")
or
// Python 3
result = Value::named("http.client.HTTPConnection")
or
result = Value::named("http.client.HTTPSConnection")
or
// six
result = Value::named("six.moves.http_client.HTTPConnection")
or
result = Value::named("six.moves.http_client.HTTPSConnection")
}
class HttpConnectionHttpRequest extends Client::HttpRequest, CallNode {
CallNode constructor_call;
CallableValue func;
HttpConnectionHttpRequest() {
exists(ClassValue cls, AttrNode call_origin, Value constructor_call_value |
cls = httpConnectionClass() and
func = cls.lookup("request") and
this = func.getACall() and
// since you can do `r = conn.request; r('GET', path)`, we need to find the origin
this.getFunction().pointsTo(_, _, call_origin) and
// Since HTTPSConnection is a subtype of HTTPConnection, up until this point, `cls` could be either class,
// because `HTTPSConnection.request == HTTPConnection.request`. To avoid generating 2 results, we filter
// on the actual class used as the constructor
call_origin.getObject().pointsTo(_, constructor_call_value, constructor_call) and
cls = constructor_call_value.getClass() and
constructor_call = cls.getACall()
)
}
override ControlFlowNode getAUrlPart() {
result = func.getNamedArgumentForCall(this, "url")
or
result = constructor_call.getArg(0)
or
result = constructor_call.getArgByName("host")
}
override string getMethodUpper() {
exists(string method |
result = method.toUpperCase() and
func.getNamedArgumentForCall(this, "method").pointsTo(Value::forString(method))
)
}
}

View File

@@ -0,0 +1,2 @@
| test.py:3:1:3:27 | ControlFlowNode for Attribute() | test.py:3:14:3:26 | ControlFlowNode for Str | GET |
| test.py:4:1:4:28 | ControlFlowNode for Attribute() | test.py:4:15:4:27 | ControlFlowNode for Str | POST |

View File

@@ -0,0 +1,11 @@
import python
import semmle.python.web.Http
import semmle.python.web.ClientHttpRequest
from Client::HttpRequest req, string method
where
if exists(req.getMethodUpper())
then method = req.getMethodUpper()
else method = "<NO METHOD>"
select req, req.getAUrlPart(), method

View File

@@ -0,0 +1 @@
semmle-extractor-options: -p ../../../../query-tests/Security/lib/ --max-import-depth=1

View File

@@ -0,0 +1,4 @@
import requests
requests.get('example.com')
requests.post('example.com')

View File

@@ -0,0 +1,10 @@
| test.py:6:5:6:32 | ControlFlowNode for Attribute() | test.py:5:27:5:39 | ControlFlowNode for Str | GET |
| test.py:6:5:6:32 | ControlFlowNode for Attribute() | test.py:6:25:6:31 | ControlFlowNode for Str | GET |
| test.py:15:5:15:33 | ControlFlowNode for Attribute() | test.py:10:28:10:40 | ControlFlowNode for Str | POST |
| test.py:15:5:15:33 | ControlFlowNode for Attribute() | test.py:15:26:15:32 | ControlFlowNode for Str | POST |
| test.py:20:5:20:33 | ControlFlowNode for Attribute() | test.py:19:27:19:39 | ControlFlowNode for Str | <NO METHOD> |
| test.py:20:5:20:33 | ControlFlowNode for Attribute() | test.py:20:26:20:32 | ControlFlowNode for Str | <NO METHOD> |
| test.py:30:5:30:32 | ControlFlowNode for Attribute() | test.py:28:27:28:30 | ControlFlowNode for fake | GET |
| test.py:30:5:30:32 | ControlFlowNode for Attribute() | test.py:30:25:30:31 | ControlFlowNode for Str | GET |
| test.py:37:5:37:29 | ControlFlowNode for req_meth() | test.py:35:27:35:39 | ControlFlowNode for Str | HEAD |
| test.py:37:5:37:29 | ControlFlowNode for req_meth() | test.py:37:22:37:28 | ControlFlowNode for Str | HEAD |

View File

@@ -0,0 +1,11 @@
import python
import semmle.python.web.Http
import semmle.python.web.ClientHttpRequest
from Client::HttpRequest req, string method
where
if exists(req.getMethodUpper())
then method = req.getMethodUpper()
else method = "<NO METHOD>"
select req, req.getAUrlPart(), method

View File

@@ -0,0 +1,2 @@
semmle-extractor-options: --max-import-depth=2
optimize: true

View File

@@ -0,0 +1,37 @@
from six.moves.http_client import HTTPConnection, HTTPSConnection
def basic():
conn = HTTPConnection('example.com')
conn.request('GET', '/path')
def indirect_caller():
conn = HTTPSConnection('example.com')
indirect_callee(conn)
def indirect_callee(conn):
conn.request('POST', '/path')
def method_not_known(method):
conn = HTTPConnection('example.com')
conn.request(method, '/path')
def sneaky_setting_host():
# We don't handle that the host is overwritten directly.
# A contrived example; you're not supposed to do this, but you certainly can.
fake = 'fakehost.com'
real = 'realhost.com'
conn = HTTPConnection(fake)
conn.host = real
conn.request('GET', '/path')
def tricky_not_attribute_node():
# A contrived example; you're not supposed to do this, but you certainly can.
conn = HTTPConnection('example.com')
req_meth = conn.request
req_meth('HEAD', '/path')

View File

@@ -0,0 +1,10 @@
| test.py:13:5:13:32 | ControlFlowNode for Attribute() | test.py:12:27:12:39 | ControlFlowNode for Str | GET |
| test.py:13:5:13:32 | ControlFlowNode for Attribute() | test.py:13:25:13:31 | ControlFlowNode for Str | GET |
| test.py:22:5:22:33 | ControlFlowNode for Attribute() | test.py:17:28:17:40 | ControlFlowNode for Str | POST |
| test.py:22:5:22:33 | ControlFlowNode for Attribute() | test.py:22:26:22:32 | ControlFlowNode for Str | POST |
| test.py:27:5:27:33 | ControlFlowNode for Attribute() | test.py:26:27:26:39 | ControlFlowNode for Str | <NO METHOD> |
| test.py:27:5:27:33 | ControlFlowNode for Attribute() | test.py:27:26:27:32 | ControlFlowNode for Str | <NO METHOD> |
| test.py:37:5:37:32 | ControlFlowNode for Attribute() | test.py:35:27:35:30 | ControlFlowNode for fake | GET |
| test.py:37:5:37:32 | ControlFlowNode for Attribute() | test.py:37:25:37:31 | ControlFlowNode for Str | GET |
| test.py:44:5:44:29 | ControlFlowNode for req_meth() | test.py:42:27:42:39 | ControlFlowNode for Str | HEAD |
| test.py:44:5:44:29 | ControlFlowNode for req_meth() | test.py:44:22:44:28 | ControlFlowNode for Str | HEAD |

View File

@@ -0,0 +1,11 @@
import python
import semmle.python.web.Http
import semmle.python.web.ClientHttpRequest
from Client::HttpRequest req, string method
where
if exists(req.getMethodUpper())
then method = req.getMethodUpper()
else method = "<NO METHOD>"
select req, req.getAUrlPart(), method

View File

@@ -0,0 +1 @@
semmle-extractor-options: --max-import-depth=1

View File

@@ -0,0 +1,44 @@
import sys
PY2 = sys.version_info[0] == 2
PY3 = sys.version_info[0] == 3
if PY2:
from httplib import HTTPConnection, HTTPSConnection
if PY3:
from http.client import HTTPConnection, HTTPSConnection
def basic():
conn = HTTPConnection('example.com')
conn.request('GET', '/path')
def indirect_caller():
conn = HTTPSConnection('example.com')
indirect_callee(conn)
def indirect_callee(conn):
conn.request('POST', '/path')
def method_not_known(method):
conn = HTTPConnection('example.com')
conn.request(method, '/path')
def sneaky_setting_host():
# We don't handle that the host is overwritten directly.
# A contrived example; you're not supposed to do this, but you certainly can.
fake = 'fakehost.com'
real = 'realhost.com'
conn = HTTPConnection(fake)
conn.host = real
conn.request('GET', '/path')
def tricky_not_attribute_node():
# A contrived example; you're not supposed to do this, but you certainly can.
conn = HTTPConnection('example.com')
req_meth = conn.request
req_meth('HEAD', '/path')