Merge branch 'main' into change/adjust-extracted-files-diagnostics

This commit is contained in:
Sid Shankar
2024-01-16 21:51:41 -05:00
committed by GitHub
598 changed files with 207550 additions and 24866 deletions

View File

@@ -1,3 +1,7 @@
## 0.9.6
No user-facing changes.
## 0.9.5
No user-facing changes.

View File

@@ -16,6 +16,10 @@ To guard against untrusted URL redirection, it is advisable to avoid putting use
directly into a redirect URL. Instead, maintain a list of authorized
redirects on the server; then choose from that list based on the user input provided.
</p>
<p>
If this is not possible, then the user input should be validated in some other way,
for example, by verifying that the target URL does not include an explicit host name.
</p>
</recommendation>
<example>
@@ -27,11 +31,29 @@ without validating the input, which facilitates phishing attacks:
<sample src="examples/redirect_bad.py"/>
<p>
One way to remedy the problem is to validate the user input against a known fixed string
before doing the redirection:
If you know the set of valid redirect targets, you can maintain a list of them on the server
and check that the user input is in that list:
</p>
<sample src="examples/redirect_good.py"/>
<p>
Often this is not possible, so an alternative is to check that the target URL does not
specify an explicit host name. For example, the Django framework provides a
function <code>url_has_allowed_host_and_scheme</code> that can be used to check that a
URL is safe to redirect to, as shown in the following example:
</p>
<sample src="examples/redirect_good2.py"/>
<p>
Note that many browsers accept backslash characters (<code>\</code>) as equivalent to
forward slash characters (<code>/</code>) in URLs, so it is important to account for this
when validating the URL, for example by first replacing all backslashes with forward
slashes. Django's <code>url_has_allowed_host_and_scheme</code> function
does this automatically, but other libraries may not.
</p>
</example>
<references>

View File

@@ -10,4 +10,5 @@ def hello():
if target == VALID_REDIRECT:
return redirect(target, code=302)
else:
... # Error
# ignore the target and redirect to the home page
return redirect('/', code=302)

View File

@@ -0,0 +1,13 @@
from django.http import HttpResponseRedirect
from django.shortcuts import redirect
from django.utils.http import url_has_allowed_host_and_scheme
from django.views import View
class RedirectView(View):
    """Example view that only follows a user-supplied redirect Django accepts."""

    def get(self, request, *args, **kwargs):
        """Redirect to the ``target`` query parameter, or to ``/`` if it is rejected."""
        destination = request.GET.get('target', '')
        if not url_has_allowed_host_and_scheme(destination, allowed_hosts=None):
            # ignore the target and redirect to the home page
            return redirect('/')
        return HttpResponseRedirect(destination)

View File

@@ -0,0 +1,3 @@
## 0.9.6
No user-facing changes.

View File

@@ -1,2 +1,2 @@
---
lastReleaseVersion: 0.9.5
lastReleaseVersion: 0.9.6

View File

@@ -0,0 +1,589 @@
/**
* @name Find new subclasses to model
* @id py/meta/find-subclasses-to-model
* @kind table
*/
import python
import semmle.python.dataflow.new.DataFlow
private import semmle.python.ApiGraphs
import semmle.python.frameworks.internal.SubclassFinder::NotExposed
private import semmle.python.frameworks.Flask
private import semmle.python.frameworks.FastApi
private import semmle.python.frameworks.Django
private import semmle.python.frameworks.Tornado
private import semmle.python.frameworks.Stdlib
private import semmle.python.frameworks.Requests
private import semmle.python.frameworks.Starlette
private import semmle.python.frameworks.ClickhouseDriver
private import semmle.python.frameworks.Aiohttp
private import semmle.python.frameworks.Fabric
private import semmle.python.frameworks.Httpx
private import semmle.python.frameworks.Invoke
private import semmle.python.frameworks.MarkupSafe
private import semmle.python.frameworks.Multidict
private import semmle.python.frameworks.Pycurl
private import semmle.python.frameworks.RestFramework
private import semmle.python.frameworks.SqlAlchemy
private import semmle.python.frameworks.Tornado
private import semmle.python.frameworks.Urllib3
private import semmle.python.frameworks.Pydantic
private import semmle.python.frameworks.Peewee
private import semmle.python.frameworks.Aioch
private import semmle.python.frameworks.Lxml
import semmle.python.frameworks.data.internal.ApiGraphModelsExtensions as Extensions
/** Spec `flask.View~Subclass`; its already-modeled class is `Flask::Views::View::subclassRef()`. */
class FlaskViewClasses extends FindSubclassesSpec {
FlaskViewClasses() { this = "flask.View~Subclass" }
override API::Node getAlreadyModeledClass() { result = Flask::Views::View::subclassRef() }
}
/**
 * Spec `flask.MethodView~Subclass`; its already-modeled class is
 * `Flask::Views::MethodView::subclassRef()`.
 * Declared as more specific than `FlaskViewClasses` via `getSuperClass()`.
 */
class FlaskMethodViewClasses extends FindSubclassesSpec {
FlaskMethodViewClasses() { this = "flask.MethodView~Subclass" }
override API::Node getAlreadyModeledClass() { result = Flask::Views::MethodView::subclassRef() }
override FindSubclassesSpec getSuperClass() { result instanceof FlaskViewClasses }
override string getFullyQualifiedName() { result = "flask.views.MethodView" }
}
/** Spec `fastapi.APIRouter~Subclass`; its already-modeled class is `FastApi::ApiRouter::cls()`. */
class FastApiRouter extends FindSubclassesSpec {
FastApiRouter() { this = "fastapi.APIRouter~Subclass" }
override API::Node getAlreadyModeledClass() { result = FastApi::ApiRouter::cls() }
}
/**
 * Spec `django.forms.BaseForm~Subclass`; its already-modeled classes are all
 * `Django::Forms::Form::ModeledSubclass` values.
 */
class DjangoForms extends FindSubclassesSpec {
DjangoForms() { this = "django.forms.BaseForm~Subclass" }
override API::Node getAlreadyModeledClass() {
result = any(Django::Forms::Form::ModeledSubclass subclass)
}
}
/**
 * Spec `Django.Views.View~Subclass`; its already-modeled classes are all
 * `Django::Views::View::ModeledSubclass` values.
 *
 * NOTE(review): this id is capitalised (`Django.Views...`) unlike the lower-case
 * `django.*` ids of other specs in this file -- confirm it is intentional before
 * changing it, since stored model rows may be keyed on this exact string.
 */
class DjangoView extends FindSubclassesSpec {
DjangoView() { this = "Django.Views.View~Subclass" }
override API::Node getAlreadyModeledClass() {
result = any(Django::Views::View::ModeledSubclass subclass)
}
}
/**
 * Spec `Django.Forms.Field~Subclass`; its already-modeled classes are all
 * `Django::Forms::Field::ModeledSubclass` values.
 *
 * NOTE(review): this id is capitalised (`Django.Forms...`) unlike the lower-case
 * `django.*` ids of other specs in this file -- confirm it is intentional before
 * changing it, since stored model rows may be keyed on this exact string.
 */
class DjangoField extends FindSubclassesSpec {
DjangoField() { this = "Django.Forms.Field~Subclass" }
override API::Node getAlreadyModeledClass() {
result = any(Django::Forms::Field::ModeledSubclass subclass)
}
}
/**
 * Spec `Django.db.models.Model~Subclass`; its already-modeled class is
 * `PrivateDjango::DjangoImpl::DB::Models::Model::subclassRef()`.
 *
 * NOTE(review): this id starts with a capital `Django` unlike the lower-case
 * `django.*` ids of other specs in this file -- confirm it is intentional before
 * changing it, since stored model rows may be keyed on this exact string.
 */
class DjangoModel extends FindSubclassesSpec {
DjangoModel() { this = "Django.db.models.Model~Subclass" }
override API::Node getAlreadyModeledClass() {
result = PrivateDjango::DjangoImpl::DB::Models::Model::subclassRef()
}
}
/**
 * Spec `tornado.web.RequestHandler~Subclass`; its already-modeled class is
 * `Tornado::TornadoModule::Web::RequestHandler::subclassRef()`.
 */
class TornadoRequestHandler extends FindSubclassesSpec {
TornadoRequestHandler() { this = "tornado.web.RequestHandler~Subclass" }
override API::Node getAlreadyModeledClass() {
result = Tornado::TornadoModule::Web::RequestHandler::subclassRef()
}
}
/**
 * Spec `wsgiref.simple_server.WSGIServer~Subclass`; its already-modeled class is
 * `StdlibPrivate::WsgirefSimpleServer::subclassRef()`.
 */
class WSGIServer extends FindSubclassesSpec {
WSGIServer() { this = "wsgiref.simple_server.WSGIServer~Subclass" }
override API::Node getAlreadyModeledClass() {
result = StdlibPrivate::WsgirefSimpleServer::subclassRef()
}
}
/**
 * Spec `http.server.BaseHTTPRequestHandler~Subclass`; its already-modeled class is
 * `StdlibPrivate::BaseHttpRequestHandler::subclassRef()`.
 */
class StdlibBaseHttpRequestHandler extends FindSubclassesSpec {
StdlibBaseHttpRequestHandler() { this = "http.server.BaseHTTPRequestHandler~Subclass" }
override API::Node getAlreadyModeledClass() {
result = StdlibPrivate::BaseHttpRequestHandler::subclassRef()
}
}
/**
 * Spec `cgi.FieldStorage~Subclass`; its already-modeled class is
 * `StdlibPrivate::Cgi::FieldStorage::subclassRef()`.
 */
class StdlibCgiFieldStorage extends FindSubclassesSpec {
StdlibCgiFieldStorage() { this = "cgi.FieldStorage~Subclass" }
override API::Node getAlreadyModeledClass() {
result = StdlibPrivate::Cgi::FieldStorage::subclassRef()
}
}
/**
 * Spec `django.http.response.HttpResponse~Subclass`; its already-modeled class is
 * `...DjangoHttp::Response::HttpResponse::classRef()`.
 * Several more specific response specs in this file name it as their super spec.
 */
class DjangoHttpResponse extends FindSubclassesSpec {
DjangoHttpResponse() { this = "django.http.response.HttpResponse~Subclass" }
override API::Node getAlreadyModeledClass() {
result = PrivateDjango::DjangoImpl::DjangoHttp::Response::HttpResponse::classRef()
}
}
/**
 * Spec `django.http.response.HttpResponseRedirect~Subclass`; its already-modeled
 * class is `...DjangoHttp::Response::HttpResponseRedirect::classRef()`.
 * Declared as more specific than `DjangoHttpResponse` via `getSuperClass()`.
 */
class DjangoHttpResponseRedirect extends FindSubclassesSpec {
DjangoHttpResponseRedirect() { this = "django.http.response.HttpResponseRedirect~Subclass" }
override API::Node getAlreadyModeledClass() {
result = PrivateDjango::DjangoImpl::DjangoHttp::Response::HttpResponseRedirect::classRef()
}
override FindSubclassesSpec getSuperClass() { result instanceof DjangoHttpResponse }
override string getFullyQualifiedName() { result = "django.http.response.HttpResponseRedirect" }
}
/**
 * Spec `django.http.response.HttpResponsePermanentRedirect~Subclass`; its
 * already-modeled class is
 * `...DjangoHttp::Response::HttpResponsePermanentRedirect::classRef()`.
 * Declared as more specific than `DjangoHttpResponse` via `getSuperClass()`.
 */
class DjangoHttpResponsePermanentRedirect extends FindSubclassesSpec {
DjangoHttpResponsePermanentRedirect() {
this = "django.http.response.HttpResponsePermanentRedirect~Subclass"
}
override API::Node getAlreadyModeledClass() {
result =
PrivateDjango::DjangoImpl::DjangoHttp::Response::HttpResponsePermanentRedirect::classRef()
}
override FindSubclassesSpec getSuperClass() { result instanceof DjangoHttpResponse }
override string getFullyQualifiedName() {
result = "django.http.response.HttpResponsePermanentRedirect"
}
}
/**
 * Spec `django.http.response.HttpResponseNotModified~Subclass`; its already-modeled
 * class is `...DjangoHttp::Response::HttpResponseNotModified::classRef()`.
 * Declared as more specific than `DjangoHttpResponse` via `getSuperClass()`.
 */
class DjangoHttpResponseNotModified extends FindSubclassesSpec {
DjangoHttpResponseNotModified() { this = "django.http.response.HttpResponseNotModified~Subclass" }
override API::Node getAlreadyModeledClass() {
result = PrivateDjango::DjangoImpl::DjangoHttp::Response::HttpResponseNotModified::classRef()
}
override FindSubclassesSpec getSuperClass() { result instanceof DjangoHttpResponse }
override string getFullyQualifiedName() {
result = "django.http.response.HttpResponseNotModified"
}
}
/**
 * Spec `django.http.response.HttpResponseBadRequest~Subclass`; its already-modeled
 * class is `...DjangoHttp::Response::HttpResponseBadRequest::classRef()`.
 * Declared as more specific than `DjangoHttpResponse` via `getSuperClass()`.
 */
class DjangoHttpResponseBadRequest extends FindSubclassesSpec {
DjangoHttpResponseBadRequest() { this = "django.http.response.HttpResponseBadRequest~Subclass" }
override API::Node getAlreadyModeledClass() {
result = PrivateDjango::DjangoImpl::DjangoHttp::Response::HttpResponseBadRequest::classRef()
}
override FindSubclassesSpec getSuperClass() { result instanceof DjangoHttpResponse }
override string getFullyQualifiedName() { result = "django.http.response.HttpResponseBadRequest" }
}
/**
 * Spec `django.http.response.HttpResponseNotFound~Subclass`; its already-modeled
 * class is `...DjangoHttp::Response::HttpResponseNotFound::classRef()`.
 * Declared as more specific than `DjangoHttpResponse` via `getSuperClass()`.
 */
class DjangoHttpResponseNotFound extends FindSubclassesSpec {
DjangoHttpResponseNotFound() { this = "django.http.response.HttpResponseNotFound~Subclass" }
override API::Node getAlreadyModeledClass() {
result = PrivateDjango::DjangoImpl::DjangoHttp::Response::HttpResponseNotFound::classRef()
}
override FindSubclassesSpec getSuperClass() { result instanceof DjangoHttpResponse }
override string getFullyQualifiedName() { result = "django.http.response.HttpResponseNotFound" }
}
/**
 * Spec `django.http.response.HttpResponseForbidden~Subclass`; its already-modeled
 * class is `...DjangoHttp::Response::HttpResponseForbidden::classRef()`.
 * Declared as more specific than `DjangoHttpResponse` via `getSuperClass()`.
 */
class DjangoHttpResponseForbidden extends FindSubclassesSpec {
DjangoHttpResponseForbidden() { this = "django.http.response.HttpResponseForbidden~Subclass" }
override API::Node getAlreadyModeledClass() {
result = PrivateDjango::DjangoImpl::DjangoHttp::Response::HttpResponseForbidden::classRef()
}
override FindSubclassesSpec getSuperClass() { result instanceof DjangoHttpResponse }
override string getFullyQualifiedName() { result = "django.http.response.HttpResponseForbidden" }
}
/**
 * Spec `django.http.response.HttpResponseNotAllowed~Subclass`; its already-modeled
 * class is `...DjangoHttp::Response::HttpResponseNotAllowed::classRef()`.
 * Declared as more specific than `DjangoHttpResponse` via `getSuperClass()`.
 */
class DjangoHttpResponseNotAllowed extends FindSubclassesSpec {
DjangoHttpResponseNotAllowed() { this = "django.http.response.HttpResponseNotAllowed~Subclass" }
override API::Node getAlreadyModeledClass() {
result = PrivateDjango::DjangoImpl::DjangoHttp::Response::HttpResponseNotAllowed::classRef()
}
override FindSubclassesSpec getSuperClass() { result instanceof DjangoHttpResponse }
override string getFullyQualifiedName() { result = "django.http.response.HttpResponseNotAllowed" }
}
/**
 * Spec `django.http.response.HttpResponseGone~Subclass`; its already-modeled class
 * is `...DjangoHttp::Response::HttpResponseGone::classRef()`.
 * Declared as more specific than `DjangoHttpResponse` via `getSuperClass()`.
 */
class DjangoHttpResponseGone extends FindSubclassesSpec {
DjangoHttpResponseGone() { this = "django.http.response.HttpResponseGone~Subclass" }
override API::Node getAlreadyModeledClass() {
result = PrivateDjango::DjangoImpl::DjangoHttp::Response::HttpResponseGone::classRef()
}
override FindSubclassesSpec getSuperClass() { result instanceof DjangoHttpResponse }
override string getFullyQualifiedName() { result = "django.http.response.HttpResponseGone" }
}
/**
 * Spec `django.http.response.HttpResponseServerError~Subclass`; its already-modeled
 * class is `...DjangoHttp::Response::HttpResponseServerError::classRef()`.
 * Declared as more specific than `DjangoHttpResponse` via `getSuperClass()`.
 */
class DjangoHttpResponseServerError extends FindSubclassesSpec {
DjangoHttpResponseServerError() { this = "django.http.response.HttpResponseServerError~Subclass" }
override API::Node getAlreadyModeledClass() {
result = PrivateDjango::DjangoImpl::DjangoHttp::Response::HttpResponseServerError::classRef()
}
override FindSubclassesSpec getSuperClass() { result instanceof DjangoHttpResponse }
override string getFullyQualifiedName() {
result = "django.http.response.HttpResponseServerError"
}
}
/**
 * Spec `django.http.response.JsonResponse~Subclass`; its already-modeled class is
 * `...DjangoHttp::Response::JsonResponse::classRef()`.
 * Declared as more specific than `DjangoHttpResponse` via `getSuperClass()`.
 */
class DjangoHttpResponseJsonResponse extends FindSubclassesSpec {
DjangoHttpResponseJsonResponse() { this = "django.http.response.JsonResponse~Subclass" }
override API::Node getAlreadyModeledClass() {
result = PrivateDjango::DjangoImpl::DjangoHttp::Response::JsonResponse::classRef()
}
override FindSubclassesSpec getSuperClass() { result instanceof DjangoHttpResponse }
override string getFullyQualifiedName() { result = "django.http.response.JsonResponse" }
}
/**
 * Spec `django.http.response.StreamingHttpResponse~Subclass`; its already-modeled
 * class is `...DjangoHttp::Response::StreamingHttpResponse::classRef()`.
 * `DjangoHttpResponseFileResponse` names this spec as its super spec.
 */
class DjangoHttpResponseStreamingResponse extends FindSubclassesSpec {
DjangoHttpResponseStreamingResponse() {
this = "django.http.response.StreamingHttpResponse~Subclass"
}
override API::Node getAlreadyModeledClass() {
result = PrivateDjango::DjangoImpl::DjangoHttp::Response::StreamingHttpResponse::classRef()
}
}
/**
 * Spec `django.http.response.FileResponse~Subclass`; its already-modeled class is
 * `...DjangoHttp::Response::FileResponse::classRef()`.
 * Declared as more specific than `DjangoHttpResponseStreamingResponse` via
 * `getSuperClass()`.
 */
class DjangoHttpResponseFileResponse extends FindSubclassesSpec {
DjangoHttpResponseFileResponse() { this = "django.http.response.FileResponse~Subclass" }
override API::Node getAlreadyModeledClass() {
result = PrivateDjango::DjangoImpl::DjangoHttp::Response::FileResponse::classRef()
}
override FindSubclassesSpec getSuperClass() {
result instanceof DjangoHttpResponseStreamingResponse
}
override string getFullyQualifiedName() { result = "django.http.response.FileResponse" }
}
/** Spec `flask.Response~Subclass`; its already-modeled class is `Flask::Response::classRef()`. */
class FlaskResponse extends FindSubclassesSpec {
FlaskResponse() { this = "flask.Response~Subclass" }
override API::Node getAlreadyModeledClass() { result = Flask::Response::classRef() }
}
/** Spec `requests.models.Response~Subclass`; its already-modeled class is `Requests::Response::classRef()`. */
class RequestsResponse extends FindSubclassesSpec {
RequestsResponse() { this = "requests.models.Response~Subclass" }
override API::Node getAlreadyModeledClass() { result = Requests::Response::classRef() }
}
/** Spec `http.client.HTTPResponse~Subclass`; its already-modeled class is `StdlibPrivate::HttpResponse::classRef()`. */
class HttpClientHttpResponse extends FindSubclassesSpec {
HttpClientHttpResponse() { this = "http.client.HTTPResponse~Subclass" }
override API::Node getAlreadyModeledClass() { result = StdlibPrivate::HttpResponse::classRef() }
}
/** Spec `starlette.websockets.WebSocket~Subclass`; its already-modeled class is `Starlette::WebSocket::classRef()`. */
class StarletteWebsocket extends FindSubclassesSpec {
StarletteWebsocket() { this = "starlette.websockets.WebSocket~Subclass" }
override API::Node getAlreadyModeledClass() { result = Starlette::WebSocket::classRef() }
}
/** Spec `starlette.requests.URL~Subclass`; its already-modeled class is `Starlette::Url::classRef()`. */
class StarletteUrl extends FindSubclassesSpec {
StarletteUrl() { this = "starlette.requests.URL~Subclass" }
override API::Node getAlreadyModeledClass() { result = Starlette::Url::classRef() }
}
/** Spec `clickhouse_driver.client.Client~Subclass`; its already-modeled class is `ClickhouseDriver::Client::subclassRef()`. */
class ClickhouseClient extends FindSubclassesSpec {
ClickhouseClient() { this = "clickhouse_driver.client.Client~Subclass" }
override API::Node getAlreadyModeledClass() { result = ClickhouseDriver::Client::subclassRef() }
}
/** Spec `aiohttp.ClientSession~Subclass`; its already-modeled class is `AiohttpClientModel::ClientSession::classRef()`. */
class AiohttpSession extends FindSubclassesSpec {
AiohttpSession() { this = "aiohttp.ClientSession~Subclass" }
override API::Node getAlreadyModeledClass() {
result = AiohttpClientModel::ClientSession::classRef()
}
}
/**
 * Spec `fabric.connection.Connection~Subclass`; its already-modeled class is
 * `FabricV2::Fabric::Connection::ConnectionClass::classRef()`.
 */
class FabricConnection extends FindSubclassesSpec {
FabricConnection() { this = "fabric.connection.Connection~Subclass" }
override API::Node getAlreadyModeledClass() {
result = FabricV2::Fabric::Connection::ConnectionClass::classRef()
}
}
/**
 * Spec `django.db.models.expressions.RawSQL~Subclass`; its already-modeled class is
 * `PrivateDjango::DjangoImpl::DB::Models::Expressions::RawSql::classRef()`.
 */
class DjangoRawSql extends FindSubclassesSpec {
DjangoRawSql() { this = "django.db.models.expressions.RawSQL~Subclass" }
override API::Node getAlreadyModeledClass() {
result = PrivateDjango::DjangoImpl::DB::Models::Expressions::RawSql::classRef()
}
}
/**
 * Spec `django.http.request.HttpRequest~Subclass`; its already-modeled class is
 * `PrivateDjango::DjangoImpl::DjangoHttp::Request::HttpRequest::classRef()`.
 */
class DjangoHttpRequest extends FindSubclassesSpec {
DjangoHttpRequest() { this = "django.http.request.HttpRequest~Subclass" }
override API::Node getAlreadyModeledClass() {
result = PrivateDjango::DjangoImpl::DjangoHttp::Request::HttpRequest::classRef()
}
}
/** Spec `flask.Flask~Subclass`; its already-modeled class is `Flask::FlaskApp::classRef()`. */
class FlaskClass extends FindSubclassesSpec {
FlaskClass() { this = "flask.Flask~Subclass" }
override API::Node getAlreadyModeledClass() { result = Flask::FlaskApp::classRef() }
}
/** Spec `flask.Blueprint~Subclass`; its already-modeled class is `Flask::Blueprint::classRef()`. */
class FlaskBlueprint extends FindSubclassesSpec {
FlaskBlueprint() { this = "flask.Blueprint~Subclass" }
override API::Node getAlreadyModeledClass() { result = Flask::Blueprint::classRef() }
}
/** Spec `httpx.Client~Subclass`; its already-modeled class is `HttpxModel::Client::classRef()`. */
class HttpxClient extends FindSubclassesSpec {
HttpxClient() { this = "httpx.Client~Subclass" }
override API::Node getAlreadyModeledClass() { result = HttpxModel::Client::classRef() }
}
/**
 * Spec `invoke.context.Context~Subclass`; its already-modeled class is
 * `Invoke::InvokeModule::Context::ContextClass::classRef()`.
 */
class InvokeContext extends FindSubclassesSpec {
InvokeContext() { this = "invoke.context.Context~Subclass" }
override API::Node getAlreadyModeledClass() {
result = Invoke::InvokeModule::Context::ContextClass::classRef()
}
}
/** Spec `markupsafe.Markup~Subclass`; its already-modeled class is `MarkupSafeModel::Markup::classRef()`. */
class MarkupSafe extends FindSubclassesSpec {
MarkupSafe() { this = "markupsafe.Markup~Subclass" }
override API::Node getAlreadyModeledClass() { result = MarkupSafeModel::Markup::classRef() }
}
/** Spec `multidict.MultiDictProxy~Subclass`; its already-modeled class is `Multidict::MultiDictProxy::classRef()`. */
class Multidict extends FindSubclassesSpec {
Multidict() { this = "multidict.MultiDictProxy~Subclass" }
override API::Node getAlreadyModeledClass() { result = Multidict::MultiDictProxy::classRef() }
}
/** Spec `pycurl.Curl~Subclass`; its already-modeled class is `Pycurl::Curl::classRef()`. */
class PyCurl extends FindSubclassesSpec {
PyCurl() { this = "pycurl.Curl~Subclass" }
override API::Node getAlreadyModeledClass() { result = Pycurl::Curl::classRef() }
}
/** Spec `rest_framework.request.Request~Subclass`; its already-modeled class is `RestFramework::Request::classRef()`. */
class RestFrameworkRequest extends FindSubclassesSpec {
RestFrameworkRequest() { this = "rest_framework.request.Request~Subclass" }
override API::Node getAlreadyModeledClass() { result = RestFramework::Request::classRef() }
}
/**
 * Spec `rest_framework.response.Response~Subclass`; its already-modeled class is
 * `RestFramework::Response::classRef()`.
 * Declared as more specific than `DjangoHttpResponse` via `getSuperClass()`.
 */
class RestFrameworkResponse extends FindSubclassesSpec {
RestFrameworkResponse() { this = "rest_framework.response.Response~Subclass" }
override API::Node getAlreadyModeledClass() { result = RestFramework::Response::classRef() }
override FindSubclassesSpec getSuperClass() { result instanceof DjangoHttpResponse }
override string getFullyQualifiedName() { result = "rest_framework.response.Response" }
}
/** Spec `sqlalchemy.engine.Engine~Subclass`; its already-modeled class is `SqlAlchemy::Engine::classRef()`. */
class SqlAlchemyEngine extends FindSubclassesSpec {
SqlAlchemyEngine() { this = "sqlalchemy.engine.Engine~Subclass" }
override API::Node getAlreadyModeledClass() { result = SqlAlchemy::Engine::classRef() }
}
/** Spec `sqlalchemy.engine.Connection~Subclass`; its already-modeled class is `SqlAlchemy::Connection::classRef()`. */
class SqlAlchemyConnection extends FindSubclassesSpec {
SqlAlchemyConnection() { this = "sqlalchemy.engine.Connection~Subclass" }
override API::Node getAlreadyModeledClass() { result = SqlAlchemy::Connection::classRef() }
}
/** Spec `sqlalchemy.orm.Session~Subclass`; its already-modeled class is `SqlAlchemy::Session::classRef()`. */
class SqlAlchemySession extends FindSubclassesSpec {
SqlAlchemySession() { this = "sqlalchemy.orm.Session~Subclass" }
override API::Node getAlreadyModeledClass() { result = SqlAlchemy::Session::classRef() }
}
/** Spec `urllib.parse.SplitResult~Subclass`; its already-modeled class is `Stdlib::SplitResult::classRef()`. */
class UrlLibParseSplitResult extends FindSubclassesSpec {
UrlLibParseSplitResult() { this = "urllib.parse.SplitResult~Subclass" }
override API::Node getAlreadyModeledClass() { result = Stdlib::SplitResult::classRef() }
}
/** Spec `http.client.HTTPConnection~Subclass`; its already-modeled class is `StdlibPrivate::HttpConnection::classRef()`. */
class StdlibHttpConnection extends FindSubclassesSpec {
StdlibHttpConnection() { this = "http.client.HTTPConnection~Subclass" }
override API::Node getAlreadyModeledClass() { result = StdlibPrivate::HttpConnection::classRef() }
}
/** Spec `io.StringIO~Subclass`; its already-modeled class is `StdlibPrivate::StringIO::classRef()`. */
class StringIO extends FindSubclassesSpec {
StringIO() { this = "io.StringIO~Subclass" }
override API::Node getAlreadyModeledClass() { result = StdlibPrivate::StringIO::classRef() }
}
/**
 * Spec `tornado.web.Application~Subclass`; its already-modeled class is
 * `Tornado::TornadoModule::Web::Application::classRef()`.
 */
class TornadoApplication extends FindSubclassesSpec {
TornadoApplication() { this = "tornado.web.Application~Subclass" }
override API::Node getAlreadyModeledClass() {
result = Tornado::TornadoModule::Web::Application::classRef()
}
}
/**
 * Spec `tornado.httputil.HttpServerRequest~Subclass`; its already-modeled class is
 * `Tornado::TornadoModule::HttpUtil::HttpServerRequest::classRef()`.
 *
 * NOTE(review): the id follows the QL module spelling `HttpServerRequest`; the
 * Python class is presumably spelled `HTTPServerRequest` -- confirm whether the id
 * is meant to mirror the Python name.
 */
class TornadoRequest extends FindSubclassesSpec {
TornadoRequest() { this = "tornado.httputil.HttpServerRequest~Subclass" }
override API::Node getAlreadyModeledClass() {
result = Tornado::TornadoModule::HttpUtil::HttpServerRequest::classRef()
}
}
/** Spec `urllib3.PoolManager~Subclass`; its already-modeled class is `Urllib3::PoolManager::classRef()`. */
class Urllib3PoolManager extends FindSubclassesSpec {
Urllib3PoolManager() { this = "urllib3.PoolManager~Subclass" }
override API::Node getAlreadyModeledClass() { result = Urllib3::PoolManager::classRef() }
}
/** Spec `logging.Logger~Subclass`; its already-modeled class is `Stdlib::Logger::subclassRef()`. */
class StdlibLogger extends FindSubclassesSpec {
StdlibLogger() { this = "logging.Logger~Subclass" }
override API::Node getAlreadyModeledClass() { result = Stdlib::Logger::subclassRef() }
}
/** Spec `pydantic.BaseModel~Subclass`; its already-modeled class is `Pydantic::BaseModel::subclassRef()`. */
class PydanticBaseModel extends FindSubclassesSpec {
PydanticBaseModel() { this = "pydantic.BaseModel~Subclass" }
override API::Node getAlreadyModeledClass() { result = Pydantic::BaseModel::subclassRef() }
}
/** Spec `peewee.Database~Subclass`; its already-modeled class is `Peewee::Database::subclassRef()`. */
class PeeweeDatabase extends FindSubclassesSpec {
PeeweeDatabase() { this = "peewee.Database~Subclass" }
override API::Node getAlreadyModeledClass() { result = Peewee::Database::subclassRef() }
}
/** Spec `aioch.Client~Subclass`; its already-modeled class is `Aioch::Client::subclassRef()`. */
class AiochClient extends FindSubclassesSpec {
AiochClient() { this = "aioch.Client~Subclass" }
override API::Node getAlreadyModeledClass() { result = Aioch::Client::subclassRef() }
}
/** Spec `aiohttp.web.View~Subclass`; its already-modeled class is `AiohttpWebModel::View::subclassRef()`. */
class AiohttpView extends FindSubclassesSpec {
AiohttpView() { this = "aiohttp.web.View~Subclass" }
override API::Node getAlreadyModeledClass() { result = AiohttpWebModel::View::subclassRef() }
}
/**
 * Spec `django.db.models.FileField~Subclass`; its already-modeled class is
 * `PrivateDjango::DjangoImpl::DB::Models::FileField::subclassRef()`.
 */
class DjangoFileField extends FindSubclassesSpec {
DjangoFileField() { this = "django.db.models.FileField~Subclass" }
override API::Node getAlreadyModeledClass() {
result = PrivateDjango::DjangoImpl::DB::Models::FileField::subclassRef()
}
}
/** Spec `rest_framework.exceptions.APIException~Subclass`; its already-modeled class is `RestFramework::ApiException::classRef()`. */
class RestFrameworkApiException extends FindSubclassesSpec {
RestFrameworkApiException() { this = "rest_framework.exceptions.APIException~Subclass" }
override API::Node getAlreadyModeledClass() { result = RestFramework::ApiException::classRef() }
}
/** Spec `xml.etree.ElementTree~Subclass`; its already-modeled class is `StdlibPrivate::elementTreeClassRef()`. */
class ElementTree extends FindSubclassesSpec {
ElementTree() { this = "xml.etree.ElementTree~Subclass" }
override API::Node getAlreadyModeledClass() { result = StdlibPrivate::elementTreeClassRef() }
}
/** Spec `lxml.etree~Alias` (an `~Alias` rather than `~Subclass` id); its already-modeled node is `Lxml::etreeRef()`. */
class LxmlETreeAlias extends FindSubclassesSpec {
LxmlETreeAlias() { this = "lxml.etree~Alias" }
override API::Node getAlreadyModeledClass() { result = Lxml::etreeRef() }
}
/** Spec `pickle~Alias` (an `~Alias` rather than `~Subclass` id); its already-modeled node is `StdlibPrivate::pickle()`. */
class PickleAlias extends FindSubclassesSpec {
PickleAlias() { this = "pickle~Alias" }
override API::Node getAlreadyModeledClass() { result = StdlibPrivate::pickle() }
}
/** Spec `pickle.load~Alias` (an `~Alias` rather than `~Subclass` id); its already-modeled node is `StdlibPrivate::pickle_load()`. */
class PickleLoadAlias extends FindSubclassesSpec {
PickleLoadAlias() { this = "pickle.load~Alias" }
override API::Node getAlreadyModeledClass() { result = StdlibPrivate::pickle_load() }
}
/** Spec `pickle.loads~Alias` (an `~Alias` rather than `~Subclass` id); its already-modeled node is `StdlibPrivate::pickle_loads()`. */
class PickleLoadsAlias extends FindSubclassesSpec {
PickleLoadsAlias() { this = "pickle.loads~Alias" }
override API::Node getAlreadyModeledClass() { result = StdlibPrivate::pickle_loads() }
}
/**
 * Holds if `fullyQualified` splits at its first `.` into `type2` (the text before
 * the dot) and `path` (the remaining dotted components, each wrapped as
 * `Member[...]` and joined with `.`).
 *
 * For example, `flask.views.MethodView` gives `type2 = "flask"` and
 * `path = "Member[views].Member[MethodView]"`.
 *
 * `Member[__init__].` components are stripped from the path afterwards.
 *
 * NOTE(review): the first `replaceAll` also removes the `.` in front of
 * `Member[__init__]`, so `a.b.__init__.C` yields `Member[b]Member[C]` with no
 * separator -- confirm whether the replacement should be `"."` instead of `""`.
 */
bindingset[fullyQualified]
predicate fullyQualifiedToYamlFormat(string fullyQualified, string type2, string path) {
exists(int firstDot | firstDot = fullyQualified.indexOf(".", 0, 0) |
type2 = fullyQualified.prefix(firstDot) and
path =
("Member[" + fullyQualified.suffix(firstDot + 1).replaceAll(".", "].Member[") + "]")
.replaceAll(".Member[__init__].", "")
.replaceAll("Member[__init__].", "")
)
}
// Report one row (spec id, package, access path) for every new model found for a
// spec that is not already present in the `typeModel` extension data.
from FindSubclassesSpec spec, string newModelFullyQualified, string type2, string path, Module mod
where
newModel(spec, newModelFullyQualified, _, mod, _) and
not exists(FindSubclassesSpec subclass | subclass.getSuperClass() = spec |
// Since a class C which is a subclass for flask.MethodView is always a subclass of
// flask.View, and we chose to care about this distinction, in a naive approach we
// would always record rows for _both_ specs... that's just wasteful, so instead we
// only record the row for the more specific spec -- this is captured by the
// .getSuperClass() method on a spec, which links specs together in this way.
// However, if the definition actually depends on some logic, like below, we should
// still record both rows
// ```
// if <cond>:
// class C(flask.View): ...
// else:
// class C(flask.MethodView): ...
// ```
newModel(subclass, newModelFullyQualified, _, mod, _)
or
// When defining specs for both foo.Foo and bar.Bar, and you encounter the class
// definition for Bar as `class Bar(foo.Foo): ...` inside `__init__.py` of the `bar`
// PyPI package, we would normally record this new class as being an unmodeled
// subclass of foo.Foo (since the class definition is not found when using
// API::moduleImport("bar").getMember("Bar")). However, we don't actually want to
// treat this as foo.Foo, since it's actually bar.Bar -- so we use the fully
// qualified name to ignore cases such as this!
newModelFullyQualified = subclass.getFullyQualifiedName()
) and
fullyQualifiedToYamlFormat(newModelFullyQualified, type2, path) and
// Skip rows that are already recorded in the extension data.
not Extensions::typeModel(spec, type2, path) and
(
// Drop names that contain "test" (case-insensitive), unless they belong to the
// `find_subclass_test` package.
not newModelFullyQualified.regexpMatch("(?i).*tests?_?.*")
or
type2 = "find_subclass_test"
)
select spec.(string), type2, path

View File

@@ -0,0 +1,36 @@
#!/usr/bin/env python3
"""Concerns were raised about performance on Windows with having 2.5 k files for modeling, and it was recommended we join them all together when shipping.
This script does that.
Workflow when working on the automatic subclass modeling:
1. split files
2. do your work
3. join files
4. commit your changes
"""
import sys
import glob
import os
from shared_subclass_functions import *
# Refuse to overwrite a joined file that is already there.
if joined_file.exists():
    sys.exit(f"File {joined_file} already exists")

package_data = gather_from_existing()

# Flatten every package's rows into a single sorted list of lists.
as_lists = sorted(list(row) for rows in package_data.values() for row in rows)

to_write = wrap_in_template(as_lists)
write_data(to_write, joined_file)
print("Joined all files into", joined_file)

# Everything now lives in the joined file, so delete the per-package files.
for per_package_file in glob.glob(f"{subclass_capture_path}/auto-*.model.yml", recursive=True):
    os.unlink(per_package_file)

View File

@@ -0,0 +1,79 @@
#!/usr/bin/env python3
import sys
import glob
import json
import subprocess
from collections import defaultdict
import shutil
import os
from shared_subclass_functions import *
# Fail early (reporting the offending path) if the models-as-data directory that
# the shared helpers point at is missing.
assert mad_path.exists(), mad_path
# process data
class CodeQL:
    """Context manager around a ``codeql execute cli-server`` child process.

    A command is written to the server's stdin as a JSON array followed by a
    NUL byte; the reply is everything read from stdout up to the next NUL byte.
    """

    def __init__(self):
        pass

    def __enter__(self):
        """Start the server process and return ``self``."""
        self.proc = subprocess.Popen(['codeql', 'execute','cli-server'],
                      executable=shutil.which('codeql'),
                      stdin=subprocess.PIPE,
                      stdout=subprocess.PIPE,
                      stderr=sys.stderr,
                      env=os.environ.copy(),
                     )
        return self

    def __exit__(self, type, value, tb):
        """Ask the server to shut down, killing it if it has not exited after 5 seconds."""
        try:
            self.proc.stdin.write(b'["shutdown"]\0')
            self.proc.stdin.close()
        except OSError:
            # The server is already gone (e.g. broken pipe); still reap it below.
            pass
        try:
            self.proc.wait(5)
        except subprocess.TimeoutExpired:
            self.proc.kill()
            # Reap the killed child so it does not linger as a zombie.
            self.proc.wait()

    def command(self, args):
        """Send ``args`` to the server and return its reply decoded as UTF-8.

        Args:
            args: JSON-serialisable command, e.g. ``["bqrs", "decode", ...]``.

        Returns:
            The reply text, without the terminating NUL byte.

        Raises:
            EOFError: if stdout reaches end-of-file before the NUL terminator
                (without this check the read loop would spin forever).
        """
        stdin = self.proc.stdin
        stdin.write(json.dumps(args).encode('utf-8'))
        stdin.write(b'\0')
        stdin.flush()

        reply = bytearray()
        while True:
            # Read one byte at a time so nothing past the terminator is consumed.
            chunk = self.proc.stdout.read(1)
            if not chunk:
                raise EOFError("codeql cli-server closed stdout before finishing its reply")
            if chunk == b'\0':
                return reply.decode('utf-8')
            reply += chunk
def gather_from_bqrs_results():
    """Decode the BQRS result(s) named by ``sys.argv[1]`` into per-package rows.

    ``sys.argv[1]`` is either a single existing ``.bqrs`` file, or a directory
    that is searched recursively for ``*.bqrs`` files.

    Returns:
        A ``defaultdict(set)`` keyed by the second column of each ``#select``
        row, whose values are the complete rows as tuples.
    """
    package_data = defaultdict(set)
    target = sys.argv[1]
    with CodeQL() as codeql:
        if os.path.exists(target) and not os.path.isdir(target) and target.endswith(".bqrs"):
            files = [target]
        else:
            # "**" only recurses when it is a path component of its own; the
            # previous "**.bqrs" pattern behaved like "*.bqrs" and never
            # descended into subdirectories.
            files = glob.glob(f"{target}/**/*.bqrs", recursive=True)

        for f in files:
            print(f"Processing {f}")

            json_data = codeql.command(["bqrs", "decode", "--format=json", f])
            select = json.loads(json_data)

            for t in select["#select"]["tuples"]:
                pkg = t[1]
                package_data[pkg].add(tuple(t))

    return package_data
if __name__ == "__main__":
    # Per-package files are only written while no joined file is present.
    if joined_file.exists():
        sys.exit(f"File {joined_file} exists, you should split it up first")

    write_all_package_data_to_files(gather_from_bqrs_results())

View File

@@ -0,0 +1,79 @@
from typing import Dict
import yaml
from pathlib import Path
import glob
from collections import defaultdict
import re
# Written as a "# <VERSION>" header on the first line of every generated file and
# checked again when such a file is parsed.
VERSION = "process-mrva-results 0.0.1"

# Models-as-data directory, located four directory levels above this file.
mad_path = Path(__file__).parent.parent.parent.parent / "lib/semmle/python/frameworks/data/internal/"

# Directory holding the generated per-package "auto-*.model.yml" files.
subclass_capture_path = mad_path / "subclass-capture"

# Single file that holds all rows once the per-package files have been joined.
joined_file = subclass_capture_path / "ALL.model.yml"
def parse_from_file(path: Path) -> set:
    """Load the ``typeModel`` rows stored in a generated model file.

    Args:
        path: Model file to read. A missing file is treated as empty.

    Returns:
        The rows of the file's single extension as a set of tuples; an empty
        set when ``path`` does not exist.

    Raises:
        AssertionError: if the first line is not the ``# <VERSION>`` header, if
            the file does not contain exactly one extension, or if that
            extension does not add to ``typeModel``.
    """
    if not path.exists():
        return set()

    # Use a context manager so the handle is closed instead of leaked.
    with path.open("r") as f:
        # Read the header outside the assert, so the line is still consumed
        # when assertions are disabled (python -O).
        header = f.readline()
        assert header.startswith(f"# {VERSION}\n"), path
        raw_data = yaml.load(f, Loader=yaml.CBaseLoader)

    extensions = raw_data["extensions"]
    assert len(extensions) == 1, path
    assert extensions[0]["addsTo"]["extensible"] == "typeModel", path

    return {tuple(x) for x in extensions[0]["data"]}
def wrap_in_template(data):
    """Wrap ``data`` as the rows of one ``typeModel`` extension for ``codeql/python-all``."""
    adds_to = {"pack": "codeql/python-all", "extensible": "typeModel"}
    extension = {"addsTo": adds_to, "data": data}
    return {"extensions": [extension]}
def write_data(data, path: Path):
    """Write ``data`` to ``path`` as YAML, preceded by the ``# <VERSION>`` header line.

    Args:
        data: Structure to serialise (as produced by ``wrap_in_template``).
        path: Destination file; any existing content is replaced.
    """
    # The context manager flushes and closes the file before returning, so a
    # later read of the same path sees the complete content.
    with path.open("w+") as f:
        f.write(f"# {VERSION}\n")
        yaml.dump(data, indent=2, stream=f, Dumper=yaml.CDumper)
def gather_from_existing():
    """Collect the rows of every per-package ``auto-*.model.yml`` file.

    Returns:
        A ``defaultdict(set)`` mapping each package name (taken from the file
        name, without the ``auto-`` prefix and the extensions) to its rows.
    """
    package_data = defaultdict(set)
    for f in glob.glob(f"{subclass_capture_path}/auto-*.model.yml", recursive=True):
        print(f"Processing {f}")

        model_file = Path(f)
        all_data = parse_from_file(model_file)
        # Path.name copes with whichever separator the platform uses, whereas
        # splitting on "/" returns the whole path when glob yields backslashes.
        pkg = model_file.name.split(".")[0][len("auto-"):]
        package_data[pkg].update(all_data)
    return package_data
def write_all_package_data_to_files(package_data: Dict[str, set]):
    """Merge ``package_data`` into the per-package ``auto-<pkg>.model.yml`` files.

    Rows already stored in a package's file are kept; the union is written
    back sorted. Packages whose name is not made up solely of ASCII letters,
    digits, ``-`` and ``_`` are reported and skipped.

    Args:
        package_data: Mapping from package name to a set of row tuples.
    """
    for pkg in package_data:
        # fullmatch, not match: the name becomes part of a file path, so the
        # whole string has to be safe -- match() only checked the first
        # character and let names such as "a/../b" through.
        if not re.fullmatch(r"[a-zA-Z0-9_-]+", pkg):
            print(f"Skipping {repr(pkg)}")
            continue

        pkg_path = subclass_capture_path / f"auto-{pkg}.model.yml"

        print(f"Writing {pkg_path}")

        all_data = parse_from_file(pkg_path)
        all_data.update(package_data[pkg])

        as_lists = sorted(list(t) for t in all_data)
        data_for_yaml = wrap_in_template(as_lists)

        write_data(data_for_yaml, pkg_path)

View File

@@ -0,0 +1,28 @@
#!/usr/bin/env python3
"""Concerns were raised about performance on Windows with having 2.5 k files for modeling, and it was recommended we join them all together when shipping.
This script does the opposite, so it's easier to work with locally.
Workflow when working on the automatic subclass modeling:
1. split files
2. do your work
3. join files
4. commit your changes
"""
import sys
from collections import defaultdict
from shared_subclass_functions import *
# Nothing to split unless the joined file is present.
if not joined_file.exists():
    sys.exit(f"File {joined_file} does not exist")

all_data = parse_from_file(joined_file)

# Group rows by package name, which is the second element of every row.
package_data = defaultdict(set)
for t in all_data:
    package_data[t[1]].add(t)

write_all_package_data_to_files(package_data)

# NOTE(review): write_all_package_data_to_files skips packages whose name fails
# its validation, so rows of such packages are lost once the joined file is
# removed -- confirm that this is acceptable.
joined_file.unlink()

View File

@@ -0,0 +1,17 @@
/**
* @name Interesting taint sinks
* @description Interesting sinks from TaintTracking queries.
* @kind problem
* @problem.severity recommendation
* @id py/meta/alerts/interesting-taint-sinks
* @tags meta
* @precision very-low
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import Sinks
// Report every sink returned by `taintSink`, except those of the
// "CleartextLogging" and "LogInjection" kinds.
from string kind
where not kind in ["CleartextLogging", "LogInjection"]
select taintSink(kind), kind + " sink"

View File

@@ -0,0 +1,79 @@
private import python
private import semmle.python.dataflow.new.DataFlow
private import meta.MetaMetrics
import semmle.python.security.dataflow.CleartextLoggingCustomizations
import semmle.python.security.dataflow.CleartextStorageCustomizations
import semmle.python.security.dataflow.CodeInjectionCustomizations
import semmle.python.security.dataflow.CommandInjectionCustomizations
import semmle.python.security.dataflow.LdapInjectionCustomizations
import semmle.python.security.dataflow.LogInjectionCustomizations
import semmle.python.security.dataflow.NoSqlInjectionCustomizations
import semmle.python.security.dataflow.PathInjectionCustomizations
import semmle.python.security.dataflow.PolynomialReDoSCustomizations
import semmle.python.security.dataflow.ReflectedXSSCustomizations
import semmle.python.security.dataflow.RegexInjectionCustomizations
import semmle.python.security.dataflow.ServerSideRequestForgeryCustomizations
import semmle.python.security.dataflow.SqlInjectionCustomizations
import semmle.python.security.dataflow.StackTraceExposureCustomizations
import semmle.python.security.dataflow.TarSlipCustomizations
import semmle.python.security.dataflow.UnsafeDeserializationCustomizations
import semmle.python.security.dataflow.UrlRedirectCustomizations
import semmle.python.security.dataflow.WeakSensitiveDataHashingCustomizations
import semmle.python.security.dataflow.XmlBombCustomizations
import semmle.python.security.dataflow.XpathInjectionCustomizations
import semmle.python.security.dataflow.XxeCustomizations
/**
 * Gets a data-flow node that is a sink of the taint-tracking customization
 * identified by `kind`, excluding nodes located in an `IgnoredFile`.
 *
 * Some customizations contribute more than one kind: LDAP injection (DN and
 * filter sinks), NoSQL injection (string and dict sinks), and weak sensitive-data
 * hashing (normal and computationally expensive hash functions).
 */
DataFlow::Node taintSink(string kind) {
not result.getLocation().getFile() instanceof IgnoredFile and
(
kind = "CleartextLogging" and result instanceof CleartextLogging::Sink
or
kind = "CleartextStorage" and result instanceof CleartextStorage::Sink
or
kind = "CodeInjection" and result instanceof CodeInjection::Sink
or
kind = "CommandInjection" and result instanceof CommandInjection::Sink
or
kind = "LdapInjection (DN)" and result instanceof LdapInjection::DnSink
or
kind = "LdapInjection (Filter)" and result instanceof LdapInjection::FilterSink
or
kind = "LogInjection" and result instanceof LogInjection::Sink
or
kind = "PathInjection" and result instanceof PathInjection::Sink
or
kind = "PolynomialReDoS" and result instanceof PolynomialReDoS::Sink
or
kind = "ReflectedXss" and result instanceof ReflectedXss::Sink
or
kind = "RegexInjection" and result instanceof RegexInjection::Sink
or
kind = "NoSqlInjection (string sink)" and result instanceof NoSqlInjection::StringSink
or
kind = "NoSqlInjection (dict sink)" and result instanceof NoSqlInjection::DictSink
or
kind = "ServerSideRequestForgery" and result instanceof ServerSideRequestForgery::Sink
or
kind = "SqlInjection" and result instanceof SqlInjection::Sink
or
kind = "StackTraceExposure" and result instanceof StackTraceExposure::Sink
or
kind = "TarSlip" and result instanceof TarSlip::Sink
or
kind = "UnsafeDeserialization" and result instanceof UnsafeDeserialization::Sink
or
kind = "UrlRedirect" and result instanceof UrlRedirect::Sink
or
kind = "WeakSensitiveDataHashing (NormalHashFunction)" and
result instanceof NormalHashFunction::Sink
or
kind = "WeakSensitiveDataHashing (ComputationallyExpensiveHashFunction)" and
result instanceof ComputationallyExpensiveHashFunction::Sink
or
kind = "XmlBomb" and result instanceof XmlBomb::Sink
or
kind = "XpathInjection" and result instanceof XpathInjection::Sink
or
kind = "Xxe" and result instanceof Xxe::Sink
)
}

View File

@@ -10,83 +10,7 @@
private import python
private import semmle.python.dataflow.new.DataFlow
private import meta.MetaMetrics
import semmle.python.security.dataflow.CleartextLoggingCustomizations
import semmle.python.security.dataflow.CleartextStorageCustomizations
import semmle.python.security.dataflow.CodeInjectionCustomizations
import semmle.python.security.dataflow.CommandInjectionCustomizations
import semmle.python.security.dataflow.LdapInjectionCustomizations
import semmle.python.security.dataflow.LogInjectionCustomizations
import semmle.python.security.dataflow.NoSqlInjectionCustomizations
import semmle.python.security.dataflow.PathInjectionCustomizations
import semmle.python.security.dataflow.PolynomialReDoSCustomizations
import semmle.python.security.dataflow.ReflectedXSSCustomizations
import semmle.python.security.dataflow.RegexInjectionCustomizations
import semmle.python.security.dataflow.ServerSideRequestForgeryCustomizations
import semmle.python.security.dataflow.SqlInjectionCustomizations
import semmle.python.security.dataflow.StackTraceExposureCustomizations
import semmle.python.security.dataflow.TarSlipCustomizations
import semmle.python.security.dataflow.UnsafeDeserializationCustomizations
import semmle.python.security.dataflow.UrlRedirectCustomizations
import semmle.python.security.dataflow.WeakSensitiveDataHashingCustomizations
import semmle.python.security.dataflow.XmlBombCustomizations
import semmle.python.security.dataflow.XpathInjectionCustomizations
import semmle.python.security.dataflow.XxeCustomizations
/**
 * Holds if `node` is a sink of the security query labelled `kind`.
 *
 * This is the unfiltered kind-to-sink-class table; it does not look at the
 * file that contains `node`.
 */
private predicate isSinkOfKind(DataFlow::Node node, string kind) {
  kind = "CleartextLogging" and node instanceof CleartextLogging::Sink
  or
  kind = "CleartextStorage" and node instanceof CleartextStorage::Sink
  or
  kind = "CodeInjection" and node instanceof CodeInjection::Sink
  or
  kind = "CommandInjection" and node instanceof CommandInjection::Sink
  or
  kind = "LdapInjection (DN)" and node instanceof LdapInjection::DnSink
  or
  kind = "LdapInjection (Filter)" and node instanceof LdapInjection::FilterSink
  or
  kind = "LogInjection" and node instanceof LogInjection::Sink
  or
  kind = "PathInjection" and node instanceof PathInjection::Sink
  or
  kind = "PolynomialReDoS" and node instanceof PolynomialReDoS::Sink
  or
  kind = "ReflectedXss" and node instanceof ReflectedXss::Sink
  or
  kind = "RegexInjection" and node instanceof RegexInjection::Sink
  or
  kind = "NoSqlInjection (string sink)" and node instanceof NoSqlInjection::StringSink
  or
  kind = "NoSqlInjection (dict sink)" and node instanceof NoSqlInjection::DictSink
  or
  kind = "ServerSideRequestForgery" and node instanceof ServerSideRequestForgery::Sink
  or
  kind = "SqlInjection" and node instanceof SqlInjection::Sink
  or
  kind = "StackTraceExposure" and node instanceof StackTraceExposure::Sink
  or
  kind = "TarSlip" and node instanceof TarSlip::Sink
  or
  kind = "UnsafeDeserialization" and node instanceof UnsafeDeserialization::Sink
  or
  kind = "UrlRedirect" and node instanceof UrlRedirect::Sink
  or
  kind = "WeakSensitiveDataHashing (NormalHashFunction)" and
  node instanceof NormalHashFunction::Sink
  or
  kind = "WeakSensitiveDataHashing (ComputationallyExpensiveHashFunction)" and
  node instanceof ComputationallyExpensiveHashFunction::Sink
  or
  kind = "XmlBomb" and node instanceof XmlBomb::Sink
  or
  kind = "XpathInjection" and node instanceof XpathInjection::Sink
  or
  kind = "Xxe" and node instanceof Xxe::Sink
}

/**
 * Gets a sink of the security query labelled `kind`, skipping any node whose
 * file is an `IgnoredFile`.
 */
DataFlow::Node relevantTaintSink(string kind) {
  isSinkOfKind(result, kind) and
  not result.getLocation().getFile() instanceof IgnoredFile
}
private import Sinks
// Select each sink node, labelled with its kind followed by " sink".
from string kind
select relevantTaintSink(kind), kind + " sink"
select taintSink(kind), kind + " sink"

View File

@@ -1,5 +1,5 @@
name: codeql/python-queries
version: 0.9.6-dev
version: 0.9.7-dev
groups:
- python
- queries