Merge branch 'main' into shared-taint-tracking

This commit is contained in:
Jeroen Ketema
2023-08-17 00:14:25 +02:00
committed by GitHub
524 changed files with 12240 additions and 7635 deletions

View File

@@ -1,3 +1,7 @@
## 0.10.2
No user-facing changes.
## 0.10.1
### New Features

View File

@@ -492,9 +492,14 @@ class NiceLocationExpr extends Expr {
// for `import xxx` or for `import xxx as yyy`.
this.(ImportExpr).getLocation().hasLocationInfo(f, bl, bc, el, ec)
or
/* Show y for `y` in `from xxx import y` */
exists(string name |
name = this.(ImportMember).getName() and
// Show y for `y` in `from xxx import y`
// and y for `yyy as y` in `from xxx import yyy as y`.
exists(string name, Alias alias |
// This alias will always exist, as `from xxx import y`
// is expanded to `from xxx imprt y as y`.
this = alias.getValue() and
name = alias.getAsname().(Name).getId()
|
this.(ImportMember).getLocation().hasLocationInfo(f, _, _, el, ec) and
bl = el and
bc = ec - name.length() + 1

View File

@@ -0,0 +1,4 @@
---
category: minorAnalysis
---
* Improvements of the `aiohttp` models including remote-flow-sources from type annotations, new path manipulation, and SSRF sinks.

View File

@@ -0,0 +1,4 @@
---
category: minorAnalysis
---
* Added modeling of AWS Lambda handlers that can be identified with `AWS::Serverless::Function` in YAML files, where the event parameter is modeled as a remote-flow-source.

View File

@@ -0,0 +1,4 @@
---
category: minorAnalysis
---
* Support analyzing packages (folders with python code) that do not have `__init__.py` files, although this is technically required, we see real world projects that don't have this.

View File

@@ -0,0 +1,4 @@
---
category: fix
---
* Fixed the computation of locations for imports with aliases in jump-to-definition.

View File

@@ -0,0 +1,3 @@
## 0.10.2
No user-facing changes.

View File

@@ -1,2 +1,2 @@
---
lastReleaseVersion: 0.10.1
lastReleaseVersion: 0.10.2

View File

@@ -1,5 +1,5 @@
name: codeql/python-all
version: 0.10.2-dev
version: 0.10.3-dev
groups: python
dbscheme: semmlecode.python.dbscheme
extractor: python

View File

@@ -49,6 +49,7 @@ private import semmle.python.frameworks.Requests
private import semmle.python.frameworks.RestFramework
private import semmle.python.frameworks.Rsa
private import semmle.python.frameworks.RuamelYaml
private import semmle.python.frameworks.ServerLess
private import semmle.python.frameworks.Simplejson
private import semmle.python.frameworks.SqlAlchemy
private import semmle.python.frameworks.Starlette

View File

@@ -195,7 +195,22 @@ private predicate isPotentialPackage(Folder f) {
}
private string moduleNameFromBase(Container file) {
isPotentialPackage(file) and result = file.getBaseName()
// We used to also require `isPotentialPackage(f)` to hold in this case,
// but we saw modules not getting resolved because their folder did not
// contain an `__init__.py` file.
//
// This makes the folder not be a package but a namespace package instead.
// In most cases this is a mistake :| See following links for more details
// - https://dev.to/methane/don-t-omit-init-py-3hga
// - https://packaging.python.org/en/latest/guides/packaging-namespace-packages/
// - https://discuss.python.org/t/init-py-pep-420-and-iter-modules-confusion/9642
//
// It is possible that we can keep the original requirement on
// `isPotentialPackage(f)` here, but relax `isPotentialPackage` itself to allow
// for this behavior of missing `__init__.py` files. However, doing so involves
// cascading changes (for example to `moduleNameFromFile`), and was a more involved
// task than we wanted to take on.
result = file.getBaseName()
or
file instanceof File and result = file.getStem()
}

View File

@@ -468,6 +468,27 @@ module AiohttpWebModel {
override string getSourceType() { result = "aiohttp.web.Request" }
}
/**
* A parameter that has a type annotation of `aiohttp.web.Request`, so with all
* likelihood will receive an `aiohttp.web.Request` instance at some point when a
* request handler is invoked.
*/
class AiohttpRequestParamFromTypeAnnotation extends Request::InstanceSource,
DataFlow::ParameterNode, RemoteFlowSource::Range
{
AiohttpRequestParamFromTypeAnnotation() {
not this instanceof AiohttpRequestHandlerRequestParam and
this.getParameter().getAnnotation() =
API::moduleImport("aiohttp")
.getMember("web")
.getMember("Request")
.getAValueReachableFromSource()
.asExpr()
}
override string getSourceType() { result = "aiohttp.web.Request from type-annotation" }
}
/**
* A read of the `request` attribute on an instance of an aiohttp.web View class,
* which is the request being processed currently.
@@ -498,14 +519,17 @@ module AiohttpWebModel {
* - https://docs.aiohttp.org/en/stable/web_quickstart.html#aiohttp-web-exceptions
*/
class AiohttpWebResponseInstantiation extends Http::Server::HttpResponse::Range,
Response::InstanceSource, DataFlow::CallCfgNode
Response::InstanceSource, API::CallNode
{
API::Node apiNode;
AiohttpWebResponseInstantiation() {
this = apiNode.getACall() and
(
apiNode = API::moduleImport("aiohttp").getMember("web").getMember("Response")
apiNode =
API::moduleImport("aiohttp")
.getMember("web")
.getMember(["FileResponse", "Response", "StreamResponse"])
or
exists(string httpExceptionClassName |
httpExceptionClassName in [
@@ -545,6 +569,10 @@ module AiohttpWebModel {
override DataFlow::Node getMimetypeOrContentTypeArg() {
result = this.getArgByName("content_type")
or
exists(string key | key.toLowerCase() = "content-type" |
result = this.getKeywordParameter("headers").getSubscript(key).getAValueReachingSink()
)
}
override string getMimetypeDefault() {
@@ -556,6 +584,37 @@ module AiohttpWebModel {
}
}
/**
* A call to the `aiohttp.web.FileResponse` constructor as a sink for Filesystem access.
*/
class FileResponseCall extends FileSystemAccess::Range, API::CallNode {
FileResponseCall() {
this = API::moduleImport("aiohttp").getMember("web").getMember("FileResponse").getACall()
}
override DataFlow::Node getAPathArgument() { result = this.getParameter(0, "path").asSink() }
}
/**
* An instantiation of `aiohttp.web.StreamResponse`.
*
* See https://docs.aiohttp.org/en/stable/web_reference.html#aiohttp.web.StreamResponse
*/
class StreamResponse extends AiohttpWebResponseInstantiation {
StreamResponse() {
this = API::moduleImport("aiohttp").getMember("web").getMember("StreamResponse").getACall()
}
override DataFlow::Node getBody() {
result =
this.getReturn()
.getMember(["write", "write_eof"])
.getACall()
.getParameter(0, "data")
.asSink()
}
}
/** Gets an HTTP response instance. */
private API::Node aiohttpResponseInstance() {
result = any(AiohttpWebResponseInstantiation call).getApiNode().getReturn()
@@ -670,14 +729,14 @@ private module AiohttpClientModel {
string methodName;
OutgoingRequestCall() {
methodName in [Http::httpVerbLower(), "request"] and
methodName in [Http::httpVerbLower(), "request", "ws_connect"] and
this = instance().getMember(methodName).getACall()
}
override DataFlow::Node getAUrlPart() {
result = this.getArgByName("url")
or
not methodName = "request" and
methodName in [Http::httpVerbLower(), "ws_connect"] and
result = this.getArg(0)
or
methodName = "request" and

View File

@@ -0,0 +1,67 @@
/**
* Provides classes and predicates for working with those serverless handlers,
* handled by the shared library.
*
* E.g. [AWS](https://docs.aws.amazon.com/lambda/latest/dg/python-handler.html).
*
* In particular a `RemoteFlowSource` is added for each.
*/
import python
import codeql.serverless.ServerLess
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.RemoteFlowSources
private module YamlImpl implements Input {
import semmle.python.Files
import semmle.python.Yaml
}
module SL = ServerLess<YamlImpl>;
/**
* Gets a function that is a serverless request handler.
*
* For example: if an AWS serverless resource contains the following properties (in the "template.yml" file):
* ```yaml
* Handler: mylibrary.handler
* Runtime: pythonXXX
* CodeUri: backend/src/
* ```
*
* And a file "mylibrary.py" exists in the folder "backend/src" (relative to the "template.yml" file).
* Then the result of this predicate is a function exported as "handler" from "mylibrary.py".
* The "mylibrary.py" file could for example look like:
*
* ```python
* def handler(event):
* ...
* ```
*/
private Function getAServerlessHandler() {
exists(File file, string stem, string handler, string runtime, Module mod |
SL::hasServerlessHandler(stem, handler, _, runtime) and
file.getAbsolutePath() = stem + ".py" and
// if runtime is specified, it should be python
(runtime = "" or runtime.matches("python%"))
|
mod.getFile() = file and
result.getScope() = mod and
result.getName() = handler
)
}
private DataFlow::ParameterNode getAHandlerEventParameter() {
exists(Function func | func = getAServerlessHandler() |
result.getParameter() in [func.getArg(0), func.getArgByName("event")]
)
}
/**
* A serverless request handler event, seen as a RemoteFlowSource.
*/
private class ServerlessHandlerEventAsRemoteFlow extends RemoteFlowSource::Range {
ServerlessHandlerEventAsRemoteFlow() { this = getAHandlerEventParameter() }
override string getSourceType() { result = "Serverless event" }
}

View File

@@ -1,3 +1,7 @@
## 0.8.2
No user-facing changes.
## 0.8.1
### Minor Analysis Improvements

View File

@@ -0,0 +1,3 @@
## 0.8.2
No user-facing changes.

View File

@@ -1,2 +1,2 @@
---
lastReleaseVersion: 0.8.1
lastReleaseVersion: 0.8.2

View File

@@ -1,5 +1,5 @@
name: codeql/python-queries
version: 0.8.2-dev
version: 0.8.3-dev
groups:
- python
- queries

View File

@@ -17,6 +17,6 @@ Since PEP 420 was accepted in Python 3, this test is Python 3 only.
from foo.bar.a import afunc
from foo_explicit.bar.a import explicit_afunc
afunc() # $ MISSING: pt,tt=afunc
afunc() # $ pt,tt="foo/bar/a.py:afunc"
explicit_afunc() # $ pt,tt="foo_explicit/bar/a.py:explicit_afunc"

View File

@@ -292,10 +292,11 @@ module HttpServerHttpResponseTest implements TestSig {
exists(DedicatedResponseTest d | d.isDedicatedFile(file))
) and
(
exists(Http::Server::HttpResponse response |
location = response.getLocation() and
element = response.toString() and
value = prettyNodeForInlineTest(response.getBody()) and
exists(Http::Server::HttpResponse response, DataFlow::Node body |
body = response.getBody() and
location = body.getLocation() and
element = body.toString() and
value = prettyNodeForInlineTest(body) and
tag = "responseBody"
)
or

View File

@@ -1,2 +1,19 @@
import experimental.meta.InlineTaintTest
import MakeInlineTaintTest<TestTaintTrackingConfig>
predicate isSafe(DataFlow::GuardNode g, ControlFlowNode node, boolean branch) {
g.(CallNode).getFunction().(NameNode).getId() = "is_safe" and
node = g.(CallNode).getArg(_) and
branch = true
}
module CustomSanitizerOverridesConfig implements DataFlow::ConfigSig {
predicate isSource = TestTaintTrackingConfig::isSource/1;
predicate isSink = TestTaintTrackingConfig::isSink/1;
predicate isBarrier(DataFlow::Node node) {
node = DataFlow::BarrierGuard<isSafe/3>::getABarrierNode()
}
}
import MakeInlineTaintTest<CustomSanitizerOverridesConfig>

View File

@@ -33,3 +33,5 @@ async def test():
assert context.verify_mode == ssl.VerifyMode.CERT_NONE
s.get("url", ssl=context) # $ clientRequestUrlPart="url" MISSING: clientRequestCertValidationDisabled
s.ws_connect("url") # $ clientRequestUrlPart="url"

View File

@@ -23,6 +23,9 @@ async def html_text(request): # $ requestHandler
async def html_body(request): # $ requestHandler
return web.Response(body=b"foo", content_type="text/html") # $ HttpResponse mimetype=text/html responseBody=b"foo"
@routes.get("/html_body_header") # $ routeSetup="/html_body_header"
async def html_body_header(request): # $ requestHandler
return web.Response(headers={"content-type": "text/html"}, text="foo") # $ HttpResponse mimetype=text/html responseBody="foo"
@routes.get("/html_body_set_later") # $ routeSetup="/html_body_set_later"
async def html_body_set_later(request): # $ requestHandler
@@ -65,6 +68,26 @@ async def redirect_302(request): # $ requestHandler
else:
raise web.HTTPFound(location="/logout") # $ HttpResponse HttpRedirectResponse mimetype=application/octet-stream redirectLocation="/logout"
@routes.get("/file_response") # $ routeSetup="/file_response"
async def file_response(request): # $ requestHandler
filename = "foo.txt"
resp = web.FileResponse(filename) # $ HttpResponse mimetype=application/octet-stream getAPathArgument=filename
resp = web.FileResponse(path=filename) # $ HttpResponse mimetype=application/octet-stream getAPathArgument=filename
return resp
@routes.get("/streaming_response") # $ routeSetup="/streaming_response"
async def streaming_response(request): # $ requestHandler
resp = web.StreamResponse() # $ HttpResponse mimetype=application/octet-stream
await resp.prepare(request)
await resp.write(b"foo") # $ responseBody=b"foo"
await resp.write(data=b"bar") # $ responseBody=b"bar"
await resp.write_eof(b"baz") # $ responseBody=b"baz"
return resp
################################################################################
# Cookies
################################################################################

View File

@@ -142,10 +142,36 @@ class TaintTestClass(web.View):
self.request.url # $ tainted
)
# not a request handler, and not called, but since we have type-annotation, should be a
# remote-flow-source.
async def test_source_from_type_annotation(request: web.Request):
# picking out just a few of the tests from `test_taint` above, to show that we have
# the same taint-steps :)
ensure_tainted(
request, # $ tainted
request.url, # $ tainted
await request.content.read(), # $ tainted
)
# Test that since we can reach the `request` object in the helper function, we don't
# introduce a new remote-flow-source, but instead use the one from the caller. (which is
# checked to not be tainted)
async def test_sanitizer(request): # $ requestHandler
ensure_tainted(request, request.url, await request.content.read()) # $ tainted
if (is_safe(request)):
ensure_not_tainted(request, request.url, await request.content.read())
test_safe_helper_function_no_route_with_type(request)
async def test_safe_helper_function_no_route_with_type(request: web.Request):
ensure_not_tainted(request, request.url, await request.content.read()) # $ SPURIOUS: tainted
app = web.Application()
app.router.add_get(r"/test_taint/{name}/{number:\d+}", test_taint) # $ routeSetup="/test_taint/{name}/{number:\d+}"
app.router.add_view(r"/test_taint_class", TaintTestClass) # $ routeSetup="/test_taint_class"
app.router.add_view(r"/test_sanitizer", test_sanitizer) # $ routeSetup="/test_sanitizer"
if __name__ == "__main__":

View File

@@ -0,0 +1,4 @@
failures
argumentToEnsureNotTaintedNotMarkedAsSpurious
untaintedArgumentToEnsureTaintedNotMarkedAsMissing
testFailures

View File

@@ -0,0 +1,2 @@
import experimental.meta.InlineTaintTest
import MakeInlineTaintTest<TestTaintTrackingConfig>

View File

@@ -0,0 +1,13 @@
def handler1(event, context):
ensure_tainted(event) # $ tainted
return "Hello World!"
def handler2(event, context):
ensure_tainted(event) # $ tainted
return "Hello World!"
# This function is not mentioned in template.yml
# and so it is not receiving user input.
def non_handler(event, context):
ensure_not_tainted(event)
return "Hello World!"

View File

@@ -0,0 +1,11 @@
def lambda_handler(event, context):
ensure_tainted(
event, # $ tainted
# event is usually a dict, see https://docs.aws.amazon.com/lambda/latest/dg/python-handler.html
event["key"], # $ tainted
event["key"]["key2"], # $ tainted
event["key"][0], # $ tainted
# but can also be a list
event[0], # $ tainted
)
return "OK"

View File

@@ -0,0 +1,62 @@
# inspired by https://github.com/awsdocs/aws-lambda-developer-guide/blob/main/sample-apps/blank-python/template.yml
# but we have added extra handlers
AWSTemplateFormatVersion: '2010-09-09'
Transform: 'AWS::Serverless-2016-10-31'
Description: An AWS Lambda application that calls the Lambda API.
Resources:
function:
Type: AWS::Serverless::Function
Properties:
Handler: lambda_function.lambda_handler
Runtime: python3.8
CodeUri: function/.
Description: Call the AWS Lambda API
Timeout: 10
# Function's execution role
Policies:
- AWSLambdaBasicExecutionRole
- AWSLambda_ReadOnlyAccess
- AWSXrayWriteOnlyAccess
Tracing: Active
Layers:
- !Ref libs
function:
Type: AWS::Serverless::Function
Properties:
Handler: extra_lambdas.handler1
Runtime: python3.8
CodeUri: function/.
Description: Call the AWS Lambda API
Timeout: 10
# Function's execution role
Policies:
- AWSLambdaBasicExecutionRole
- AWSLambda_ReadOnlyAccess
- AWSXrayWriteOnlyAccess
Tracing: Active
Layers:
- !Ref libs
function:
Type: AWS::Serverless::Function
Properties:
Handler: extra_lambdas.handler2
Runtime: python3.8
CodeUri: function/.
Description: Call the AWS Lambda API
Timeout: 10
# Function's execution role
Policies:
- AWSLambdaBasicExecutionRole
- AWSLambda_ReadOnlyAccess
- AWSXrayWriteOnlyAccess
Tracing: Active
Layers:
- !Ref libs
libs:
Type: AWS::Serverless::LayerVersion
Properties:
LayerName: blank-python-lib
Description: Dependencies for the blank-python sample app.
ContentUri: package/.
CompatibleRuntimes:
- python3.8

View File

@@ -0,0 +1 @@
semmle-extractor-options: -R .

View File

@@ -0,0 +1,4 @@
from nova.api.openstack.placement import microversion
from nova.api.openstack.placement.objects import resource_provider as rp_obj
from nova.api.openstack.placement.policies import allocation_candidate as \
policies

View File

@@ -0,0 +1,6 @@
| import_statements.py | 1 | 6 | 1 | 33 |
| import_statements.py | 1 | 42 | 1 | 53 |
| import_statements.py | 2 | 6 | 2 | 41 |
| import_statements.py | 2 | 71 | 2 | 76 |
| import_statements.py | 3 | 6 | 3 | 42 |
| import_statements.py | 4 | 5 | 4 | 12 |

View File

@@ -0,0 +1,6 @@
import python
import analysis.DefinitionTracking
from NiceLocationExpr expr, string f, int bl, int bc, int el, int ec
where expr.hasLocationInfo(f, bl, bc, el, ec)
select f, bl, bc, el, ec

View File

@@ -35,9 +35,9 @@
| redos.py:139:25:139:31 | (\\w\|G)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'G'. |
| redos.py:145:25:145:32 | (\\d\|\\w)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
| redos.py:148:25:148:31 | (\\d\|5)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '5'. |
| redos.py:151:25:151:34 | (\\s\|[\\f])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\u000c'. |
| redos.py:154:25:154:38 | (\\s\|[\\v]\|\\\\v)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\u000b'. |
| redos.py:157:25:157:34 | (\\f\|[\\f])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\u000c'. |
| redos.py:151:25:151:34 | (\\s\|[\\f])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\u000c'. |
| redos.py:154:25:154:38 | (\\s\|[\\v]\|\\\\v)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\u000b'. |
| redos.py:157:25:157:34 | (\\f\|[\\f])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\u000c'. |
| redos.py:160:25:160:32 | (\\W\|\\D)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ' '. |
| redos.py:163:25:163:32 | (\\S\|\\w)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
| redos.py:166:25:166:34 | (\\S\|[\\w])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
@@ -67,8 +67,8 @@
| redos.py:259:24:259:126 | (.thisisagoddamnlongstringforstresstestingthequery\|\\sthisisagoddamnlongstringforstresstestingthequery)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\tthisisagoddamnlongstringforstresstestingthequery'. |
| redos.py:262:24:262:87 | (thisisagoddamnlongstringforstresstestingthequery\|this\\w+query)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'thisisagoddamnlongstringforstresstestingthequery'. |
| redos.py:262:78:262:80 | \\w+ | This part of the regular expression may cause exponential backtracking on strings starting with 'this' and containing many repetitions of '0querythis'. |
| redos.py:268:28:268:39 | ([\ufffd\ufffd]\|[\ufffd\ufffd])* | This part of the regular expression may cause exponential backtracking on strings starting with 'foo' and containing many repetitions of '\ufffd'. |
| redos.py:271:28:271:41 | ((\ufffd\|\ufffd)\|(\ufffd\|\ufffd))* | This part of the regular expression may cause exponential backtracking on strings starting with 'foo' and containing many repetitions of '\ufffd'. |
| redos.py:268:28:268:39 | ([\ufffd\ufffd]\|[\ufffd\ufffd])* | This part of the regular expression may cause exponential backtracking on strings starting with 'foo' and containing many repetitions of '\\ufffd'. |
| redos.py:271:28:271:41 | ((\ufffd\|\ufffd)\|(\ufffd\|\ufffd))* | This part of the regular expression may cause exponential backtracking on strings starting with 'foo' and containing many repetitions of '\\ufffd'. |
| redos.py:274:31:274:32 | b+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'b'. |
| redos.py:277:48:277:50 | \\s* | This part of the regular expression may cause exponential backtracking on strings starting with '<0\\t0=' and containing many repetitions of '""\\t0='. |
| redos.py:283:26:283:27 | a+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
@@ -103,5 +103,5 @@
| redos.py:385:24:385:30 | (\\d\|0)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
| redos.py:386:26:386:32 | (\\d\|0)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
| redos.py:391:15:391:25 | (\\u0061\|a)* | This part of the regular expression may cause exponential backtracking on strings starting with 'X' and containing many repetitions of 'a'. |
| unittests.py:5:17:5:23 | (\u00c6\|\\\u00c6)+ | This part of the regular expression may cause exponential backtracking on strings starting with 'X' and containing many repetitions of '\u00c6'. |
| unittests.py:5:17:5:23 | (\u00c6\|\\\u00c6)+ | This part of the regular expression may cause exponential backtracking on strings starting with 'X' and containing many repetitions of '\\u00c6'. |
| unittests.py:9:16:9:24 | (?:.\|\\n)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\n'. |