From 4d2ce6b2e024a6bd7a7ddd0602e3ad5960d72709 Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Fri, 23 Jun 2023 08:10:49 +0200 Subject: [PATCH] python: create shared serverless module and use it Modelled on the javascript serverless module, but - The predicate that reports YAML files is now public so languages can implement their own file conventions. - It also reports framework and runtime. - The convenience predicates with files still exist, but they only report the path. - Handler mapping conventions are now documented. - Use parameterised serverless module in Python, tests now pass. --- python/ql/lib/semmle/python/Frameworks.qll | 1 + .../semmle/python/frameworks/ServerLess.qll | 59 +++++++ .../aws_lambda/function/extra_lambdas.py | 4 +- .../aws_lambda/function/lambda_function.py | 2 +- shared/yaml/codeql/serverless/ServerLess.qll | 160 ++++++++++++++++++ shared/yaml/codeql/serverless/mapping.md | 81 +++++++++ 6 files changed, 304 insertions(+), 3 deletions(-) create mode 100644 python/ql/lib/semmle/python/frameworks/ServerLess.qll create mode 100644 shared/yaml/codeql/serverless/ServerLess.qll create mode 100644 shared/yaml/codeql/serverless/mapping.md diff --git a/python/ql/lib/semmle/python/Frameworks.qll b/python/ql/lib/semmle/python/Frameworks.qll index c0d3c5e0b23..82cb69679cb 100644 --- a/python/ql/lib/semmle/python/Frameworks.qll +++ b/python/ql/lib/semmle/python/Frameworks.qll @@ -49,6 +49,7 @@ private import semmle.python.frameworks.Requests private import semmle.python.frameworks.RestFramework private import semmle.python.frameworks.Rsa private import semmle.python.frameworks.RuamelYaml +private import semmle.python.frameworks.ServerLess private import semmle.python.frameworks.Simplejson private import semmle.python.frameworks.SqlAlchemy private import semmle.python.frameworks.Starlette diff --git a/python/ql/lib/semmle/python/frameworks/ServerLess.qll b/python/ql/lib/semmle/python/frameworks/ServerLess.qll new file mode 100644 index
00000000000..660bbb364ac --- /dev/null +++ b/python/ql/lib/semmle/python/frameworks/ServerLess.qll @@ -0,0 +1,59 @@ +import python +import codeql.serverless.ServerLess +import semmle.python.dataflow.new.DataFlow +import semmle.python.dataflow.new.RemoteFlowSources + +private module YamlImpl implements Input { + import semmle.python.Files + import semmle.python.Yaml +} + +module SL = ServerLess; + +/** + * Gets a function that is a serverless request handler. + * + * For example: if an AWS serverless resource contains the following properties (in the "template.yml" file): + * ```yaml + * Handler: mylibrary.handler + * Runtime: pythonXXX + * CodeUri: backend/src/ + * ``` + * + * And a file "mylibrary.py" exists in the folder "backend/src" (relative to the "template.yml" file). + * Then the result of this predicate is a function exported as "handler" from "mylibrary.py". + * The "mylibrary.py" file could for example look like: + * + * ```python + * def handler(event): + * ... + * ``` + */ +private Function getAServerlessHandler() { + exists(File file, string stem, string handler, string runtime, Module mod | + SL::hasServerlessHandler(stem, handler, _, runtime) and + file.getAbsolutePath() = stem + ".py" and + // if runtime is specified, it should be python + (runtime = "" or runtime.matches("python%")) + | + mod.getFile() = file and + mod.getAnExport() = handler and + result.getEnclosingModule() = mod and + result.getName() = handler + ) +} + +private DataFlow::ParameterNode getAHandlerEventParameter() { + exists(Function func | func = getAServerlessHandler() | + result.getParameter() in [func.getArg(0), func.getArgByName("event")] + ) +} + +/** + * A serverless request handler event, seen as a RemoteFlowSource. 
+ */ +private class ServerlessHandlerEventAsRemoteFlow extends RemoteFlowSource::Range { + ServerlessHandlerEventAsRemoteFlow() { this = getAHandlerEventParameter() } + + override string getSourceType() { result = "Serverless event" } +} diff --git a/python/ql/test/library-tests/frameworks/serverless/aws_lambda/function/extra_lambdas.py b/python/ql/test/library-tests/frameworks/serverless/aws_lambda/function/extra_lambdas.py index 80dccd9a756..2f37bba5e99 100644 --- a/python/ql/test/library-tests/frameworks/serverless/aws_lambda/function/extra_lambdas.py +++ b/python/ql/test/library-tests/frameworks/serverless/aws_lambda/function/extra_lambdas.py @@ -1,5 +1,5 @@ -def handler1(event, context): # $ MISSING: remoteFlow=event, remoteFlow=context +def handler1(event, context): # $ remoteFlow=event return "Hello World!" -def handler2(event, context): # $ MISSING: remoteFlow=event, remoteFlow=context +def handler2(event, context): # $ remoteFlow=event return "Hello World!" diff --git a/python/ql/test/library-tests/frameworks/serverless/aws_lambda/function/lambda_function.py b/python/ql/test/library-tests/frameworks/serverless/aws_lambda/function/lambda_function.py index 9637b0b66c9..4efa7499624 100644 --- a/python/ql/test/library-tests/frameworks/serverless/aws_lambda/function/lambda_function.py +++ b/python/ql/test/library-tests/frameworks/serverless/aws_lambda/function/lambda_function.py @@ -1,2 +1,2 @@ -def lambda_handler(event, context): # $ MISSING: remoteFlow=event, remoteFlow=context +def lambda_handler(event, context): # $ remoteFlow=event return "OK" diff --git a/shared/yaml/codeql/serverless/ServerLess.qll b/shared/yaml/codeql/serverless/ServerLess.qll new file mode 100644 index 00000000000..bc16626e1e3 --- /dev/null +++ b/shared/yaml/codeql/serverless/ServerLess.qll @@ -0,0 +1,160 @@ +/** + * Provides classes and predicates for working with serverless handlers. + * E.g. 
[AWS](https://docs.aws.amazon.com/lambda/latest/dg/nodejs-handler.html) or [serverless](https://npmjs.com/package/serverless) + */ + +/** + * Provides the input for the `ServerLess` module. + * Most of these should be provided by the `yaml` library. + */ +signature module Input { + // -------------------------------------------------- + // The below should be provided by the `yaml` library. + // -------------------------------------------------- + class Container { + string getAbsolutePath(); + + Container getParentContainer(); + } + + class File extends Container; + + class YamlNode { + File getFile(); + + YamlCollection getParentNode(); + } + + class YamlValue extends YamlNode; + + class YamlCollection extends YamlValue; + + class YamlScalar extends YamlValue { + string getValue(); + } + + class YamlMapping extends YamlCollection { + YamlValue lookup(string key); + + YamlValue getValue(int i); + } +} + +/** + * Provides classes and predicates for working with serverless handlers. + * Supports AWS, Alibaba, and serverless. + * + * Common usage is to interpret the handlers as functions and add the + * first argument of these as remote flow sources. + */ +module ServerLess { + import I + + /** + * Gets the looked up value as a convenience. + */ + pragma[inline] + private string lookupValue(YamlMapping mapping, string property) { + result = mapping.lookup(property).(YamlScalar).getValue() + } + + /** + * Gets a string where an ending "/." is simplified to "/" (if it exists). + */ + bindingset[base] + private string removeTrailingDot(string base) { + if base.regexpMatch(".*/\\.") + then result = base.substring(0, base.length() - 1) + else result = base + } + + /** + * Gets a string where a leading "./" is simplified to "" (if it exists). 
+ */ + bindingset[base] + private string removeLeadingDotSlash(string base) { + if base.regexpMatch("\\./.*") then result = base.substring(2, base.length()) else result = base + } + + /** + * Gets a string suitable as part of a file path. + */ + bindingset[base] + private string normalise(string base) { result = removeLeadingDotSlash(removeTrailingDot(base)) } + + /** + * Holds if the `.yml` file `ymlFile` contains a serverless configuration for `framework` with + * `handler`, `codeURI`, and `runtime` properties. + * `codeURI` and `runtime` default to the empty string if no explicit value is set in the configuration. + * + * `handler` should be interpreted in a language specific way, see `mapping.md`. + */ + predicate hasServerlessHandler( + File ymlFile, string framework, string handler, string codeUri, string runtime + ) { + exists(YamlMapping resource | ymlFile = resource.getFile() | + // There exists at least "AWS::Serverless::Function" and "Aliyun::Serverless::Function" + resource.lookup("Type").(YamlScalar).getValue().regexpMatch(".*::Serverless::Function") and + framework = lookupValue(resource, "Type") and + exists(YamlMapping properties | properties = resource.lookup("Properties") | + ( + handler = lookupValue(properties, "Handler") and + ( + if exists(properties.lookup("CodeUri")) + then codeUri = normalise(lookupValue(properties, "CodeUri")) + else codeUri = "" + ) and + ( + if exists(properties.lookup("Runtime")) + then runtime = lookupValue(properties, "Runtime") + else runtime = "" + ) + ) + ) + or + // The `serverless` library, which specifies a top-level `functions` property + framework = "Serverless" and + exists(YamlMapping functions | + functions = resource.lookup("functions") and + not exists(resource.getParentNode()) and + handler = lookupValue(functions.getValue(_), "handler") and + codeUri = "" and + ( + if exists(functions.lookup("Runtime")) + then runtime = lookupValue(functions, "Runtime") + else runtime = "" + ) + ) + ) + } + + /** + *
Holds if `handler` = `filePart . astPart` and `filePart` does not contain a `.`. + * This is a convenience predicate, as in many cases the first part of the handler property + * should be interpreted as (the stem of) a file name. + */ + bindingset[handler] + predicate splitHandler(string handler, string filePart, string astPart) { + exists(string pattern | pattern = "(.*?)\\.(.*)" | + filePart = handler.regexpCapture(pattern, 1) and + astPart = handler.regexpCapture(pattern, 2) + ) + } + + /** + * Holds if a file with stem `fileStem` has a serverless handler denoted by `func`. + * + * This is a convenience predicate for the common case where the first part of the + * handler property is the file name. + * + * `func` should be interpreted in a language specific way, see `mapping.md`. + */ + predicate hasServerlessHandler(string fileStem, string func, string framework, string runtime) { + exists(File ymlFile, string handler, string codeUri, string filePart | + hasServerlessHandler(ymlFile, framework, handler, codeUri, runtime) + | + splitHandler(handler, filePart, func) and + fileStem = ymlFile.getParentContainer().getAbsolutePath() + "/" + codeUri + filePart + ) + } +} diff --git a/shared/yaml/codeql/serverless/mapping.md b/shared/yaml/codeql/serverless/mapping.md new file mode 100644 index 00000000000..38f294fc4c1 --- /dev/null +++ b/shared/yaml/codeql/serverless/mapping.md @@ -0,0 +1,81 @@ +# Mapping the `handler` property to a function definition + +## AWS + +[Documentation](https://docs.aws.amazon.com/lambda/latest/dg/welcome.html) + +### Node.js or Typescript +See [documentation](https://docs.aws.amazon.com/lambda/latest/dg/nodejs-handler.html) + +Setting `handler` to `index.handler` means that `handler` is exported from `index.js`. + +For Typescript, code is first transpiled to JavaScript, see [documentation](https://docs.aws.amazon.com/lambda/latest/dg/lambda-typescript.html).
+ + +### Python +See [documentation](https://docs.aws.amazon.com/lambda/latest/dg/python-handler.html) + +Setting `handler` to `lambda_function.lambda_handler` means that `def lambda_handler` is found in `lambda_function.py`. + +### Ruby +See [documentation](https://docs.aws.amazon.com/lambda/latest/dg/ruby-handler.html) + +Setting `handler` to `function.handler` means that `def handler` is found in `function.rb`. +Setting `handler` to `source.LambdaFunctions::Handler.process` means that `def self.process` is found inside `class Handler` inside `module LambdaFunctions` in `source.rb`. + +### Java +See [documentation](https://docs.aws.amazon.com/lambda/latest/dg/java-handler.html) + +You can express the handler in the following formats: + +- `package.Class::method` – Full format. For example: example.Handler::handleRequest. + +- `package.Class` – Abbreviated format for functions that implement a handler interface. For example: example.Handler. + +### Go +See [documentation](https://docs.aws.amazon.com/lambda/latest/dg/golang-handler.html) + +When you configure a function in Go, the value of the handler setting is the executable file name. For example, if you set the value of the handler to Handler, Lambda will call the main() function in the Handler executable file. + +### C# +See [documentation](https://docs.aws.amazon.com/lambda/latest/dg/csharp-handler.html) + +`handler` is of this format: `Assembly::Namespace.ClassName::MethodName`. +For example, `HelloWorldApp::Example.Hello::MyHandler` if `public Stream MyHandler` is found inside `public class Hello` inside `namespace Example` in the `HelloWorldApp` assembly.
+ + +## Aliyun (Alibaba Cloud) +[Properties](https://www.alibabacloud.com/help/en/resource-orchestration-service/latest/aliyun-serverless-function) +[Languages](https://www.alibabacloud.com/help/en/function-compute/latest/programming-languages) + +### Node.js +See [documentation](https://www.alibabacloud.com/help/en/function-compute/latest/node-request-handler) + +The handler must be in the `File name.Method name` format. For example, if your file name is `main.js` and your method name is `handler`, the handler is `main.handler`. + +### Python +See [documentation](https://www.alibabacloud.com/help/en/function-compute/latest/programming-languages-python) + +In Python, your request handler must be in the `File name.Method name` format. For example, if your file name is `main.py` and your method name is `handler`, the handler is `main.handler`. + +### Java +See [documentation](https://www.alibabacloud.com/help/en/function-compute/latest/programming-languages-java) + +The handler must be in the `[Package name].[Class name]::[Method name]` format. For example, if the name of your package is `example`, the class type is `HelloFC`, and method is `handleRequest`, the handler can be configured as `example.HelloFC::handleRequest`. + +### C# +See [documentation](https://www.alibabacloud.com/help/en/function-compute/latest/programming-languages-csharp) + +The handler is in the format of `Assembly::Namespace.ClassName::MethodName`. + +### Go +See [documentation](https://www.alibabacloud.com/help/en/function-compute/latest/go-323505) + +The handler for FC functions in the Go language is compiled into an executable binary file. You only need to set the Request Handler parameter of the FC function to the name of the executable file. + +## Serverless +[Documentation](https://www.serverless.com/framework/docs/providers/aws/guide/functions) + +The handler property points to the file and module containing the code you want to run in your function. 
+ +There seems to be nothing language specific written down about the handler property.