Merge pull request #13729 from yoff/python/model-aws-lambdas

Python/JavaScript: Shared module for serverless functions
This commit is contained in:
yoff
2023-08-16 15:14:08 +02:00
committed by GitHub
13 changed files with 457 additions and 110 deletions

View File

@@ -1,90 +1,20 @@
/**
* Provides classes and predicates for working with serverless handlers.
* E.g. [AWS](https://docs.aws.amazon.com/lambda/latest/dg/nodejs-handler.html) or [serverless](https://npmjs.com/package/serverless)
* In particular a `RemoteFlowSource` is added for AWS, Alibaba, and serverless.
*/
import javascript
import codeql.serverless.ServerLess
private module YamlImpl implements Input {
import semmle.javascript.Files
import semmle.javascript.YAML
}
module SL = ServerLess<YamlImpl>;
/**
* Provides classes and predicates for working with serverless handlers.
* In particular a `RemoteFlowSource` is added for AWS, Alibaba, and serverless.
*/
private module ServerLess {
/**
* Holds if the `.yml` file `ymlFile` contains a serverless configuration with `handler` and `codeURI` properties.
* `codeURI` defaults to the empty string if no explicit value is set in the configuration.
*/
private predicate hasServerlessHandler(File ymlFile, string handler, string codeUri) {
exists(YamlMapping resource | ymlFile = resource.getFile() |
// There exists at least "AWS::Serverless::Function" and "Aliyun::Serverless::Function"
resource.lookup("Type").(YamlScalar).getValue().regexpMatch(".*::Serverless::Function") and
exists(YamlMapping properties | properties = resource.lookup("Properties") |
handler = properties.lookup("Handler").(YamlScalar).getValue() and
if exists(properties.lookup("CodeUri"))
then codeUri = properties.lookup("CodeUri").(YamlScalar).getValue()
else codeUri = ""
)
or
// The `serverless` library, which specifies a top-level `functions` property
exists(YamlMapping functions |
functions = resource.lookup("functions") and
not exists(resource.getParentNode()) and
handler = functions.getValue(_).(YamlMapping).lookup("handler").(YamlScalar).getValue() and
codeUri = ""
)
)
}
/**
* Gets a string where an ending "/." is simplified to "/" (if it exists).
*/
bindingset[base]
private string removeTrailingDot(string base) {
if base.regexpMatch(".*/\\.")
then result = base.substring(0, base.length() - 1)
else result = base
}
/**
* Gets a string where a leading "./" is simplified to "" (if it exists).
*/
bindingset[base]
private string removeLeadingDotSlash(string base) {
if base.regexpMatch("\\./.*") then result = base.substring(2, base.length()) else result = base
}
/**
* Gets a path to a file from a `codeURI` property and a file name from a serverless configuration.
*
* For example if `codeURI` is "function/." and `file` is "index", then the result becomes "function/index.js".
*/
bindingset[codeUri, file]
private string getPathFromHandlerProperties(string codeUri, string file) {
exists(string folder | folder = removeLeadingDotSlash(removeTrailingDot(codeUri)) |
result = folder + file + ".js"
)
}
/**
* Holds if `file` has a serverless handler function with name `func`.
*/
private predicate hasServerlessHandler(File file, string func) {
exists(File ymlFile, string handler, string codeUri, string fileName |
hasServerlessHandler(ymlFile, handler, codeUri) and
// Splits a `handler` into two components. The `fileName` to the left of the dot, and the `func` to the right.
// E.g. if `handler` is "index.foo", then `fileName` is "index" and `func` is "foo".
exists(string pattern | pattern = "(.*)\\.(.*)" |
fileName = handler.regexpCapture(pattern, 1) and
func = handler.regexpCapture(pattern, 2)
)
|
file.getAbsolutePath() =
ymlFile.getParentContainer().getAbsolutePath() + "/" +
getPathFromHandlerProperties(codeUri, fileName)
)
}
/**
* Gets a function that is a serverless request handler.
*
* For example: if an AWS serverless resource contains the following properties (in the "template.yml" file):
@@ -102,19 +32,21 @@ private module ServerLess {
* module.exports.handler = function (event) { ... }
* ```
*/
private DataFlow::FunctionNode getAServerlessHandler() {
exists(File file, string handler, Module mod | hasServerlessHandler(file, handler) |
private DataFlow::FunctionNode getAServerlessHandler() {
exists(File file, string stem, string handler, Module mod |
SL::hasServerlessHandler(stem, handler, _, _) and
file.getAbsolutePath() = stem + ".js"
|
mod.getFile() = file and
result = mod.getAnExportedValue(handler).getAFunctionValue()
)
}
}
/**
/**
* A serverless request handler event, seen as a RemoteFlowSource.
*/
private class ServerlessHandlerEventAsRemoteFlow extends RemoteFlowSource {
private class ServerlessHandlerEventAsRemoteFlow extends RemoteFlowSource {
ServerlessHandlerEventAsRemoteFlow() { this = getAServerlessHandler().getParameter(0) }
override string getSourceType() { result = "Serverless event" }
}
}

View File

@@ -0,0 +1,4 @@
---
category: minorAnalysis
---
* Added modeling of AWS Lambda handlers that can be identified with `AWS::Serverless::Function` in YAML files, where the event parameter is modeled as a remote-flow-source.

View File

@@ -49,6 +49,7 @@ private import semmle.python.frameworks.Requests
private import semmle.python.frameworks.RestFramework
private import semmle.python.frameworks.Rsa
private import semmle.python.frameworks.RuamelYaml
private import semmle.python.frameworks.ServerLess
private import semmle.python.frameworks.Simplejson
private import semmle.python.frameworks.SqlAlchemy
private import semmle.python.frameworks.Starlette

View File

@@ -0,0 +1,67 @@
/**
* Provides classes and predicates for working with those serverless handlers,
* handled by the shared library.
*
* E.g. [AWS](https://docs.aws.amazon.com/lambda/latest/dg/python-handler.html).
*
* In particular a `RemoteFlowSource` is added for each.
*/
import python
import codeql.serverless.ServerLess
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.RemoteFlowSources
/**
 * Supplies the Python `Files` and `Yaml` libraries as the `Input`
 * of the shared `ServerLess` module.
 */
private module YamlImpl implements Input {
  import semmle.python.Files
  import semmle.python.Yaml
}

/** The shared serverless library, instantiated with the Python implementation above. */
module SL = ServerLess<YamlImpl>;
/**
 * Gets a Python function that a serverless configuration names as a request handler.
 *
 * Example: suppose an AWS serverless resource in "template.yml" has these properties:
 * ```yaml
 * Handler: mylibrary.handler
 * Runtime: pythonXXX
 * CodeUri: backend/src/
 * ```
 *
 * If "mylibrary.py" exists in "backend/src" (relative to "template.yml"),
 * the result is the function named "handler" defined in "mylibrary.py", e.g.
 *
 * ```python
 * def handler(event):
 *     ...
 * ```
 */
private Function getAServerlessHandler() {
  exists(string pathNoExt, string funcName, string declaredRuntime, Module handlerModule |
    SL::hasServerlessHandler(pathNoExt, funcName, _, declaredRuntime) and
    // an unspecified runtime is accepted; a specified one has to be a Python runtime
    (declaredRuntime.matches("python%") or declaredRuntime = "") and
    handlerModule.getFile().getAbsolutePath() = pathNoExt + ".py" and
    result.getName() = funcName and
    result.getScope() = handlerModule
  )
}
/**
 * Gets the data-flow node for the event parameter of a serverless handler:
 * either its first parameter or a parameter named `event`.
 */
private DataFlow::ParameterNode getAHandlerEventParameter() {
  exists(Function handlerFunc |
    handlerFunc = getAServerlessHandler() and
    (
      result.getParameter() = handlerFunc.getArg(0)
      or
      result.getParameter() = handlerFunc.getArgByName("event")
    )
  )
}
/**
 * A serverless request handler event, seen as a RemoteFlowSource.
 *
 * The members of this class are exactly the parameter nodes returned by
 * `getAHandlerEventParameter()`.
 */
private class ServerlessHandlerEventAsRemoteFlow extends RemoteFlowSource::Range {
  ServerlessHandlerEventAsRemoteFlow() { this = getAHandlerEventParameter() }

  // The source-type label reported for these sources.
  override string getSourceType() { result = "Serverless event" }
}

View File

@@ -0,0 +1,4 @@
failures
argumentToEnsureNotTaintedNotMarkedAsSpurious
untaintedArgumentToEnsureTaintedNotMarkedAsMissing
testFailures

View File

@@ -0,0 +1,2 @@
import experimental.meta.InlineTaintTest
import MakeInlineTaintTest<TestTaintTrackingConfig>

View File

@@ -0,0 +1,13 @@
# Test fixture: the inline expectation below requires `event` to be tainted,
# i.e. this function is expected to be recognised as a serverless handler.
def handler1(event, context):
    ensure_tainted(event) # $ tainted
    return "Hello World!"
# Test fixture: a second function whose `event` parameter is expected to be
# tainted, so several handlers in one file can be checked.
def handler2(event, context):
    ensure_tainted(event) # $ tainted
    return "Hello World!"
# This function is not mentioned in template.yml,
# so it does not receive user input: `event` must NOT be reported as tainted
# even though the signature looks like that of a handler.
def non_handler(event, context):
    ensure_not_tainted(event)
    return "Hello World!"

View File

@@ -0,0 +1,11 @@
# Test fixture: checks that taint on `event` also reaches values read from it
# by key or by index, including nested accesses.
def lambda_handler(event, context):
    ensure_tainted(
        event, # $ tainted
        # event is usually a dict, see https://docs.aws.amazon.com/lambda/latest/dg/python-handler.html
        event["key"], # $ tainted
        event["key"]["key2"], # $ tainted
        event["key"][0], # $ tainted
        # but can also be a list
        event[0], # $ tainted
    )
    return "OK"

View File

@@ -0,0 +1,62 @@
# inspired by https://github.com/awsdocs/aws-lambda-developer-guide/blob/main/sample-apps/blank-python/template.yml
# but we have added extra handlers
AWSTemplateFormatVersion: '2010-09-09'
Transform: 'AWS::Serverless-2016-10-31'
Description: An AWS Lambda application that calls the Lambda API.
Resources:
function:
Type: AWS::Serverless::Function
Properties:
Handler: lambda_function.lambda_handler
Runtime: python3.8
CodeUri: function/.
Description: Call the AWS Lambda API
Timeout: 10
# Function's execution role
Policies:
- AWSLambdaBasicExecutionRole
- AWSLambda_ReadOnlyAccess
- AWSXrayWriteOnlyAccess
Tracing: Active
Layers:
- !Ref libs
function:
Type: AWS::Serverless::Function
Properties:
Handler: extra_lambdas.handler1
Runtime: python3.8
CodeUri: function/.
Description: Call the AWS Lambda API
Timeout: 10
# Function's execution role
Policies:
- AWSLambdaBasicExecutionRole
- AWSLambda_ReadOnlyAccess
- AWSXrayWriteOnlyAccess
Tracing: Active
Layers:
- !Ref libs
function:
Type: AWS::Serverless::Function
Properties:
Handler: extra_lambdas.handler2
Runtime: python3.8
CodeUri: function/.
Description: Call the AWS Lambda API
Timeout: 10
# Function's execution role
Policies:
- AWSLambdaBasicExecutionRole
- AWSLambda_ReadOnlyAccess
- AWSXrayWriteOnlyAccess
Tracing: Active
Layers:
- !Ref libs
libs:
Type: AWS::Serverless::LayerVersion
Properties:
LayerName: blank-python-lib
Description: Dependencies for the blank-python sample app.
ContentUri: package/.
CompatibleRuntimes:
- python3.8

View File

@@ -0,0 +1 @@
semmle-extractor-options: -R .

View File

@@ -0,0 +1,4 @@
---
category: feature
---
* Added library for serverless functions. Currently used by JavaScript and Python.

View File

@@ -0,0 +1,165 @@
/**
* Provides classes and predicates for working with serverless handlers.
* E.g. [AWS](https://docs.aws.amazon.com/lambda/latest/dg/nodejs-handler.html) or [serverless](https://npmjs.com/package/serverless)
*/
/**
 * Provides the input for the `ServerLess` module.
 * Most of these should be provided by the `yaml` library.
 *
 * A language instantiates `ServerLess` by passing a module that supplies
 * file-system and YAML classes with the members listed below.
 */
signature module Input {
  // --------------------------------------------------
  // The below should be provided by the `yaml` library.
  // --------------------------------------------------
  /** A file-system container; `ServerLess` uses only its path and its parent. */
  class Container {
    /** Gets the absolute path of this container. */
    string getAbsolutePath();

    /** Gets the container that holds this container. */
    Container getParentContainer();
  }

  /** A file; `ServerLess` uses it to locate the configuration file of a YAML node. */
  class File extends Container;

  /** A node in a YAML document. */
  class YamlNode {
    /** Gets the file this node is located in. */
    File getFile();

    /** Gets the collection this node is nested in; `ServerLess` treats a node without one as top-level. */
    YamlCollection getParentNode();
  }

  /** A YAML node that is a value. */
  class YamlValue extends YamlNode;

  /** A YAML value that is a collection. */
  class YamlCollection extends YamlValue;

  /** A YAML value that is a scalar. */
  class YamlScalar extends YamlValue {
    /** Gets the string value of this scalar. */
    string getValue();
  }

  /** A YAML collection that is a mapping. */
  class YamlMapping extends YamlCollection {
    /** Gets the value stored under `key` in this mapping. */
    YamlValue lookup(string key);

    /** Gets the value at index `i` of this mapping (presumably in document order; confirm against the `yaml` library). */
    YamlValue getValue(int i);
  }
}
/**
* Provides classes and predicates for working with serverless handlers.
* Supports AWS, Alibaba, and serverless.
*
* Common usage is to interpret the handlers as functions and add the
* first argument of these as remote flow sources.
*/
module ServerLess<Input I> {
import I
/**
 * Gets the string value of the scalar stored under `property` in `mapping`.
 *
 * Has no result if `property` is absent or its value is not a scalar.
 */
pragma[inline]
private string lookupValue(YamlMapping mapping, string property) {
  exists(YamlScalar scalar |
    scalar = mapping.lookup(property) and
    result = scalar.getValue()
  )
}
/**
 * Gets the scalar value stored under `property` in `mapping`,
 * or the empty string when `mapping` has no such property.
 */
bindingset[property]
pragma[inline]
private string lookupValueOrEmpty(YamlMapping mapping, string property) {
  // property present: use its scalar value
  result = mapping.lookup(property).(YamlScalar).getValue()
  or
  // property absent: fall back to the empty string
  not exists(mapping.lookup(property)) and
  result = ""
}
/**
 * Gets `base` with a final "/." shortened to "/".
 * Strings that do not end in "/." are returned unchanged.
 */
bindingset[base]
private string removeTrailingDot(string base) {
  // drop only the last character, keeping the slash
  base.regexpMatch(".*/\\.") and
  result = base.prefix(base.length() - 1)
  or
  not base.regexpMatch(".*/\\.") and
  result = base
}
/**
 * Gets `base` with an initial "./" stripped.
 * Strings that do not start with "./" are returned unchanged.
 */
bindingset[base]
private string removeLeadingDotSlash(string base) {
  // skip the two characters of the "./" prefix
  base.regexpMatch("\\./.*") and
  result = base.suffix(2)
  or
  not base.regexpMatch("\\./.*") and
  result = base
}
/**
 * Gets a version of `base` that can be used as part of a file path:
 * a trailing "/." is shortened to "/" and then a leading "./" is dropped.
 *
 * The empty string is mapped to the empty string.
 */
bindingset[base]
private string normalizePath(string base) {
  exists(string withoutTrailingDot |
    withoutTrailingDot = removeTrailingDot(base) and
    result = removeLeadingDotSlash(withoutTrailingDot)
  )
}
/**
 * Holds if the `.yml` file `ymlFile` contains a serverless configuration from `framework` with
 * `handler`, `codeURI`, and `runtime` properties.
 * `codeURI` and `runtime` default to the empty string if no explicit value is set in the configuration.
 *
 * For the `serverless` library, `runtime` is read from the function entry that declares
 * `handler`; if that entry has no `runtime`, the top-level `provider.runtime` is used.
 *
 * `handler` should be interpreted in a language specific way, see `mapping.md`.
 */
predicate hasServerlessHandler(
  File ymlFile, string framework, string handler, string codeUri, string runtime
) {
  exists(YamlMapping resource | ymlFile = resource.getFile() |
    (
      // Official AWS API uses "AWS::Serverless::Function" but we've seen that Aliyun uses the same schema ("Aliyun::Serverless::Function"), so we allow any prefix to be used.
      // Note that "AWS::Serverless::Function" expands to a "AWS::Lambda::Function" when deployed (described here: https://github.com/aws/serverless-application-model#getting-started). Also note that a "AWS::Lambda::Function" requires code in its definition, so needs different handling (see https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-lambda-function.html)
      framework = lookupValue(resource, "Type") and
      framework.regexpMatch(".*::Serverless::Function") and
      exists(YamlMapping properties | properties = resource.lookup("Properties") |
        handler = lookupValue(properties, "Handler") and
        codeUri = normalizePath(lookupValueOrEmpty(properties, "CodeUri")) and
        runtime = lookupValueOrEmpty(properties, "Runtime")
      )
    )
    or
    (
      // The `serverless` library, which specifies a top-level `functions` property
      framework = "Serverless" and
      exists(YamlMapping functions, YamlMapping functionConfig |
        functions = resource.lookup("functions") and
        not exists(resource.getParentNode()) and
        // `functions` maps function names to their configuration; keep `handler`
        // and `runtime` tied to the same function entry.
        functionConfig = functions.getValue(_) and
        handler = lookupValue(functionConfig, "handler") and
        codeUri = "" and
        (
          // the keys of a `serverless` configuration are lower-case: a function
          // level `runtime` overrides the one set under `provider`
          if exists(functionConfig.lookup("runtime"))
          then runtime = lookupValue(functionConfig, "runtime")
          else (
            if exists(resource.lookup("provider").(YamlMapping).lookup("runtime"))
            then runtime = lookupValue(resource.lookup("provider").(YamlMapping), "runtime")
            else runtime = ""
          )
        )
      )
    )
  )
}
/**
 * Holds if `handler` = `filePart . astPart` and `filePart` does not contain a `.`.
 *
 * Convenience predicate: in many cases the part of the handler property before
 * the first `.` should be interpreted as (the stem of) a file name.
 */
bindingset[handler]
predicate splitHandler(string handler, string filePart, string astPart) {
  // the reluctant first group ends at the first `.`; the rest goes to the second group
  filePart = handler.regexpCapture("(.*?)\\.(.*)", 1) and
  astPart = handler.regexpCapture("(.*?)\\.(.*)", 2)
}
/**
 * Holds if a file with path `pathNoExt` (+ some extension) has a serverless handler denoted by `func`.
 *
 * This is a convenience predicate for the common case where the first part of the
 * handler property is the file name.
 *
 * The code location from the configuration is joined to the file name with exactly
 * one `/`: a location such as "backend/src" is treated like "backend/src/", and
 * "." (like the empty string) denotes the folder of the configuration file itself.
 *
 * `func` should be interpreted in a language specific way, see `mapping.md`.
 */
predicate hasServerlessHandler(string pathNoExt, string func, string framework, string runtime) {
  exists(File ymlFile, string handler, string codeUri, string filePart, string folder |
    hasServerlessHandler(ymlFile, framework, handler, codeUri, runtime) and
    splitHandler(handler, filePart, func) and
    (
      // no folder component: the handler file sits next to the configuration file
      codeUri in ["", "."] and
      folder = ""
      or
      // already ends with a separator
      codeUri.matches("%/") and
      folder = codeUri
      or
      // a folder name without a trailing separator: add it, so that
      // "backend/src" + "index" does not become "backend/srcindex"
      not codeUri in ["", "."] and
      not codeUri.matches("%/") and
      folder = codeUri + "/"
    ) and
    pathNoExt = ymlFile.getParentContainer().getAbsolutePath() + "/" + folder + filePart
  )
}
}

View File

@@ -0,0 +1,81 @@
# Mapping the `handler` property to a function definition
## AWS
[Documentation](https://docs.aws.amazon.com/lambda/latest/dg/welcome.html)
### Node.js or Typescript
See [documentation](https://docs.aws.amazon.com/lambda/latest/dg/nodejs-handler.html)
Setting `handler` to `index.handler` means that `handler` is exported from `index.js`.
For Typescript, code is first transpiled to JavaScript, see [documentation](https://docs.aws.amazon.com/lambda/latest/dg/lambda-typescript.html).
### Python
See [documentation](https://docs.aws.amazon.com/lambda/latest/dg/python-handler.html)
Setting `handler` to `lambda_function.lambda_handler` means that `def lambda_handler` is found in `lambda_function.py`.
### Ruby
See [documentation](https://docs.aws.amazon.com/lambda/latest/dg/ruby-handler.html)
Setting `handler` to `function.handler` means that `def handler` is found in `function.rb`.
Setting `handler` to `source.LambdaFunctions::Handler.process` means that `def self.process` is found inside `class Handler` inside `module LambdaFunctions` in `source.rb`.
### Java
See [documentation](https://docs.aws.amazon.com/lambda/latest/dg/java-handler.html)
You can express the handler in the following formats:
- `package.Class::method` Full format. For example: `example.Handler::handleRequest`.
- `package.Class` Abbreviated format for functions that implement a handler interface. For example: `example.Handler`.
### Go
See [documentation](https://docs.aws.amazon.com/lambda/latest/dg/golang-handler.html)
When you configure a function in Go, the value of the handler setting is the executable file name. For example, if you set the value of the handler to `Handler`, Lambda will call the `main()` function in the `Handler` executable file.
### C#
See [documentation](https://docs.aws.amazon.com/lambda/latest/dg/csharp-handler.html)
`handler` is of this format: `Assembly::Namespace.ClassName::MethodName`.
For example, `HelloWorldApp::Example.Hello::MyHandler` if `public Stream MyHandler` is found inside `public class Hello` inside `namespace Example` in the `HelloWorldApp` assembly.
## Aliyun (Alibaba Cloud)
[Properties](https://www.alibabacloud.com/help/en/resource-orchestration-service/latest/aliyun-serverless-function)
[Languages](https://www.alibabacloud.com/help/en/function-compute/latest/programming-languages)
### Node.js
See [documentation](https://www.alibabacloud.com/help/en/function-compute/latest/node-request-handler)
The handler must be in the `File name.Method name` format. For example, if your file name is `main.js` and your method name is `handler`, the handler is `main.handler`.
### Python
See [documentation](https://www.alibabacloud.com/help/en/function-compute/latest/programming-languages-python)
In Python, your request handler must be in the `File name.Method name` format. For example, if your file name is `main.py` and your method name is `handler`, the handler is `main.handler`.
### Java
See [documentation](https://www.alibabacloud.com/help/en/function-compute/latest/programming-languages-java)
The handler must be in the `[Package name].[Class name]::[Method name]` format. For example, if the name of your package is `example`, the class type is `HelloFC`, and method is `handleRequest`, the handler can be configured as `example.HelloFC::handleRequest`.
### C#
See [documentation](https://www.alibabacloud.com/help/en/function-compute/latest/programming-languages-csharp)
The handler is in the format of `Assembly::Namespace.ClassName::MethodName`.
### Go
See [documentation](https://www.alibabacloud.com/help/en/function-compute/latest/go-323505)
The handler for FC functions in the Go language is compiled into an executable binary file. You only need to set the Request Handler parameter of the FC function to the name of the executable file.
## Serverless
[Documentation](https://www.serverless.com/framework/docs/providers/aws/guide/functions)
The handler property points to the file and module containing the code you want to run in your function.
There seems to be nothing language specific written down about the handler property.