Merge branch 'jorgectf/python/deserialization' of https://github.com/jorgectf/codeql into jorgectf/python/deserialization

This commit is contained in:
jorgectf
2022-01-31 17:48:47 +01:00
3887 changed files with 317569 additions and 114448 deletions

View File

@@ -1,4 +1,6 @@
name: codeql/python-examples
version: 0.0.2
groups:
- python
- examples
dependencies:
codeql/python-all: "*"
codeql/python-all: "*"

View File

@@ -0,0 +1,25 @@
## 0.0.7
## 0.0.6
## 0.0.5
### Minor Analysis Improvements
* Added modeling of many functions from the `os` module that use file system paths, such as `os.stat`, `os.chdir`, `os.mkdir`, and so on.
* Added modeling of the `tempfile` module for creating temporary files and directories, such as the functions `tempfile.NamedTemporaryFile` and `tempfile.TemporaryDirectory`.
* Extended the modeling of FastAPI such that custom subclasses of `fastapi.APIRouter` are recognized.
* Extended the modeling of FastAPI such that `fastapi.responses.FileResponse` is considered a `FileSystemAccess`.
* Added modeling of the `posixpath`, `ntpath`, and `genericpath` modules for path operations (although these are not supposed to be used), resulting in new sinks.
* Added modeling of `wsgiref.simple_server` applications, leading to new remote flow sources.
## 0.0.4
### Major Analysis Improvements
* Added modeling of `os.stat`, `os.lstat`, `os.statvfs`, `os.fstat`, and `os.fstatvfs`, which are new sinks for the _Uncontrolled data used in path expression_ (`py/path-injection`) query.
* Added modeling of the `posixpath`, `ntpath`, and `genericpath` modules for path operations (although these are not supposed to be used), resulting in new sinks for the _Uncontrolled data used in path expression_ (`py/path-injection`) query.
* Added modeling of `wsgiref.simple_server` applications, leading to new remote flow sources.
* Added modeling of `aiopg` for sinks executing SQL.
* Added modeling of HTTP requests and responses when using `flask_admin` (`Flask-Admin` PyPI package), which leads to additional remote flow sources.
* Added modeling of the PyPI package `toml`, which provides encoding/decoding of TOML documents, leading to new taint-tracking steps.

View File

@@ -0,0 +1,4 @@
---
category: deprecated
---
* The `codeql/python-upgrades` CodeQL pack has been removed. All upgrade scripts have been merged into the `codeql/python-all` CodeQL pack.

View File

@@ -0,0 +1,4 @@
---
category: deprecated
---
* Moved the files defining regex injection configuration and customization. Instead of `import semmle.python.security.injection.RegexInjection`, please use `import semmle.python.security.dataflow.RegexInjection` (the same applies to `RegexInjectionCustomizations`).

View File

@@ -0,0 +1,10 @@
## 0.0.4
### Major Analysis Improvements
* Added modeling of `os.stat`, `os.lstat`, `os.statvfs`, `os.fstat`, and `os.fstatvfs`, which are new sinks for the _Uncontrolled data used in path expression_ (`py/path-injection`) query.
* Added modeling of the `posixpath`, `ntpath`, and `genericpath` modules for path operations (although these are not supposed to be used), resulting in new sinks for the _Uncontrolled data used in path expression_ (`py/path-injection`) query.
* Added modeling of `wsgiref.simple_server` applications, leading to new remote flow sources.
* Added modeling of `aiopg` for sinks executing SQL.
* Added modeling of HTTP requests and responses when using `flask_admin` (`Flask-Admin` PyPI package), which leads to additional remote flow sources.
* Added modeling of the PyPI package `toml`, which provides encoding/decoding of TOML documents, leading to new taint-tracking steps.

View File

@@ -0,0 +1,10 @@
## 0.0.5
### Minor Analysis Improvements
* Added modeling of many functions from the `os` module that use file system paths, such as `os.stat`, `os.chdir`, `os.mkdir`, and so on.
* Added modeling of the `tempfile` module for creating temporary files and directories, such as the functions `tempfile.NamedTemporaryFile` and `tempfile.TemporaryDirectory`.
* Extended the modeling of FastAPI such that custom subclasses of `fastapi.APIRouter` are recognized.
* Extended the modeling of FastAPI such that `fastapi.responses.FileResponse` is considered a `FileSystemAccess`.
* Added modeling of the `posixpath`, `ntpath`, and `genericpath` modules for path operations (although these are not supposed to be used), resulting in new sinks.
* Added modeling of `wsgiref.simple_server` applications, leading to new remote flow sources.

View File

@@ -0,0 +1 @@
## 0.0.6

View File

@@ -0,0 +1 @@
## 0.0.7

View File

@@ -0,0 +1,2 @@
---
lastReleaseVersion: 0.0.7

View File

@@ -0,0 +1,6 @@
description: Add new statements and expressions for the match syntax.
compatibility: backwards
py_exprs.rel: run py_exprs.qlo
py_stmts.rel: run py_stmts.qlo
py_patterns.rel: delete
py_patterns_lists.rel: delete

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,42 @@
// Wrapper classes over raw database types come first.
/** A location entity, stored in either `locations_ast` or `locations_default`. */
class Location extends @location {
  /** Gets the line on which this location begins. */
  int getStartLine() {
    locations_ast(this, _, result, _, _, _)
    or
    locations_default(this, _, result, _, _, _)
  }

  /** Gets a textual form of this location; the file part is a fixed placeholder. */
  string toString() {
    exists(int startLine |
      startLine = this.getStartLine() and
      result = "<some file>" + ":" + startLine.toString()
    )
  }
}
/** A raw `@py_expr` entity. */
class Expr_ extends @py_expr {
  /** Gets the location associated with this expression through `py_locations`. */
  Location getLocation() { py_locations(result, this) }

  /** Gets the fixed label `"Expr"`. */
  string toString() { result = "Expr" }
}
/** A raw `@py_expr_parent` entity, used as the `parent` column of `py_exprs`. */
class ExprParent_ extends @py_expr_parent {
  /** Gets the fixed label `"ExprParent"`. */
  string toString() {
    result = "ExprParent"
  }
}
/**
 * Gets the expression kind index used before the change for the kind that has
 * index `new_index` after it.
 *
 * New kinds have been inserted such that
 * `@py_Name` which used to have index 18 now has index 19.
 * Entries with lower indices are unchanged.
 *
 * The inserted index (18) has no earlier equivalent, so this predicate has no
 * result for it. Previously it was shifted down to 17, which collides with the
 * unchanged entry at index 17.
 *
 * NOTE(review): rows whose kind is the inserted index are consequently left out
 * of the rewritten relation; confirm that no other relation needs to be
 * filtered to match.
 */
bindingset[new_index]
int old_index(int new_index) {
  // Kinds below the insertion point keep their index.
  new_index < 18 and result = new_index
  or
  // Kinds from the new position of `@py_Name` onwards shift back down.
  new_index >= 19 and result = new_index - (19 - 18)
}
// Rewrites `py_exprs`, replacing each row's kind by `old_index` of that kind.
//
// Column layout of py_exprs:
//
// py_exprs(unique int id : @py_expr,
//          int kind: int ref,
//          int parent : @py_expr_parent ref,
//          int idx : int ref);
from Expr_ expr, ExprParent_ parent, int idx, int old_kind
where
  exists(int new_kind |
    py_exprs(expr, new_kind, parent, idx) and
    old_kind = old_index(new_kind)
  )
select expr, old_kind, parent, idx

View File

@@ -0,0 +1,42 @@
// Wrapper classes over raw database types come first.
/** A location entity, stored in either `locations_ast` or `locations_default`. */
class Location extends @location {
  /** Gets the line on which this location begins. */
  int getStartLine() {
    locations_ast(this, _, result, _, _, _)
    or
    locations_default(this, _, result, _, _, _)
  }

  /** Gets a textual form of this location; the file part is a fixed placeholder. */
  string toString() {
    exists(int startLine |
      startLine = this.getStartLine() and
      result = "<some file>" + ":" + startLine.toString()
    )
  }
}
/** A raw `@py_stmt` entity. */
class Stmt_ extends @py_stmt {
  /** Gets the location associated with this statement through `py_locations`. */
  Location getLocation() { py_locations(result, this) }

  /** Gets the fixed label `"Stmt"`. */
  string toString() { result = "Stmt" }
}
/** A raw `@py_stmt_list` entity, used as the `parent` column of `py_stmts`. */
class StmtList_ extends @py_stmt_list {
  /** Gets the fixed label `"StmtList"`. */
  string toString() {
    result = "StmtList"
  }
}
/**
 * Gets the statement kind index used before the change for the kind that has
 * index `new_index` after it.
 *
 * New kinds have been inserted such that
 * `@py_Nonlocal` which used to have index 14 now has index 16.
 * Entries with lower indices are unchanged.
 *
 * The inserted indices (14 and 15) have no earlier equivalent, so this
 * predicate has no result for them. Previously they were shifted down to 12
 * and 13, which collide with the unchanged entries at those indices.
 *
 * NOTE(review): rows whose kind is one of the inserted indices are consequently
 * left out of the rewritten relation; confirm that no other relation needs to
 * be filtered to match.
 */
bindingset[new_index]
int old_index(int new_index) {
  // Kinds below the insertion point keep their index.
  new_index < 14 and result = new_index
  or
  // Kinds from the new position of `@py_Nonlocal` onwards shift back down.
  new_index >= 16 and result = new_index - (16 - 14)
}
// Rewrites `py_stmts`, replacing each row's kind by `old_index` of that kind.
// (The first variable is called `expr` although it ranges over statements.)
//
// Column layout of py_stmts:
//
// py_stmts(unique int id : @py_stmt,
//          int kind: int ref,
//          int parent : @py_stmt_list ref,
//          int idx : int ref);
from Stmt_ expr, StmtList_ parent, int idx, int old_kind
where
  exists(int new_kind |
    py_stmts(expr, new_kind, parent, idx) and
    old_kind = old_index(new_kind)
  )
select expr, old_kind, parent, idx

View File

@@ -0,0 +1,994 @@
/*
* This dbscheme is auto-generated by 'semmle/dbscheme_gen.py'.
* WARNING: Any modifications to this file will be lost.
* Relations can be changed by modifying master.py or
* by adding rules to dbscheme.template
*/
/* This is a dummy line to alter the dbscheme, so we can make a database upgrade
* without actually changing any of the dbscheme predicates. It contains a date
* to allow for such updates in the future as well.
*
* 2020-07-02
*
* DO NOT remove this comment carelessly, since it can revert the dbscheme back to a
* previously seen state (matching a previously seen SHA), which would make the upgrade
* mechanism not work properly.
*/
/*
* External artifacts
*/
externalDefects(
unique int id : @externalDefect,
varchar(900) queryPath : string ref,
int location : @location ref,
varchar(900) message : string ref,
float severity : float ref
);
externalMetrics(
unique int id : @externalMetric,
varchar(900) queryPath : string ref,
int location : @location ref,
float value : float ref
);
externalData(
int id : @externalDataElement,
varchar(900) queryPath : string ref,
int column: int ref,
varchar(900) data : string ref
);
snapshotDate(unique date snapshotDate : date ref);
sourceLocationPrefix(varchar(900) prefix : string ref);
/*
* Duplicate code
*/
duplicateCode(
unique int id : @duplication,
varchar(900) relativePath : string ref,
int equivClass : int ref);
similarCode(
unique int id : @similarity,
varchar(900) relativePath : string ref,
int equivClass : int ref);
@duplication_or_similarity = @duplication | @similarity
tokens(
int id : @duplication_or_similarity ref,
int offset : int ref,
int beginLine : int ref,
int beginColumn : int ref,
int endLine : int ref,
int endColumn : int ref);
/*
* Line metrics
*/
py_codelines(int id : @py_scope ref,
int count : int ref);
py_commentlines(int id : @py_scope ref,
int count : int ref);
py_docstringlines(int id : @py_scope ref,
int count : int ref);
py_alllines(int id : @py_scope ref,
int count : int ref);
/*
* Version history
*/
svnentries(
int id : @svnentry,
varchar(500) revision : string ref,
varchar(500) author : string ref,
date revisionDate : date ref,
int changeSize : int ref
)
svnaffectedfiles(
int id : @svnentry ref,
int file : @file ref,
varchar(500) action : string ref
)
svnentrymsg(
int id : @svnentry ref,
varchar(500) message : string ref
)
svnchurn(
int commit : @svnentry ref,
int file : @file ref,
int addedLines : int ref,
int deletedLines : int ref
)
/****************************
Python dbscheme
****************************/
files(unique int id: @file,
varchar(900) name: string ref);
folders(unique int id: @folder,
varchar(900) name: string ref);
@container = @folder | @file;
containerparent(int parent: @container ref,
unique int child: @container ref);
@sourceline = @file | @py_Module | @xmllocatable;
numlines(int element_id: @sourceline ref,
int num_lines: int ref,
int num_code: int ref,
int num_comment: int ref
);
@location = @location_ast | @location_default ;
locations_default(unique int id: @location_default,
int file: @file ref,
int beginLine: int ref,
int beginColumn: int ref,
int endLine: int ref,
int endColumn: int ref);
locations_ast(unique int id: @location_ast,
int module: @py_Module ref,
int beginLine: int ref,
int beginColumn: int ref,
int endLine: int ref,
int endColumn: int ref);
file_contents(unique int file: @file ref, string contents: string ref);
py_module_path(int module: @py_Module ref, int file: @container ref);
variable(unique int id : @py_variable,
int scope : @py_scope ref,
varchar(1) name : string ref);
py_line_lengths(unique int id : @py_line,
int file: @py_Module ref,
int line : int ref,
int length : int ref);
py_extracted_version(int module : @py_Module ref,
varchar(1) version : string ref);
/* AUTO GENERATED PART STARTS HERE */
/* <Field> AnnAssign.location = 0, location */
/* <Field> AnnAssign.value = 1, expr */
/* <Field> AnnAssign.annotation = 2, expr */
/* <Field> AnnAssign.target = 3, expr */
/* <Field> Assert.location = 0, location */
/* <Field> Assert.test = 1, expr */
/* <Field> Assert.msg = 2, expr */
/* <Field> Assign.location = 0, location */
/* <Field> Assign.value = 1, expr */
/* <Field> Assign.targets = 2, expr_list */
/* <Field> AssignExpr.location = 0, location */
/* <Field> AssignExpr.parenthesised = 1, bool */
/* <Field> AssignExpr.value = 2, expr */
/* <Field> AssignExpr.target = 3, expr */
/* <Field> Attribute.location = 0, location */
/* <Field> Attribute.parenthesised = 1, bool */
/* <Field> Attribute.value = 2, expr */
/* <Field> Attribute.attr = 3, str */
/* <Field> Attribute.ctx = 4, expr_context */
/* <Field> AugAssign.location = 0, location */
/* <Field> AugAssign.operation = 1, BinOp */
/* <Field> Await.location = 0, location */
/* <Field> Await.parenthesised = 1, bool */
/* <Field> Await.value = 2, expr */
/* <Field> BinaryExpr.location = 0, location */
/* <Field> BinaryExpr.parenthesised = 1, bool */
/* <Field> BinaryExpr.left = 2, expr */
/* <Field> BinaryExpr.op = 3, operator */
/* <Field> BinaryExpr.right = 4, expr */
/* <Parent> BinaryExpr = AugAssign */
/* <Field> BoolExpr.location = 0, location */
/* <Field> BoolExpr.parenthesised = 1, bool */
/* <Field> BoolExpr.op = 2, boolop */
/* <Field> BoolExpr.values = 3, expr_list */
/* <Field> Break.location = 0, location */
/* <Field> Bytes.location = 0, location */
/* <Field> Bytes.parenthesised = 1, bool */
/* <Field> Bytes.s = 2, bytes */
/* <Field> Bytes.prefix = 3, bytes */
/* <Field> Bytes.implicitly_concatenated_parts = 4, StringPart_list */
/* <Field> Call.location = 0, location */
/* <Field> Call.parenthesised = 1, bool */
/* <Field> Call.func = 2, expr */
/* <Field> Call.positional_args = 3, expr_list */
/* <Field> Call.named_args = 4, dict_item_list */
/* <Field> Class.name = 0, str */
/* <Field> Class.body = 1, stmt_list */
/* <Parent> Class = ClassExpr */
/* <Field> ClassExpr.location = 0, location */
/* <Field> ClassExpr.parenthesised = 1, bool */
/* <Field> ClassExpr.name = 2, str */
/* <Field> ClassExpr.bases = 3, expr_list */
/* <Field> ClassExpr.keywords = 4, dict_item_list */
/* <Field> ClassExpr.inner_scope = 5, Class */
/* <Field> Compare.location = 0, location */
/* <Field> Compare.parenthesised = 1, bool */
/* <Field> Compare.left = 2, expr */
/* <Field> Compare.ops = 3, cmpop_list */
/* <Field> Compare.comparators = 4, expr_list */
/* <Field> Continue.location = 0, location */
/* <Field> Delete.location = 0, location */
/* <Field> Delete.targets = 1, expr_list */
/* <Field> Dict.location = 0, location */
/* <Field> Dict.parenthesised = 1, bool */
/* <Field> Dict.items = 2, dict_item_list */
/* <Field> DictComp.location = 0, location */
/* <Field> DictComp.parenthesised = 1, bool */
/* <Field> DictComp.function = 2, Function */
/* <Field> DictComp.iterable = 3, expr */
/* <Field> DictUnpacking.location = 0, location */
/* <Field> DictUnpacking.value = 1, expr */
/* <Field> Ellipsis.location = 0, location */
/* <Field> Ellipsis.parenthesised = 1, bool */
/* <Field> ExceptStmt.location = 0, location */
/* <Field> ExceptStmt.type = 1, expr */
/* <Field> ExceptStmt.name = 2, expr */
/* <Field> ExceptStmt.body = 3, stmt_list */
/* <Field> Exec.location = 0, location */
/* <Field> Exec.body = 1, expr */
/* <Field> Exec.globals = 2, expr */
/* <Field> Exec.locals = 3, expr */
/* <Field> ExprStmt.location = 0, location */
/* <Field> ExprStmt.value = 1, expr */
/* <Field> Filter.location = 0, location */
/* <Field> Filter.parenthesised = 1, bool */
/* <Field> Filter.value = 2, expr */
/* <Field> Filter.filter = 3, expr */
/* <Field> For.location = 0, location */
/* <Field> For.target = 1, expr */
/* <Field> For.iter = 2, expr */
/* <Field> For.body = 3, stmt_list */
/* <Field> For.orelse = 4, stmt_list */
/* <Field> For.is_async = 5, bool */
/* <Field> FormattedValue.location = 0, location */
/* <Field> FormattedValue.parenthesised = 1, bool */
/* <Field> FormattedValue.value = 2, expr */
/* <Field> FormattedValue.conversion = 3, str */
/* <Field> FormattedValue.format_spec = 4, JoinedStr */
/* <Field> Function.name = 0, str */
/* <Field> Function.args = 1, parameter_list */
/* <Field> Function.vararg = 2, expr */
/* <Field> Function.kwonlyargs = 3, expr_list */
/* <Field> Function.kwarg = 4, expr */
/* <Field> Function.body = 5, stmt_list */
/* <Field> Function.is_async = 6, bool */
/* <Parent> Function = FunctionParent */
/* <Field> FunctionExpr.location = 0, location */
/* <Field> FunctionExpr.parenthesised = 1, bool */
/* <Field> FunctionExpr.name = 2, str */
/* <Field> FunctionExpr.args = 3, arguments */
/* <Field> FunctionExpr.returns = 4, expr */
/* <Field> FunctionExpr.inner_scope = 5, Function */
/* <Field> GeneratorExp.location = 0, location */
/* <Field> GeneratorExp.parenthesised = 1, bool */
/* <Field> GeneratorExp.function = 2, Function */
/* <Field> GeneratorExp.iterable = 3, expr */
/* <Field> Global.location = 0, location */
/* <Field> Global.names = 1, str_list */
/* <Field> If.location = 0, location */
/* <Field> If.test = 1, expr */
/* <Field> If.body = 2, stmt_list */
/* <Field> If.orelse = 3, stmt_list */
/* <Field> IfExp.location = 0, location */
/* <Field> IfExp.parenthesised = 1, bool */
/* <Field> IfExp.test = 2, expr */
/* <Field> IfExp.body = 3, expr */
/* <Field> IfExp.orelse = 4, expr */
/* <Field> Import.location = 0, location */
/* <Field> Import.names = 1, alias_list */
/* <Field> ImportExpr.location = 0, location */
/* <Field> ImportExpr.parenthesised = 1, bool */
/* <Field> ImportExpr.level = 2, int */
/* <Field> ImportExpr.name = 3, str */
/* <Field> ImportExpr.top = 4, bool */
/* <Field> ImportStar.location = 0, location */
/* <Field> ImportStar.module = 1, expr */
/* <Field> ImportMember.location = 0, location */
/* <Field> ImportMember.parenthesised = 1, bool */
/* <Field> ImportMember.module = 2, expr */
/* <Field> ImportMember.name = 3, str */
/* <Field> Fstring.location = 0, location */
/* <Field> Fstring.parenthesised = 1, bool */
/* <Field> Fstring.values = 2, expr_list */
/* <Parent> Fstring = FormattedValue */
/* <Field> KeyValuePair.location = 0, location */
/* <Field> KeyValuePair.value = 1, expr */
/* <Field> KeyValuePair.key = 2, expr */
/* <Field> Lambda.location = 0, location */
/* <Field> Lambda.parenthesised = 1, bool */
/* <Field> Lambda.args = 2, arguments */
/* <Field> Lambda.inner_scope = 3, Function */
/* <Field> List.location = 0, location */
/* <Field> List.parenthesised = 1, bool */
/* <Field> List.elts = 2, expr_list */
/* <Field> List.ctx = 3, expr_context */
/* <Field> ListComp.location = 0, location */
/* <Field> ListComp.parenthesised = 1, bool */
/* <Field> ListComp.function = 2, Function */
/* <Field> ListComp.iterable = 3, expr */
/* <Field> ListComp.generators = 4, comprehension_list */
/* <Field> ListComp.elt = 5, expr */
/* <Field> Module.name = 0, str */
/* <Field> Module.hash = 1, str */
/* <Field> Module.body = 2, stmt_list */
/* <Field> Module.kind = 3, str */
/* <Field> Name.location = 0, location */
/* <Field> Name.parenthesised = 1, bool */
/* <Field> Name.variable = 2, variable */
/* <Field> Name.ctx = 3, expr_context */
/* <Parent> Name = ParameterList */
/* <Field> Nonlocal.location = 0, location */
/* <Field> Nonlocal.names = 1, str_list */
/* <Field> Num.location = 0, location */
/* <Field> Num.parenthesised = 1, bool */
/* <Field> Num.n = 2, number */
/* <Field> Num.text = 3, number */
/* <Field> Pass.location = 0, location */
/* <Field> PlaceHolder.location = 0, location */
/* <Field> PlaceHolder.parenthesised = 1, bool */
/* <Field> PlaceHolder.variable = 2, variable */
/* <Field> PlaceHolder.ctx = 3, expr_context */
/* <Field> Print.location = 0, location */
/* <Field> Print.dest = 1, expr */
/* <Field> Print.values = 2, expr_list */
/* <Field> Print.nl = 3, bool */
/* <Field> Raise.location = 0, location */
/* <Field> Raise.exc = 1, expr */
/* <Field> Raise.cause = 2, expr */
/* <Field> Raise.type = 3, expr */
/* <Field> Raise.inst = 4, expr */
/* <Field> Raise.tback = 5, expr */
/* <Field> Repr.location = 0, location */
/* <Field> Repr.parenthesised = 1, bool */
/* <Field> Repr.value = 2, expr */
/* <Field> Return.location = 0, location */
/* <Field> Return.value = 1, expr */
/* <Field> Set.location = 0, location */
/* <Field> Set.parenthesised = 1, bool */
/* <Field> Set.elts = 2, expr_list */
/* <Field> SetComp.location = 0, location */
/* <Field> SetComp.parenthesised = 1, bool */
/* <Field> SetComp.function = 2, Function */
/* <Field> SetComp.iterable = 3, expr */
/* <Field> Slice.location = 0, location */
/* <Field> Slice.parenthesised = 1, bool */
/* <Field> Slice.start = 2, expr */
/* <Field> Slice.stop = 3, expr */
/* <Field> Slice.step = 4, expr */
/* <Field> SpecialOperation.location = 0, location */
/* <Field> SpecialOperation.parenthesised = 1, bool */
/* <Field> SpecialOperation.name = 2, str */
/* <Field> SpecialOperation.arguments = 3, expr_list */
/* <Field> Starred.location = 0, location */
/* <Field> Starred.parenthesised = 1, bool */
/* <Field> Starred.value = 2, expr */
/* <Field> Starred.ctx = 3, expr_context */
/* <Field> Str.location = 0, location */
/* <Field> Str.parenthesised = 1, bool */
/* <Field> Str.s = 2, str */
/* <Field> Str.prefix = 3, str */
/* <Field> Str.implicitly_concatenated_parts = 4, StringPart_list */
/* <Field> StringPart.text = 0, str */
/* <Field> StringPart.location = 1, location */
/* <Parent> StringPart = StringPartList */
/* <Parent> StringPartList = BytesOrStr */
/* <Field> Subscript.location = 0, location */
/* <Field> Subscript.parenthesised = 1, bool */
/* <Field> Subscript.value = 2, expr */
/* <Field> Subscript.index = 3, expr */
/* <Field> Subscript.ctx = 4, expr_context */
/* <Field> TemplateDottedNotation.location = 0, location */
/* <Field> TemplateDottedNotation.parenthesised = 1, bool */
/* <Field> TemplateDottedNotation.value = 2, expr */
/* <Field> TemplateDottedNotation.attr = 3, str */
/* <Field> TemplateDottedNotation.ctx = 4, expr_context */
/* <Field> TemplateWrite.location = 0, location */
/* <Field> TemplateWrite.value = 1, expr */
/* <Field> Try.location = 0, location */
/* <Field> Try.body = 1, stmt_list */
/* <Field> Try.orelse = 2, stmt_list */
/* <Field> Try.handlers = 3, stmt_list */
/* <Field> Try.finalbody = 4, stmt_list */
/* <Field> Tuple.location = 0, location */
/* <Field> Tuple.parenthesised = 1, bool */
/* <Field> Tuple.elts = 2, expr_list */
/* <Field> Tuple.ctx = 3, expr_context */
/* <Parent> Tuple = ParameterList */
/* <Field> UnaryExpr.location = 0, location */
/* <Field> UnaryExpr.parenthesised = 1, bool */
/* <Field> UnaryExpr.op = 2, unaryop */
/* <Field> UnaryExpr.operand = 3, expr */
/* <Field> While.location = 0, location */
/* <Field> While.test = 1, expr */
/* <Field> While.body = 2, stmt_list */
/* <Field> While.orelse = 3, stmt_list */
/* <Field> With.location = 0, location */
/* <Field> With.context_expr = 1, expr */
/* <Field> With.optional_vars = 2, expr */
/* <Field> With.body = 3, stmt_list */
/* <Field> With.is_async = 4, bool */
/* <Field> Yield.location = 0, location */
/* <Field> Yield.parenthesised = 1, bool */
/* <Field> Yield.value = 2, expr */
/* <Field> YieldFrom.location = 0, location */
/* <Field> YieldFrom.parenthesised = 1, bool */
/* <Field> YieldFrom.value = 2, expr */
/* <Field> Alias.value = 0, expr */
/* <Field> Alias.asname = 1, expr */
/* <Parent> Alias = AliasList */
/* <Parent> AliasList = Import */
/* <Field> Arguments.kw_defaults = 0, expr_list */
/* <Field> Arguments.defaults = 1, expr_list */
/* <Field> Arguments.annotations = 2, expr_list */
/* <Field> Arguments.varargannotation = 3, expr */
/* <Field> Arguments.kwargannotation = 4, expr */
/* <Field> Arguments.kw_annotations = 5, expr_list */
/* <Parent> Arguments = ArgumentsParent */
/* <Parent> boolean = BoolParent */
/* <Parent> Boolop = BoolExpr */
/* <Parent> string = Bytes */
/* <Parent> Cmpop = CmpopList */
/* <Parent> CmpopList = Compare */
/* <Field> Comprehension.location = 0, location */
/* <Field> Comprehension.iter = 1, expr */
/* <Field> Comprehension.target = 2, expr */
/* <Field> Comprehension.ifs = 3, expr_list */
/* <Parent> Comprehension = ComprehensionList */
/* <Parent> ComprehensionList = ListComp */
/* <Parent> DictItem = DictItemList */
/* <Parent> DictItemList = DictItemListParent */
/* <Field> Expr.location = 0, location */
/* <Field> Expr.parenthesised = 1, bool */
/* <Parent> Expr = ExprParent */
/* <Parent> ExprContext = ExprContextParent */
/* <Parent> ExprList = ExprListParent */
/* <Parent> int = ImportExpr */
/* <Field> Keyword.location = 0, location */
/* <Field> Keyword.value = 1, expr */
/* <Field> Keyword.arg = 2, str */
/* <Parent> Location = LocationParent */
/* <Parent> string = Num */
/* <Parent> Operator = BinaryExpr */
/* <Parent> ParameterList = Function */
/* <Field> Stmt.location = 0, location */
/* <Parent> Stmt = StmtList */
/* <Parent> StmtList = StmtListParent */
/* <Parent> string = StrParent */
/* <Parent> StringList = StrListParent */
/* <Parent> Unaryop = UnaryExpr */
/* <Parent> Variable = VariableParent */
py_Classes(unique int id : @py_Class,
unique int parent : @py_ClassExpr ref);
py_Functions(unique int id : @py_Function,
unique int parent : @py_Function_parent ref);
py_Modules(unique int id : @py_Module);
py_StringParts(unique int id : @py_StringPart,
int parent : @py_StringPart_list ref,
int idx : int ref);
py_StringPart_lists(unique int id : @py_StringPart_list,
unique int parent : @py_Bytes_or_Str ref);
py_aliases(unique int id : @py_alias,
int parent : @py_alias_list ref,
int idx : int ref);
py_alias_lists(unique int id : @py_alias_list,
unique int parent : @py_Import ref);
py_arguments(unique int id : @py_arguments,
unique int parent : @py_arguments_parent ref);
py_bools(int parent : @py_bool_parent ref,
int idx : int ref);
py_boolops(unique int id : @py_boolop,
int kind: int ref,
unique int parent : @py_BoolExpr ref);
py_bytes(varchar(1) id : string ref,
int parent : @py_Bytes ref,
int idx : int ref);
py_cmpops(unique int id : @py_cmpop,
int kind: int ref,
int parent : @py_cmpop_list ref,
int idx : int ref);
py_cmpop_lists(unique int id : @py_cmpop_list,
unique int parent : @py_Compare ref);
py_comprehensions(unique int id : @py_comprehension,
int parent : @py_comprehension_list ref,
int idx : int ref);
py_comprehension_lists(unique int id : @py_comprehension_list,
unique int parent : @py_ListComp ref);
py_dict_items(unique int id : @py_dict_item,
int kind: int ref,
int parent : @py_dict_item_list ref,
int idx : int ref);
py_dict_item_lists(unique int id : @py_dict_item_list,
unique int parent : @py_dict_item_list_parent ref);
py_exprs(unique int id : @py_expr,
int kind: int ref,
int parent : @py_expr_parent ref,
int idx : int ref);
py_expr_contexts(unique int id : @py_expr_context,
int kind: int ref,
unique int parent : @py_expr_context_parent ref);
py_expr_lists(unique int id : @py_expr_list,
int parent : @py_expr_list_parent ref,
int idx : int ref);
py_ints(int id : int ref,
unique int parent : @py_ImportExpr ref);
py_locations(unique int id : @location ref,
unique int parent : @py_location_parent ref);
py_numbers(varchar(1) id : string ref,
int parent : @py_Num ref,
int idx : int ref);
py_operators(unique int id : @py_operator,
int kind: int ref,
unique int parent : @py_BinaryExpr ref);
py_parameter_lists(unique int id : @py_parameter_list,
unique int parent : @py_Function ref);
py_stmts(unique int id : @py_stmt,
int kind: int ref,
int parent : @py_stmt_list ref,
int idx : int ref);
py_stmt_lists(unique int id : @py_stmt_list,
int parent : @py_stmt_list_parent ref,
int idx : int ref);
py_strs(varchar(1) id : string ref,
int parent : @py_str_parent ref,
int idx : int ref);
py_str_lists(unique int id : @py_str_list,
unique int parent : @py_str_list_parent ref);
py_unaryops(unique int id : @py_unaryop,
int kind: int ref,
unique int parent : @py_UnaryExpr ref);
py_variables(int id : @py_variable ref,
unique int parent : @py_variable_parent ref);
case @py_boolop.kind of
0 = @py_And
| 1 = @py_Or;
case @py_cmpop.kind of
0 = @py_Eq
| 1 = @py_Gt
| 2 = @py_GtE
| 3 = @py_In
| 4 = @py_Is
| 5 = @py_IsNot
| 6 = @py_Lt
| 7 = @py_LtE
| 8 = @py_NotEq
| 9 = @py_NotIn;
case @py_dict_item.kind of
0 = @py_DictUnpacking
| 1 = @py_KeyValuePair
| 2 = @py_keyword;
case @py_expr.kind of
0 = @py_Attribute
| 1 = @py_BinaryExpr
| 2 = @py_BoolExpr
| 3 = @py_Bytes
| 4 = @py_Call
| 5 = @py_ClassExpr
| 6 = @py_Compare
| 7 = @py_Dict
| 8 = @py_DictComp
| 9 = @py_Ellipsis
| 10 = @py_FunctionExpr
| 11 = @py_GeneratorExp
| 12 = @py_IfExp
| 13 = @py_ImportExpr
| 14 = @py_ImportMember
| 15 = @py_Lambda
| 16 = @py_List
| 17 = @py_ListComp
| 18 = @py_Name
| 19 = @py_Num
| 20 = @py_Repr
| 21 = @py_Set
| 22 = @py_SetComp
| 23 = @py_Slice
| 24 = @py_Starred
| 25 = @py_Str
| 26 = @py_Subscript
| 27 = @py_Tuple
| 28 = @py_UnaryExpr
| 29 = @py_Yield
| 30 = @py_YieldFrom
| 31 = @py_TemplateDottedNotation
| 32 = @py_Filter
| 33 = @py_PlaceHolder
| 34 = @py_Await
| 35 = @py_Fstring
| 36 = @py_FormattedValue
| 37 = @py_AssignExpr
| 38 = @py_SpecialOperation;
case @py_expr_context.kind of
0 = @py_AugLoad
| 1 = @py_AugStore
| 2 = @py_Del
| 3 = @py_Load
| 4 = @py_Param
| 5 = @py_Store;
case @py_operator.kind of
0 = @py_Add
| 1 = @py_BitAnd
| 2 = @py_BitOr
| 3 = @py_BitXor
| 4 = @py_Div
| 5 = @py_FloorDiv
| 6 = @py_LShift
| 7 = @py_Mod
| 8 = @py_Mult
| 9 = @py_Pow
| 10 = @py_RShift
| 11 = @py_Sub
| 12 = @py_MatMult;
case @py_stmt.kind of
0 = @py_Assert
| 1 = @py_Assign
| 2 = @py_AugAssign
| 3 = @py_Break
| 4 = @py_Continue
| 5 = @py_Delete
| 6 = @py_ExceptStmt
| 7 = @py_Exec
| 8 = @py_Expr_stmt
| 9 = @py_For
| 10 = @py_Global
| 11 = @py_If
| 12 = @py_Import
| 13 = @py_ImportStar
| 14 = @py_Nonlocal
| 15 = @py_Pass
| 16 = @py_Print
| 17 = @py_Raise
| 18 = @py_Return
| 19 = @py_Try
| 20 = @py_While
| 21 = @py_With
| 22 = @py_TemplateWrite
| 23 = @py_AnnAssign;
case @py_unaryop.kind of
0 = @py_Invert
| 1 = @py_Not
| 2 = @py_UAdd
| 3 = @py_USub;
@py_Bytes_or_Str = @py_Bytes | @py_Str;
@py_Function_parent = @py_DictComp | @py_FunctionExpr | @py_GeneratorExp | @py_Lambda | @py_ListComp | @py_SetComp;
@py_arguments_parent = @py_FunctionExpr | @py_Lambda;
@py_ast_node = @py_Class | @py_Function | @py_Module | @py_StringPart | @py_comprehension | @py_dict_item | @py_expr | @py_stmt;
@py_bool_parent = @py_For | @py_Function | @py_Print | @py_With | @py_expr;
@py_dict_item_list_parent = @py_Call | @py_ClassExpr | @py_Dict;
@py_expr_context_parent = @py_Attribute | @py_List | @py_Name | @py_PlaceHolder | @py_Starred | @py_Subscript | @py_TemplateDottedNotation | @py_Tuple;
@py_expr_list_parent = @py_Assign | @py_BoolExpr | @py_Call | @py_ClassExpr | @py_Compare | @py_Delete | @py_Fstring | @py_Function | @py_List | @py_Print | @py_Set | @py_SpecialOperation | @py_Tuple | @py_arguments | @py_comprehension;
@py_expr_or_stmt = @py_expr | @py_stmt;
@py_expr_parent = @py_AnnAssign | @py_Assert | @py_Assign | @py_AssignExpr | @py_Attribute | @py_AugAssign | @py_Await | @py_BinaryExpr | @py_Call | @py_Compare | @py_DictComp | @py_DictUnpacking | @py_ExceptStmt | @py_Exec | @py_Expr_stmt | @py_Filter | @py_For | @py_FormattedValue | @py_Function | @py_FunctionExpr | @py_GeneratorExp | @py_If | @py_IfExp | @py_ImportMember | @py_ImportStar | @py_KeyValuePair | @py_ListComp | @py_Print | @py_Raise | @py_Repr | @py_Return | @py_SetComp | @py_Slice | @py_Starred | @py_Subscript | @py_TemplateDottedNotation | @py_TemplateWrite | @py_UnaryExpr | @py_While | @py_With | @py_Yield | @py_YieldFrom | @py_alias | @py_arguments | @py_comprehension | @py_expr_list | @py_keyword | @py_parameter_list;
@py_location_parent = @py_DictUnpacking | @py_KeyValuePair | @py_StringPart | @py_comprehension | @py_expr | @py_keyword | @py_stmt;
@py_parameter = @py_Name | @py_Tuple;
@py_scope = @py_Class | @py_Function | @py_Module;
@py_stmt_list_parent = @py_Class | @py_ExceptStmt | @py_For | @py_Function | @py_If | @py_Module | @py_Try | @py_While | @py_With;
@py_str_list_parent = @py_Global | @py_Nonlocal;
@py_str_parent = @py_Attribute | @py_Class | @py_ClassExpr | @py_FormattedValue | @py_Function | @py_FunctionExpr | @py_ImportExpr | @py_ImportMember | @py_Module | @py_SpecialOperation | @py_Str | @py_StringPart | @py_TemplateDottedNotation | @py_keyword | @py_str_list;
@py_variable_parent = @py_Name | @py_PlaceHolder;
/*
* End of auto-generated part
*/
/* Map relative names to absolute names for imports */
py_absolute_names(int module : @py_Module ref,
varchar(1) relname : string ref,
varchar(1) absname : string ref);
py_exports(int id : @py_Module ref,
varchar(1) name : string ref);
/* Successor information */
py_successors(int predecessor : @py_flow_node ref,
int successor : @py_flow_node ref);
py_true_successors(int predecessor : @py_flow_node ref,
int successor : @py_flow_node ref);
py_exception_successors(int predecessor : @py_flow_node ref,
int successor : @py_flow_node ref);
py_false_successors(int predecessor : @py_flow_node ref,
int successor : @py_flow_node ref);
py_flow_bb_node(unique int flownode : @py_flow_node,
int realnode : @py_ast_node ref,
int basicblock : @py_flow_node ref,
int index : int ref);
py_scope_flow(int flow : @py_flow_node ref,
int scope : @py_scope ref,
int kind : int ref);
py_idoms(unique int node : @py_flow_node ref,
int immediate_dominator : @py_flow_node ref);
py_ssa_phi(int phi : @py_ssa_var ref,
int arg: @py_ssa_var ref);
py_ssa_var(unique int id : @py_ssa_var,
int var : @py_variable ref);
py_ssa_use(int node: @py_flow_node ref,
int var : @py_ssa_var ref);
py_ssa_defn(unique int id : @py_ssa_var ref,
int node: @py_flow_node ref);
@py_base_var = @py_variable | @py_ssa_var;
py_scopes(unique int node : @py_expr_or_stmt ref,
int scope : @py_scope ref);
py_scope_location(unique int id : @location ref,
unique int scope : @py_scope ref);
py_flags_versioned(varchar(1) name : string ref,
varchar(1) value : string ref,
varchar(1) version : string ref);
py_syntax_error_versioned(unique int id : @location ref,
varchar(1) message : string ref,
varchar(1) version : string ref);
py_comments(unique int id : @py_comment,
varchar(1) text : string ref,
unique int location : @location ref);
/* Type information support */
py_cobjects(unique int obj : @py_cobject);
py_cobjecttypes(unique int obj : @py_cobject ref,
int typeof : @py_cobject ref);
py_cobjectnames(unique int obj : @py_cobject ref,
varchar(1) name : string ref);
/* Kind should be 0 for introspection, > 0 from source, as follows:
1 from C extension source
*/
py_cobject_sources(int obj : @py_cobject ref,
int kind : int ref);
py_cmembers_versioned(int object : @py_cobject ref,
varchar(1) name : string ref,
int member : @py_cobject ref,
varchar(1) version : string ref);
py_citems(int object : @py_cobject ref,
int index : int ref,
int member : @py_cobject ref);
ext_argtype(int funcid : @py_object ref,
int arg : int ref,
int typeid : @py_object ref);
ext_rettype(int funcid : @py_object ref,
int typeid : @py_object ref);
ext_proptype(int propid : @py_object ref,
int typeid : @py_object ref);
ext_argreturn(int funcid : @py_object ref,
int arg : int ref);
py_special_objects(unique int obj : @py_cobject ref,
unique varchar(1) name : string ref);
py_decorated_object(int object : @py_object ref,
int level: int ref);
@py_object = @py_cobject | @py_flow_node;
@py_source_element = @py_ast_node | @container;
/* XML Files */
xmlEncoding (unique int id: @file ref, varchar(900) encoding: string ref);
xmlDTDs (unique int id: @xmldtd,
varchar(900) root: string ref,
varchar(900) publicId: string ref,
varchar(900) systemId: string ref,
int fileid: @file ref);
xmlElements (unique int id: @xmlelement,
varchar(900) name: string ref,
int parentid: @xmlparent ref,
int idx: int ref,
int fileid: @file ref);
xmlAttrs (unique int id: @xmlattribute,
int elementid: @xmlelement ref,
varchar(900) name: string ref,
varchar(3600) value: string ref,
int idx: int ref,
int fileid: @file ref);
xmlNs (int id: @xmlnamespace,
varchar(900) prefixName: string ref,
varchar(900) URI: string ref,
int fileid: @file ref);
xmlHasNs (int elementId: @xmlnamespaceable ref,
int nsId: @xmlnamespace ref,
int fileid: @file ref);
xmlComments (unique int id: @xmlcomment,
varchar(3600) text: string ref,
int parentid: @xmlparent ref,
int fileid: @file ref);
xmlChars (unique int id: @xmlcharacters,
varchar(3600) text: string ref,
int parentid: @xmlparent ref,
int idx: int ref,
int isCDATA: int ref,
int fileid: @file ref);
@xmlparent = @file | @xmlelement;
@xmlnamespaceable = @xmlelement | @xmlattribute;
xmllocations(int xmlElement: @xmllocatable ref,
int location: @location_default ref);
@xmllocatable = @xmlcharacters | @xmlelement | @xmlcomment | @xmlattribute | @xmldtd | @file | @xmlnamespace;

View File

@@ -10,6 +10,7 @@ import semmle.python.Class
import semmle.python.Import
import semmle.python.Stmts
import semmle.python.Exprs
import semmle.python.Patterns
import semmle.python.Keywords
import semmle.python.Comprehensions
import semmle.python.Flow

View File

@@ -1,7 +1,7 @@
name: codeql/python-all
version: 0.0.2
version: 0.0.8-dev
groups: python
dbscheme: semmlecode.python.dbscheme
extractor: python
library: true
dependencies:
codeql/python-upgrades: 0.0.2
upgrades: upgrades

View File

@@ -304,6 +304,8 @@ module API {
* API graph node for the prefix `foo`), in accordance with the usual semantics of Python.
*/
private import semmle.python.internal.Awaited
cached
newtype TApiNode =
/** The root of the API graph. */
@@ -356,134 +358,26 @@ module API {
)
}
/**
 * Gets the name of a known built-in.
 *
 * The result is the union of four groups of names: functions and exceptions shared
 * between Python 2 and 3, built-in constants, Python 3-only names, and Python 2-only
 * names. Each name is listed in exactly one group.
 */
private string getBuiltInName() {
  // These lists were created by inspecting the `builtins` and `__builtin__` modules in
  // Python 3 and 2 respectively, using the `dir` built-in.
  // Built-in functions and exceptions shared between Python 2 and 3
  result in [
      "abs", "all", "any", "bin", "bool", "bytearray", "callable", "chr", "classmethod",
      "compile", "complex", "delattr", "dict", "dir", "divmod", "enumerate", "eval", "filter",
      "float", "format", "frozenset", "getattr", "globals", "hasattr", "hash", "help", "hex",
      "id", "input", "int", "isinstance", "issubclass", "iter", "len", "list", "locals", "map",
      "max", "memoryview", "min", "next", "object", "oct", "open", "ord", "pow", "print",
      "property", "range", "repr", "reversed", "round", "set", "setattr", "slice", "sorted",
      "staticmethod", "str", "sum", "super", "tuple", "type", "vars", "zip", "__import__",
      // Exceptions
      "ArithmeticError", "AssertionError", "AttributeError", "BaseException", "BufferError",
      "BytesWarning", "DeprecationWarning", "EOFError", "EnvironmentError", "Exception",
      "FloatingPointError", "FutureWarning", "GeneratorExit", "IOError", "ImportError",
      "ImportWarning", "IndentationError", "IndexError", "KeyError", "KeyboardInterrupt",
      "LookupError", "MemoryError", "NameError", "NotImplementedError", "OSError",
      "OverflowError", "PendingDeprecationWarning", "ReferenceError", "RuntimeError",
      "RuntimeWarning", "StandardError", "StopIteration", "SyntaxError", "SyntaxWarning",
      "SystemError", "SystemExit", "TabError", "TypeError", "UnboundLocalError",
      "UnicodeDecodeError", "UnicodeEncodeError", "UnicodeError", "UnicodeTranslateError",
      "UnicodeWarning", "UserWarning", "ValueError", "Warning", "ZeroDivisionError",
      // Added for compatibility: listed with the shared names so that it is
      // recognised regardless of the Python version being analysed.
      "exec"
    ]
  or
  // Built-in constants shared between Python 2 and 3.
  // `NotImplemented` is a constant, not an exception, so it is listed only here.
  result in ["False", "True", "None", "NotImplemented", "Ellipsis", "__debug__"]
  or
  // Python 3 only (`exec` is already covered by the shared list above)
  result in [
      "ascii", "breakpoint", "bytes",
      // Exceptions
      "BlockingIOError", "BrokenPipeError", "ChildProcessError", "ConnectionAbortedError",
      "ConnectionError", "ConnectionRefusedError", "ConnectionResetError", "FileExistsError",
      "FileNotFoundError", "InterruptedError", "IsADirectoryError", "ModuleNotFoundError",
      "NotADirectoryError", "PermissionError", "ProcessLookupError", "RecursionError",
      "ResourceWarning", "StopAsyncIteration", "TimeoutError"
    ]
  or
  // Python 2 only
  result in [
      "basestring", "cmp", "execfile", "file", "long", "raw_input", "reduce", "reload",
      "unichr", "unicode", "xrange"
    ]
}
/**
 * Gets a data flow node that probably denotes the built-in called `name`.
 *
 * A node qualifies when it is a global load of a built-in name inside a module that
 * never assigns a global of that same name. This over-approximates: other ways of
 * shadowing or overwriting a built-in are not taken into account.
 */
private DataFlow::Node likely_builtin(string name) {
  exists(Module m, NameNode access |
    possible_builtin_accessed_in_module(access, name, m) and
    not possible_builtin_defined_in_module(name, m) and
    access = result.asCfgNode()
  )
}
/**
 * Holds if the module `m` assigns a value to a global variable called `name`, where
 * `name` is also the name of a built-in.
 */
private predicate possible_builtin_defined_in_module(string name, Module m) {
  name = getBuiltInName() and
  global_name_defined_in_module(name, m)
}
/**
 * Holds if `n` reads a global variable called `name` inside the module `m`, where
 * `name` is also the name of a built-in.
 */
private predicate possible_builtin_accessed_in_module(NameNode n, string name, Module m) {
  name = getBuiltInName() and
  n.getId() = name and
  n.isLoad() and
  n.isGlobal() and
  n.getEnclosingModule() = m
}
/**
 * Holds if `n` reads a variable called `name` that may have been introduced by an
 * `import *` in the scope `s`.
 *
 * `s` is the scope of `n` or one of its enclosing scopes. The name must not be a
 * built-in, must not be a local variable of the scope of `n` or of any enclosing
 * scope, and must not be a global assigned in the enclosing module.
 */
private predicate name_possibly_defined_in_import_star(NameNode n, string name, Scope s) {
  name = n.getId() and
  n.isLoad() and
  not name = getBuiltInName() and
  // `s` must contain at least one `import *` whose base is known to the API graph.
  s = n.getScope().getEnclosingScope*() and
  exists(potential_import_star_base(s)) and
  // Not already defined in an enclosing scope.
  not exists(LocalVariable local |
    local.getId() = name and
    local.getScope() = n.getScope().getEnclosingScope*()
  ) and
  not global_name_defined_in_module(name, n.getEnclosingModule())
}
/**
 * Holds if the module `m` contains a store to a name `name` that does not define a
 * local variable, i.e. a global variable called `name` is assigned a value in `m`.
 */
private predicate global_name_defined_in_module(string name, Module m) {
  exists(NameNode store |
    store.isStore() and
    store.getId() = name and
    store.getEnclosingModule() = m and
    not store.defines(any(LocalVariable v))
  )
}
private import semmle.python.dataflow.new.internal.Builtins
private import semmle.python.dataflow.new.internal.ImportStar
/**
* Gets the API graph node for all modules imported with `from ... import *` inside the scope `s`.
*
* For example, given
*
* `from foo.bar import *`
* ```python
* from foo.bar import *
* ```
*
* this would be the API graph node with the path
*
* `moduleImport("foo").getMember("bar")`
*/
private TApiNode potential_import_star_base(Scope s) {
exists(DataFlow::Node ref |
ref.asCfgNode() = any(ImportStarNode n | n.getScope() = s).getModule() and
use(result, ref)
exists(DataFlow::Node n |
n.asCfgNode() = ImportStar::potentialImportStarBase(s) and
use(result, n)
)
}
@@ -518,24 +412,23 @@ module API {
)
or
// awaiting
exists(Await await, DataFlow::Node awaitedValue |
exists(DataFlow::Node awaitedValue |
lbl = Label::await() and
ref.asExpr() = await and
await.getValue() = awaitedValue.asExpr() and
ref = awaited(awaitedValue) and
pred.flowsTo(awaitedValue)
)
)
or
// Built-ins, treated as members of the module `builtins`
base = MkModuleImport("builtins") and
lbl = Label::member(any(string name | ref = likely_builtin(name)))
lbl = Label::member(any(string name | ref = Builtins::likelyBuiltin(name)))
or
// Unknown variables that may belong to a module imported with `import *`
exists(Scope s |
base = potential_import_star_base(s) and
lbl =
Label::member(any(string name |
name_possibly_defined_in_import_star(ref.asCfgNode(), name, s)
ImportStar::namePossiblyDefinedInImportStar(ref.asCfgNode(), name, s)
))
)
}

View File

@@ -82,6 +82,12 @@ library class StrListParent extends StrListParent_ { }
/** Internal implementation class */
library class ExprParent extends ExprParent_ { }
/** Internal implementation class */
class PatternListParent extends PatternListParent_ { }
/** Internal implementation class */
library class PatternParent extends PatternParent_ { }
library class DictItem extends DictItem_, AstNode {
override string toString() { result = DictItem_.super.toString() }
@@ -162,6 +168,9 @@ class ExprList extends ExprList_ {
/* syntax: Expr, ... */
}
/** A list of patterns */
class PatternList extends PatternList_ { }
library class DictItemList extends DictItemList_ { }
library class DictItemListParent extends DictItemListParent_ { }

View File

@@ -218,6 +218,26 @@ library class Call_ extends @py_Call, Expr {
override string toString() { result = "Call" }
}
/**
 * INTERNAL: See the class `Case` for further information.
 *
 * Raw database view of a `@py_Case` statement. Children are looked up by fixed
 * position: pattern (1), guard (2) and body (3).
 */
library class Case_ extends @py_Case, Stmt {
/** Gets the pattern of this case statement (child 1 in `py_patterns`). */
Pattern getPattern() { py_patterns(result, _, this, 1) }
/** Gets the guard of this case statement (child 2 in `py_exprs`). */
Expr getGuard() { py_exprs(result, _, this, 2) }
/** Gets the body of this case statement (child 3 in `py_stmt_lists`). */
StmtList getBody() { py_stmt_lists(result, this, 3) }
/** Gets the statement at position `index` in the body of this case statement. */
Stmt getStmt(int index) { result = this.getBody().getItem(index) }
/** Gets any statement in the body of this case statement. */
Stmt getAStmt() { result = this.getBody().getAnItem() }
/** Gets the fixed textual representation `Case`. */
override string toString() { result = "Case" }
}
/** INTERNAL: See the class `Class` for further information. */
library class Class_ extends @py_Class {
/** Gets the name of this class. */
@@ -232,6 +252,7 @@ library class Class_ extends @py_Class {
/** Gets a statement of this class. */
Stmt getAStmt() { result = this.getBody().getAnItem() }
/** Gets a parent of this class */
ClassExpr getParent() { py_Classes(this, result) }
/** Gets a textual representation of this element. */
@@ -513,6 +534,7 @@ library class Function_ extends @py_Function {
/** Whether the async property of this function is true. */
predicate isAsync() { py_bools(this, 6) }
/** Gets a parent of this function */
FunctionParent getParent() { py_Functions(this, result) }
/** Gets a textual representation of this element. */
@@ -577,6 +599,14 @@ library class GtE_ extends @py_GtE, Cmpop {
override string toString() { result = "GtE" }
}
/**
 * INTERNAL: See the class `Guard` for further information.
 *
 * Raw database view of a `@py_Guard` expression, which wraps a single test expression.
 */
library class Guard_ extends @py_Guard, Expr {
/** Gets the test of this guard expression (child 2 in `py_exprs`). */
Expr getTest() { py_exprs(result, _, this, 2) }
/** Gets the fixed textual representation `Guard`. */
override string toString() { result = "Guard" }
}
/** INTERNAL: See the class `If` for further information. */
library class If_ extends @py_If, Stmt {
/** Gets the test of this if statement. */
@@ -790,6 +820,172 @@ library class MatMult_ extends @py_MatMult, Operator {
override string toString() { result = "MatMult" }
}
/**
 * INTERNAL: See the class `MatchStmt` for further information.
 *
 * Raw database view of a `@py_MatchStmt` statement. Children are looked up by fixed
 * position: subject (1) and the list of cases (2).
 */
library class MatchStmt_ extends @py_MatchStmt, Stmt {
/** Gets the subject of this match statement (child 1 in `py_exprs`). */
Expr getSubject() { py_exprs(result, _, this, 1) }
/** Gets the list of cases of this match statement (child 2 in `py_stmt_lists`). */
StmtList getCases() { py_stmt_lists(result, this, 2) }
/** Gets the case at position `index` in this match statement. */
Stmt getCase(int index) { result = this.getCases().getItem(index) }
/** Gets any case of this match statement. */
Stmt getACase() { result = this.getCases().getAnItem() }
/** Gets the fixed textual representation `MatchStmt`. */
override string toString() { result = "MatchStmt" }
}
/**
 * INTERNAL: See the class `MatchAsPattern` for further information.
 *
 * Raw database view of a `@py_MatchAsPattern` node: an inner pattern (child 2)
 * together with an alias expression (child 3).
 */
library class MatchAsPattern_ extends @py_MatchAsPattern, Pattern {
/** Gets the inner pattern of this `MatchAsPattern` (child 2 in `py_patterns`). */
Pattern getPattern() { py_patterns(result, _, this, 2) }
/** Gets the alias of this `MatchAsPattern` (child 3 in `py_exprs`). */
Expr getAlias() { py_exprs(result, _, this, 3) }
/** Gets the fixed textual representation `MatchAsPattern`. */
override string toString() { result = "MatchAsPattern" }
}
/**
 * INTERNAL: See the class `MatchCapturePattern` for further information.
 *
 * Raw database view of a `@py_MatchCapturePattern` node, which holds a single
 * variable expression.
 */
library class MatchCapturePattern_ extends @py_MatchCapturePattern, Pattern {
/** Gets the variable of this `MatchCapturePattern` (child 2 in `py_exprs`). */
Expr getVariable() { py_exprs(result, _, this, 2) }
/** Gets the fixed textual representation `MatchCapturePattern`. */
override string toString() { result = "MatchCapturePattern" }
}
/**
 * INTERNAL: See the class `MatchClassPattern` for further information.
 *
 * Raw database view of a `@py_MatchClassPattern` node. Children are looked up by
 * fixed position: class (2), class name (3), positional patterns (4) and keyword
 * patterns (5).
 *
 * Note that `getPositional` and `getKeyword` are each overloaded: the zero-argument
 * form yields the whole `PatternList`, the one-argument form yields a single item.
 */
library class MatchClassPattern_ extends @py_MatchClassPattern, Pattern {
/** Gets the class of this `MatchClassPattern` (child 2 in `py_exprs`). */
Expr getClass() { py_exprs(result, _, this, 2) }
/** Gets the class name of this `MatchClassPattern` (child 3 in `py_exprs`). */
Expr getClassName() { py_exprs(result, _, this, 3) }
/** Gets the list of positional patterns of this `MatchClassPattern` (child 4 in `py_pattern_lists`). */
PatternList getPositional() { py_pattern_lists(result, this, 4) }
/** Gets the positional pattern at position `index` of this `MatchClassPattern`. */
Pattern getPositional(int index) { result = this.getPositional().getItem(index) }
/** Gets any positional pattern of this `MatchClassPattern`. */
Pattern getAPositional() { result = this.getPositional().getAnItem() }
/** Gets the list of keyword patterns of this `MatchClassPattern` (child 5 in `py_pattern_lists`). */
PatternList getKeyword() { py_pattern_lists(result, this, 5) }
/** Gets the keyword pattern at position `index` of this `MatchClassPattern`. */
Pattern getKeyword(int index) { result = this.getKeyword().getItem(index) }
/** Gets any keyword pattern of this `MatchClassPattern`. */
Pattern getAKeyword() { result = this.getKeyword().getAnItem() }
/** Gets the fixed textual representation `MatchClassPattern`. */
override string toString() { result = "MatchClassPattern" }
}
/**
 * INTERNAL: See the class `MatchDoubleStarPattern` for further information.
 *
 * Raw database view of a `@py_MatchDoubleStarPattern` node, which holds a single
 * target pattern.
 */
library class MatchDoubleStarPattern_ extends @py_MatchDoubleStarPattern, Pattern {
/** Gets the target of this `MatchDoubleStarPattern` (child 2 in `py_patterns`). */
Pattern getTarget() { py_patterns(result, _, this, 2) }
/** Gets the fixed textual representation `MatchDoubleStarPattern`. */
override string toString() { result = "MatchDoubleStarPattern" }
}
/**
 * INTERNAL: See the class `MatchKeyValuePattern` for further information.
 *
 * Raw database view of a `@py_MatchKeyValuePattern` node: a key pattern (child 2)
 * paired with a value pattern (child 3).
 */
library class MatchKeyValuePattern_ extends @py_MatchKeyValuePattern, Pattern {
/** Gets the key of this `MatchKeyValuePattern` (child 2 in `py_patterns`). */
Pattern getKey() { py_patterns(result, _, this, 2) }
/** Gets the value of this `MatchKeyValuePattern` (child 3 in `py_patterns`). */
Pattern getValue() { py_patterns(result, _, this, 3) }
/** Gets the fixed textual representation `MatchKeyValuePattern`. */
override string toString() { result = "MatchKeyValuePattern" }
}
/**
 * INTERNAL: See the class `MatchKeywordPattern` for further information.
 *
 * Raw database view of a `@py_MatchKeywordPattern` node: an attribute expression
 * (child 2) paired with a value pattern (child 3).
 */
library class MatchKeywordPattern_ extends @py_MatchKeywordPattern, Pattern {
/** Gets the attribute of this `MatchKeywordPattern` (child 2 in `py_exprs`). */
Expr getAttribute() { py_exprs(result, _, this, 2) }
/** Gets the value of this `MatchKeywordPattern` (child 3 in `py_patterns`). */
Pattern getValue() { py_patterns(result, _, this, 3) }
/** Gets the fixed textual representation `MatchKeywordPattern`. */
override string toString() { result = "MatchKeywordPattern" }
}
/**
 * INTERNAL: See the class `MatchLiteralPattern` for further information.
 *
 * Raw database view of a `@py_MatchLiteralPattern` node, which holds a single
 * literal expression.
 */
library class MatchLiteralPattern_ extends @py_MatchLiteralPattern, Pattern {
/** Gets the literal of this `MatchLiteralPattern` (child 2 in `py_exprs`). */
Expr getLiteral() { py_exprs(result, _, this, 2) }
/** Gets the fixed textual representation `MatchLiteralPattern`. */
override string toString() { result = "MatchLiteralPattern" }
}
/**
 * INTERNAL: See the class `MatchMappingPattern` for further information.
 *
 * Raw database view of a `@py_MatchMappingPattern` node, which holds a list of
 * mapping patterns.
 */
library class MatchMappingPattern_ extends @py_MatchMappingPattern, Pattern {
/** Gets the list of mappings of this `MatchMappingPattern` (child 2 in `py_pattern_lists`). */
PatternList getMappings() { py_pattern_lists(result, this, 2) }
/** Gets the mapping at position `index` of this `MatchMappingPattern`. */
Pattern getMapping(int index) { result = this.getMappings().getItem(index) }
/** Gets any mapping of this `MatchMappingPattern`. */
Pattern getAMapping() { result = this.getMappings().getAnItem() }
/** Gets the fixed textual representation `MatchMappingPattern`. */
override string toString() { result = "MatchMappingPattern" }
}
/**
 * INTERNAL: See the class `MatchOrPattern` for further information.
 *
 * Raw database view of a `@py_MatchOrPattern` node, which holds a list of
 * alternative patterns.
 */
library class MatchOrPattern_ extends @py_MatchOrPattern, Pattern {
/** Gets the list of patterns of this `MatchOrPattern` (child 2 in `py_pattern_lists`). */
PatternList getPatterns() { py_pattern_lists(result, this, 2) }
/** Gets the pattern at position `index` of this `MatchOrPattern`. */
Pattern getPattern(int index) { result = this.getPatterns().getItem(index) }
/** Gets any pattern of this `MatchOrPattern`. */
Pattern getAPattern() { result = this.getPatterns().getAnItem() }
/** Gets the fixed textual representation `MatchOrPattern`. */
override string toString() { result = "MatchOrPattern" }
}
/**
 * INTERNAL: See the class `MatchSequencePattern` for further information.
 *
 * Raw database view of a `@py_MatchSequencePattern` node, which holds a list of
 * element patterns.
 */
library class MatchSequencePattern_ extends @py_MatchSequencePattern, Pattern {
/** Gets the list of patterns of this `MatchSequencePattern` (child 2 in `py_pattern_lists`). */
PatternList getPatterns() { py_pattern_lists(result, this, 2) }
/** Gets the pattern at position `index` of this `MatchSequencePattern`. */
Pattern getPattern(int index) { result = this.getPatterns().getItem(index) }
/** Gets any pattern of this `MatchSequencePattern`. */
Pattern getAPattern() { result = this.getPatterns().getAnItem() }
/** Gets the fixed textual representation `MatchSequencePattern`. */
override string toString() { result = "MatchSequencePattern" }
}
/**
 * INTERNAL: See the class `MatchStarPattern` for further information.
 *
 * Raw database view of a `@py_MatchStarPattern` node, which holds a single target
 * pattern.
 */
library class MatchStarPattern_ extends @py_MatchStarPattern, Pattern {
/** Gets the target of this `MatchStarPattern` (child 2 in `py_patterns`). */
Pattern getTarget() { py_patterns(result, _, this, 2) }
/** Gets the fixed textual representation `MatchStarPattern`. */
override string toString() { result = "MatchStarPattern" }
}
/**
 * INTERNAL: See the class `MatchValuePattern` for further information.
 *
 * Raw database view of a `@py_MatchValuePattern` node, which holds a single value
 * expression.
 */
library class MatchValuePattern_ extends @py_MatchValuePattern, Pattern {
/** Gets the value of this `MatchValuePattern` (child 2 in `py_exprs`). */
Expr getValue() { py_exprs(result, _, this, 2) }
/** Gets the fixed textual representation `MatchValuePattern`. */
override string toString() { result = "MatchValuePattern" }
}
/**
 * INTERNAL: See the class `MatchWildcardPattern` for further information.
 *
 * Raw database view of a `@py_MatchWildcardPattern` node. It has no children of
 * its own; only the textual representation is overridden.
 */
library class MatchWildcardPattern_ extends @py_MatchWildcardPattern, Pattern {
/** Gets the fixed textual representation `MatchWildcardPattern`. */
override string toString() { result = "MatchWildcardPattern" }
}
/** INTERNAL: See the class `Mod` for further information. */
library class Mod_ extends @py_Mod, Operator {
override string toString() { result = "Mod" }
@@ -1073,6 +1269,7 @@ library class StringPart_ extends @py_StringPart {
/** Gets the location of this implicitly concatenated part. */
Location getLocation() { py_locations(result, this) }
/** Gets a parent of this implicitly concatenated part */
StringPartList getParent() { py_StringParts(this, result, _) }
/** Gets a textual representation of this element. */
@@ -1081,6 +1278,7 @@ library class StringPart_ extends @py_StringPart {
/** INTERNAL: See the class `StringPartList` for further information. */
library class StringPartList_ extends @py_StringPart_list {
/** Gets a parent of this implicitly concatenated part list */
BytesOrStr getParent() { py_StringPart_lists(this, result) }
/** Gets an item of this implicitly concatenated part list */
@@ -1288,6 +1486,7 @@ library class Alias_ extends @py_alias {
/** Gets the name of this alias. */
Expr getAsname() { py_exprs(result, _, this, 1) }
/** Gets a parent of this alias */
AliasList getParent() { py_aliases(this, result, _) }
/** Gets a textual representation of this element. */
@@ -1296,6 +1495,7 @@ library class Alias_ extends @py_alias {
/** INTERNAL: See the class `AliasList` for further information. */
library class AliasList_ extends @py_alias_list {
/** Gets a parent of this alias list */
Import getParent() { py_alias_lists(this, result) }
/** Gets an item of this alias list */
@@ -1352,6 +1552,7 @@ library class Arguments_ extends @py_arguments {
/** Gets a keyword-only annotation of this parameters definition. */
Expr getAKwAnnotation() { result = this.getKwAnnotations().getAnItem() }
/** Gets a parent of this parameters definition */
ArgumentsParent getParent() { py_arguments(this, result) }
/** Gets a textual representation of this element. */
@@ -1378,6 +1579,7 @@ library class BoolParent_ extends @py_bool_parent {
/** INTERNAL: See the class `Boolop` for further information. */
library class Boolop_ extends @py_boolop {
/** Gets a parent of this boolean operator */
BoolExpr getParent() { py_boolops(this, _, result) }
/** Gets a textual representation of this element. */
@@ -1386,6 +1588,7 @@ library class Boolop_ extends @py_boolop {
/** INTERNAL: See the class `Cmpop` for further information. */
library class Cmpop_ extends @py_cmpop {
/** Gets a parent of this comparison operator */
CmpopList getParent() { py_cmpops(this, _, result, _) }
/** Gets a textual representation of this element. */
@@ -1394,6 +1597,7 @@ library class Cmpop_ extends @py_cmpop {
/** INTERNAL: See the class `CmpopList` for further information. */
library class CmpopList_ extends @py_cmpop_list {
/** Gets a parent of this comparison operator list */
Compare getParent() { py_cmpop_lists(this, result) }
/** Gets an item of this comparison operator list */
@@ -1426,6 +1630,7 @@ library class Comprehension_ extends @py_comprehension {
/** Gets a condition of this comprehension. */
Expr getAnIf() { result = this.getIfs().getAnItem() }
/** Gets a parent of this comprehension */
ComprehensionList getParent() { py_comprehensions(this, result, _) }
/** Gets a textual representation of this element. */
@@ -1434,6 +1639,7 @@ library class Comprehension_ extends @py_comprehension {
/** INTERNAL: See the class `ComprehensionList` for further information. */
library class ComprehensionList_ extends @py_comprehension_list {
/** Gets a parent of this comprehension list */
ListComp getParent() { py_comprehension_lists(this, result) }
/** Gets an item of this comprehension list */
@@ -1448,6 +1654,7 @@ library class ComprehensionList_ extends @py_comprehension_list {
/** INTERNAL: See the class `DictItem` for further information. */
library class DictItem_ extends @py_dict_item {
/** Gets a parent of this dict_item */
DictItemList getParent() { py_dict_items(this, _, result, _) }
/** Gets a textual representation of this element. */
@@ -1456,6 +1663,7 @@ library class DictItem_ extends @py_dict_item {
/** INTERNAL: See the class `DictItemList` for further information. */
library class DictItemList_ extends @py_dict_item_list {
/** Gets a parent of this dict_item list */
DictItemListParent getParent() { py_dict_item_lists(this, result) }
/** Gets an item of this dict_item list */
@@ -1482,6 +1690,7 @@ library class Expr_ extends @py_expr {
/** Whether the parenthesised property of this expression is true. */
predicate isParenthesised() { py_bools(this, 1) }
/** Gets a parent of this expression */
ExprParent getParent() { py_exprs(this, _, result, _) }
/** Gets a textual representation of this element. */
@@ -1490,6 +1699,7 @@ library class Expr_ extends @py_expr {
/** INTERNAL: See the class `ExprContext` for further information. */
library class ExprContext_ extends @py_expr_context {
/** Gets a parent of this expression context */
ExprContextParent getParent() { py_expr_contexts(this, _, result) }
/** Gets a textual representation of this element. */
@@ -1504,6 +1714,7 @@ library class ExprContextParent_ extends @py_expr_context_parent {
/** INTERNAL: See the class `ExprList` for further information. */
library class ExprList_ extends @py_expr_list {
/** Gets a parent of this expression list */
ExprListParent getParent() { py_expr_lists(this, result, _) }
/** Gets an item of this expression list */
@@ -1556,6 +1767,7 @@ library class LocationParent_ extends @py_location_parent {
/** INTERNAL: See the class `Operator` for further information. */
library class Operator_ extends @py_operator {
/** Gets a parent of this operator */
BinaryExpr getParent() { py_operators(this, _, result) }
/** Gets a textual representation of this element. */
@@ -1568,6 +1780,48 @@ library class Parameter_ extends @py_parameter {
string toString() { result = "Parameter" }
}
/**
 * INTERNAL: See the class `Pattern` for further information.
 *
 * Raw database view of any `@py_pattern` node, exposing its location, its
 * parenthesisation flag and its parent.
 */
library class Pattern_ extends @py_pattern {
/** Gets the location of this pattern, as recorded in `py_locations`. */
Location getLocation() { py_locations(result, this) }
/** Holds if the parenthesised property of this pattern is true (flag 1 in `py_bools`). */
predicate isParenthesised() { py_bools(this, 1) }
/** Gets a parent of this pattern, i.e. the node this pattern is attached to in `py_patterns`. */
PatternParent getParent() { py_patterns(this, _, result, _) }
/** Gets a textual representation of this element; the default is `Pattern`. */
string toString() { result = "Pattern" }
}
/**
 * INTERNAL: See the class `PatternList` for further information.
 *
 * Raw database view of a `@py_pattern_list` node: an indexed collection of
 * patterns whose items are the `py_patterns` rows that name this list as parent.
 */
library class PatternList_ extends @py_pattern_list {
/** Gets a parent of this pattern list, as recorded in `py_pattern_lists`. */
PatternListParent getParent() { py_pattern_lists(this, result, _) }
/** Gets any item of this pattern list, regardless of its position. */
Pattern getAnItem() { py_patterns(result, _, this, _) }
/** Gets the item at position `index` of this pattern list. */
Pattern getItem(int index) { py_patterns(result, _, this, index) }
/** Gets a textual representation of this element; the default is `PatternList`. */
string toString() { result = "PatternList" }
}
/**
 * INTERNAL: See the class `PatternListParent` for further information.
 *
 * Raw database view of the `@py_pattern_list_parent` union type, i.e. any node
 * that can be the parent of a pattern list.
 */
library class PatternListParent_ extends @py_pattern_list_parent {
/** Gets a textual representation of this element; the default is `PatternListParent`. */
string toString() { result = "PatternListParent" }
}
/**
 * INTERNAL: See the class `PatternParent` for further information.
 *
 * Raw database view of the `@py_pattern_parent` union type, i.e. any node that
 * can be the parent of a pattern.
 */
library class PatternParent_ extends @py_pattern_parent {
/** Gets a textual representation of this element; the default is `PatternParent`. */
string toString() { result = "PatternParent" }
}
/** INTERNAL: See the class `Scope` for further information. */
library class Scope_ extends @py_scope {
/** Gets a textual representation of this element. */
@@ -1579,6 +1833,7 @@ library class Stmt_ extends @py_stmt {
/** Gets the location of this statement. */
Location getLocation() { py_locations(result, this) }
/** Gets a parent of this statement */
StmtList getParent() { py_stmts(this, _, result, _) }
/** Gets a textual representation of this element. */
@@ -1587,6 +1842,7 @@ library class Stmt_ extends @py_stmt {
/** INTERNAL: See the class `StmtList` for further information. */
library class StmtList_ extends @py_stmt_list {
/** Gets a parent of this statement list */
StmtListParent getParent() { py_stmt_lists(this, result, _) }
/** Gets an item of this statement list */
@@ -1607,6 +1863,7 @@ library class StmtListParent_ extends @py_stmt_list_parent {
/** INTERNAL: See the class `StringList` for further information. */
library class StringList_ extends @py_str_list {
/** Gets a parent of this string list */
StrListParent getParent() { py_str_lists(this, result) }
/** Gets an item of this string list */
@@ -1633,6 +1890,7 @@ library class StrParent_ extends @py_str_parent {
/** INTERNAL: See the class `Unaryop` for further information. */
library class Unaryop_ extends @py_unaryop {
/** Gets a parent of this unary operation */
UnaryExpr getParent() { py_unaryops(this, _, result) }
/** Gets a textual representation of this element. */

View File

@@ -40,7 +40,7 @@ class Comment extends @py_comment {
private predicate comment_block_part(Comment start, Comment part, int i) {
not exists(Comment prev | prev.getFollowing() = part) and
exists(Comment following | part.getFollowing() = following) and
exists(part.getFollowing()) and
start = part and
i = 1
or

View File

@@ -514,7 +514,7 @@ class ComparisonControlBlock extends ConditionBlock {
Comparison getTest() { this.getLastNode() = result }
/** Whether this conditional guard implies that, in block `b`, the result of `that` is `thatIsTrue` */
/** Whether this conditional guard implies that, in block `b`, the result of `that` is `thatIsTrue` */
predicate impliesThat(BasicBlock b, Comparison that, boolean thatIsTrue) {
exists(boolean controlSense |
this.controls(b, controlSense) and

View File

@@ -326,9 +326,47 @@ module CodeExecution {
}
}
/**
* A data-flow node that constructs an SQL statement.
* Often, it is worthy of an alert if an SQL statement is constructed such that
* executing it would be a security risk.
*
* If it is important that the SQL statement is indeed executed, then use `SqlExecution`.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `SqlConstruction::Range` instead.
*/
class SqlConstruction extends DataFlow::Node instanceof SqlConstruction::Range {
  // The `instanceof` clause restricts this class to exactly the nodes modeled by
  // `SqlConstruction::Range`, matching the delegation style used by
  // `HTTP::Client::Request` elsewhere in this file; `super` refers to that `Range`.
  /** Gets the argument that specifies the SQL statements to be constructed. */
  DataFlow::Node getSql() { result = super.getSql() }
}
/** Provides a class for modeling new SQL construction APIs. */
module SqlConstruction {
/**
* A data-flow node that constructs an SQL statement.
* Often, it is worthy of an alert if an SQL statement is constructed such that
* executing it would be a security risk.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `SqlConstruction` instead.
*/
abstract class Range extends DataFlow::Node {
/** Gets the argument that specifies the SQL statements to be constructed. */
abstract DataFlow::Node getSql();
}
}
/**
* A data-flow node that executes SQL statements.
*
* If the context of interest is such that merely constructing an SQL statement
* would be valuable to report, then consider using `SqlConstruction`.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `SqlExecution::Range` instead.
*/
@@ -346,6 +384,9 @@ module SqlExecution {
/**
* A data-flow node that executes SQL statements.
*
* If the context of interest is such that merely constructing an SQL statement
* would be valuable to report, then consider using `SqlConstruction`.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `SqlExecution` instead.
*/
@@ -771,6 +812,72 @@ module HTTP {
}
}
}
/** Provides classes for modeling HTTP clients. */
module Client {
/**
* A data-flow node that makes an outgoing HTTP request.
*
* Every member predicate simply delegates (via `super`) to the corresponding
* predicate of the underlying `Request::Range`.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `HTTP::Client::Request::Range` instead.
*/
class Request extends DataFlow::Node instanceof Request::Range {
/**
* Gets a data-flow node that contributes to the URL of the request.
* Depending on the framework, a request may have multiple nodes which contribute to the URL.
*/
DataFlow::Node getAUrlPart() { result = super.getAUrlPart() }
/** Gets a string that identifies the framework used for this request. */
string getFramework() { result = super.getFramework() }
/**
* Holds if this request is made using a mode that disables SSL/TLS
* certificate validation, where `disablingNode` represents the point at
* which the validation was disabled, and `argumentOrigin` represents the origin
* of the argument that disabled the validation (which could be the same node as
* `disablingNode`).
*/
predicate disablesCertificateValidation(
DataFlow::Node disablingNode, DataFlow::Node argumentOrigin
) {
super.disablesCertificateValidation(disablingNode, argumentOrigin)
}
}
/** Provides a class for modeling new HTTP requests. */
module Request {
/**
* A data-flow node that makes an outgoing HTTP request.
*
* All three member predicates are abstract, so a model of a new API must
* implement each of them.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `HTTP::Client::Request` instead.
*/
abstract class Range extends DataFlow::Node {
/**
* Gets a data-flow node that contributes to the URL of the request.
* Depending on the framework, a request may have multiple nodes which contribute to the URL.
*/
abstract DataFlow::Node getAUrlPart();
/** Gets a string that identifies the framework used for this request. */
abstract string getFramework();
/**
* Holds if this request is made using a mode that disables SSL/TLS
* certificate validation, where `disablingNode` represents the point at
* which the validation was disabled, and `argumentOrigin` represents the origin
* of the argument that disabled the validation (which could be the same node as
* `disablingNode`).
*/
abstract predicate disablesCertificateValidation(
DataFlow::Node disablingNode, DataFlow::Node argumentOrigin
);
}
}
// TODO: investigate whether we should treat responses to client requests as
// remote-flow-sources in general.
}
}
/**

View File

@@ -718,6 +718,12 @@ class FormattedValue extends FormattedValue_ {
}
}
/** A guard in a case statement, that is, the `if <test>` part of a `case` clause */
class Guard extends Guard_ {
/* syntax: if Expr */
/** Gets the test expression of this guard. */
override Expr getASubExpression() { result = this.getTest() }
}
/* Expression Contexts */
/** A context in which an expression is used */
class ExprContext extends ExprContext_ { }

View File

@@ -6,13 +6,18 @@
// `docs/codeql/support/reusables/frameworks.rst`
private import semmle.python.frameworks.Aioch
private import semmle.python.frameworks.Aiohttp
private import semmle.python.frameworks.Aiomysql
private import semmle.python.frameworks.Aiopg
private import semmle.python.frameworks.Asyncpg
private import semmle.python.frameworks.ClickhouseDriver
private import semmle.python.frameworks.Cryptodome
private import semmle.python.frameworks.Cryptography
private import semmle.python.frameworks.Dill
private import semmle.python.frameworks.Django
private import semmle.python.frameworks.Fabric
private import semmle.python.frameworks.FastApi
private import semmle.python.frameworks.Flask
private import semmle.python.frameworks.FlaskAdmin
private import semmle.python.frameworks.FlaskSqlAlchemy
private import semmle.python.frameworks.Idna
private import semmle.python.frameworks.Invoke
@@ -23,12 +28,17 @@ private import semmle.python.frameworks.Mysql
private import semmle.python.frameworks.MySQLdb
private import semmle.python.frameworks.Peewee
private import semmle.python.frameworks.Psycopg2
private import semmle.python.frameworks.Pydantic
private import semmle.python.frameworks.PyMySQL
private import semmle.python.frameworks.Requests
private import semmle.python.frameworks.RestFramework
private import semmle.python.frameworks.Rsa
private import semmle.python.frameworks.RuamelYaml
private import semmle.python.frameworks.Simplejson
private import semmle.python.frameworks.SqlAlchemy
private import semmle.python.frameworks.Starlette
private import semmle.python.frameworks.Stdlib
private import semmle.python.frameworks.Toml
private import semmle.python.frameworks.Tornado
private import semmle.python.frameworks.Twisted
private import semmle.python.frameworks.Ujson

View File

@@ -18,7 +18,7 @@ class Function extends Function_, Scope, AstNode {
override Scope getScope() { result = this.getEnclosingScope() }
/** Whether this function is declared in a class */
predicate isMethod() { exists(Class cls | this.getEnclosingScope() = cls) }
predicate isMethod() { this.getEnclosingScope() instanceof Class }
/** Whether this is a special method, that is, whether its name has the form `__xxx__` (except `__init__`) */
predicate isSpecialMethod() {

View File

@@ -98,7 +98,7 @@ class LShift extends LShift_ {
override string getSpecialMethodName() { result = "__lshift__" }
}
/** A modulo (`%`) binary operator, which includes string formatting */
/** A modulo (`%`) binary operator, which includes string formatting */
class Mod extends Mod_ {
override string getSpecialMethodName() { result = "__mod__" }
}

View File

@@ -0,0 +1,118 @@
/**
* Wrapping generated AST classes: `Pattern_` and subclasses.
*/
import python
/** A pattern in a match statement */
class Pattern extends Pattern_, AstNode {
/** Gets the scope of this pattern, which is the scope of its containing case statement */
override Scope getScope() { result = this.getCase().getScope() }
/** Gets the case statement containing this pattern */
Case getCase() { result.contains(this) }
override string toString() { result = "Pattern" }
/** Gets the module enclosing this pattern */
Module getEnclosingModule() { result = this.getScope().getEnclosingModule() }
/** Whether the parenthesized property of this pattern is true. */
predicate isParenthesized() { Pattern_.super.isParenthesised() }
override Location getLocation() { result = Pattern_.super.getLocation() }
/** Gets an immediate (non-nested) sub-expression of this pattern */
Expr getASubExpression() { none() }
/** Gets an immediate (non-nested) sub-statement of this pattern */
Stmt getASubStatement() { none() }
/** Gets an immediate (non-nested) sub-pattern of this pattern */
Pattern getASubPattern() { none() }
/** Gets a child of this pattern: any sub-expression, sub-statement, or sub-pattern */
override AstNode getAChildNode() {
result = this.getASubExpression()
or
result = this.getASubStatement()
or
result = this.getASubPattern()
}
}
/** An as-pattern in a match statement: `<subpattern> as alias` */
class MatchAsPattern extends MatchAsPattern_ {
/** Gets the pattern on the left-hand side of `as`. */
override Pattern getASubPattern() { result = this.getPattern() }
/** Gets the alias of this as-pattern, as its sub-expression. */
override Expr getASubExpression() { result = this.getAlias() }
/** Gets the alias of this as-pattern, that is, the name after `as`. */
override Name getAlias() { result = super.getAlias() }
}
/** An or-pattern in a match statement: `(<pattern1>|<pattern2>)` */
class MatchOrPattern extends MatchOrPattern_ {
/** Gets one of the alternatives of this or-pattern. */
override Pattern getASubPattern() { result = this.getAPattern() }
}
/** A literal pattern in a match statement: `42` */
class MatchLiteralPattern extends MatchLiteralPattern_ {
/** Gets the literal of this pattern. */
override Expr getASubExpression() { result = this.getLiteral() }
}
/** A capture pattern in a match statement: `var` */
class MatchCapturePattern extends MatchCapturePattern_ {
/* syntax: varname */
/** Gets the variable of this capture pattern, as its sub-expression. */
override Expr getASubExpression() { result = this.getVariable() }
/** Gets the variable that is bound by this capture pattern */
override Name getVariable() { result = super.getVariable() }
}
/** A wildcard pattern in a match statement: `_` */
class MatchWildcardPattern extends MatchWildcardPattern_ { }
/** A value pattern in a match statement: `Http.OK` */
class MatchValuePattern extends MatchValuePattern_ {
/** Gets the value expression of this pattern. */
override Expr getASubExpression() { result = this.getValue() }
}
/** A sequence pattern in a match statement `<p1>, <p2>` */
class MatchSequencePattern extends MatchSequencePattern_ {
/** Gets one of the element patterns of this sequence pattern. */
override Pattern getASubPattern() { result = this.getAPattern() }
}
/** A star pattern in a match statement: `(..., *)` */
class MatchStarPattern extends MatchStarPattern_ {
/** Gets the target pattern of this star pattern. */
override Pattern getASubPattern() { result = this.getTarget() }
}
/** A mapping pattern in a match statement: `{'a': var}` */
class MatchMappingPattern extends MatchMappingPattern_ {
/** Gets one of the mappings of this mapping pattern. */
override Pattern getASubPattern() { result = this.getAMapping() }
}
/** A double star pattern in a match statement: `{..., **}` */
class MatchDoubleStarPattern extends MatchDoubleStarPattern_ {
/** Gets the target pattern of this double star pattern. */
override Pattern getASubPattern() { result = this.getTarget() }
}
/** A key-value pattern inside a mapping pattern: `a: var` */
class MatchKeyValuePattern extends MatchKeyValuePattern_ {
/** Gets the key pattern or the value pattern of this key-value pattern. */
override Pattern getASubPattern() { result = this.getKey() or result = this.getValue() }
}
/** A class pattern in a match statement: `Circle(radius = 3)` */
class MatchClassPattern extends MatchClassPattern_ {
/** Gets the class name expression of this class pattern. */
override Expr getASubExpression() { result = this.getClassName() }
/** Gets a positional or keyword sub-pattern of this class pattern. */
override Pattern getASubPattern() {
result = this.getAPositional() or result = this.getAKeyword()
}
}
/** A keyword pattern inside a class pattern: `radius = 3` */
class MatchKeywordPattern extends MatchKeywordPattern_ {
/** Gets the attribute of this keyword pattern, as its sub-expression. */
override Expr getASubExpression() { result = this.getAttribute() }
/** Gets the value pattern of this keyword pattern. */
override Pattern getASubPattern() { result = this.getValue() }
}

View File

@@ -76,7 +76,7 @@ class PrintAstNode extends TPrintAstNode {
/**
* Gets a child of this node.
*/
final PrintAstNode getAChild() { result = getChild(_) }
final PrintAstNode getAChild() { result = this.getChild(_) }
/**
* Gets the parent of this node, if any.
@@ -94,7 +94,7 @@ class PrintAstNode extends TPrintAstNode {
*/
string getProperty(string key) {
key = "semmle.label" and
result = toString()
result = this.toString()
}
/**
@@ -103,7 +103,7 @@ class PrintAstNode extends TPrintAstNode {
* this.
*/
string getChildEdgeLabel(int childIndex) {
exists(getChild(childIndex)) and
exists(this.getChild(childIndex)) and
result = childIndex.toString()
}
}
@@ -157,13 +157,13 @@ class AstElementNode extends PrintAstNode, TElementNode {
override PrintAstNode getChild(int childIndex) {
exists(AstNode el | result.(AstElementNode).getAstNode() = el |
el = this.getChildNode(childIndex) and not el = getStmtList(_, _).getAnItem()
el = this.getChildNode(childIndex) and not el = this.getStmtList(_, _).getAnItem()
)
or
// displaying all `StmtList` after the other children.
exists(int offset | offset = 1 + max([0, any(int index | exists(this.getChildNode(index)))]) |
exists(int index | childIndex = index + offset |
result.(StmtListNode).getList() = getStmtList(index, _)
result.(StmtListNode).getList() = this.getStmtList(index, _)
)
)
}
@@ -299,7 +299,7 @@ class StmtListNode extends PrintAstNode, TStmtListNode {
private string getLabel() { this.getList() = any(AstElementNode node).getStmtList(_, result) }
override string toString() { result = "(StmtList) " + getLabel() }
override string toString() { result = "(StmtList) " + this.getLabel() }
override PrintAstNode getChild(int childIndex) {
exists(AstNode el | result.(AstElementNode).getAstNode() = el | el = list.getItem(childIndex))

View File

@@ -467,9 +467,10 @@ class RegExpEscape extends RegExpNormalChar {
or
this.getUnescaped() = "t" and result = "\t"
or
// TODO: Find a way to include a formfeed character
// this.getUnescaped() = "f" and result = " "
// or
this.getUnescaped() = "f" and result = 12.toUnicode()
or
this.getUnescaped() = "v" and result = 11.toUnicode()
or
this.isUnicode() and
result = this.getUnicode()
}
@@ -480,7 +481,7 @@ class RegExpEscape extends RegExpNormalChar {
override string getPrimaryQLClass() { result = "RegExpEscape" }
/** Gets the part of the term following the escape character. That is e.g. "w" if the term is "\w". */
private string getUnescaped() { result = this.getText().suffix(1) }
string getUnescaped() { result = this.getText().suffix(1) }
/**
* Gets the text for this escape. That is e.g. "\w".
@@ -535,6 +536,13 @@ private int toHex(string hex) {
result = 15 and hex = ["f", "F"]
}
/**
* A word boundary, that is, a regular expression term of the form `\b`.
*/
class RegExpWordBoundary extends RegExpSpecialChar {
RegExpWordBoundary() { this.getChar() = "\\b" }
}
/**
* A character class escape in a regular expression.
* That is, an escaped character that denotes multiple characters.
@@ -801,7 +809,7 @@ class RegExpDot extends RegExpSpecialChar {
}
/**
* A dollar assertion `$` matching the end of a line.
* A dollar assertion `$` matching the end of a line, or `\Z` matching only the end of the input.
*
* Example:
*
@@ -810,13 +818,13 @@ class RegExpDot extends RegExpSpecialChar {
* ```
*/
class RegExpDollar extends RegExpSpecialChar {
RegExpDollar() { this.getChar() = "$" }
RegExpDollar() { this.getChar() = ["$", "\\Z"] }
override string getPrimaryQLClass() { result = "RegExpDollar" }
}
/**
* A caret assertion `^` matching the beginning of a line.
* A caret assertion `^` matching the beginning of a line, or `\A` matching only the beginning of the input.
*
* Example:
*
@@ -825,7 +833,7 @@ class RegExpDollar extends RegExpSpecialChar {
* ```
*/
class RegExpCaret extends RegExpSpecialChar {
RegExpCaret() { this.getChar() = "^" }
RegExpCaret() { this.getChar() = ["^", "\\A"] }
override string getPrimaryQLClass() { result = "RegExpCaret" }
}

View File

@@ -86,7 +86,7 @@ class SsaVariable extends @py_ssa_var {
/** Gets the incoming edges for a Phi node. */
private BasicBlock getAPredecessorBlockForPhi() {
exists(getAPhiInput()) and
exists(this.getAPhiInput()) and
result.getASuccessor() = this.getDefinition().getBasicBlock()
}

View File

@@ -107,7 +107,7 @@ class SpecialMethodCallNode extends PotentialSpecialMethodCallNode {
SpecialMethodCallNode() {
exists(SpecialMethod::Potential pot |
this.(SpecialMethod::Potential) = pot and
this = pot and
pot.getSelf().pointsTo().getClass().lookup(pot.getSpecialMethodName()) = resolvedSpecialMethod
)
}

View File

@@ -18,10 +18,15 @@ class Stmt extends Stmt_, AstNode {
/** Gets an immediate (non-nested) sub-statement of this statement */
Stmt getASubStatement() { none() }
/** Gets an immediate (non-nested) sub-pattern of this statement */
Pattern getASubPattern() { none() }
override AstNode getAChildNode() {
result = this.getASubExpression()
or
result = this.getASubStatement()
or
result = this.getASubPattern()
}
private ControlFlowNode possibleEntryNode() {
@@ -94,13 +99,13 @@ class AugAssign extends AugAssign_ {
* Gets the target of this augmented assignment statement.
* That is, the `a` in `a += b`.
*/
Expr getTarget() { result = this.getOperation().(BinaryExpr).getLeft() }
Expr getTarget() { result = this.getOperation().getLeft() }
/**
* Gets the value of this augmented assignment statement.
* That is, the `b` in `a += b`.
*/
Expr getValue() { result = this.getOperation().(BinaryExpr).getRight() }
Expr getValue() { result = this.getOperation().getRight() }
override Stmt getASubStatement() { none() }
}
@@ -412,6 +417,24 @@ class With extends With_ {
override Stmt getLastStatement() { result = this.getBody().getLastItem().getLastStatement() }
}
/** A match statement */
class MatchStmt extends MatchStmt_ {
/* syntax: match subject: */
/** Gets the subject of this match statement, as its sub-expression. */
override Expr getASubExpression() { result = this.getSubject() }
/** Gets one of the cases of this match statement. */
override Stmt getASubStatement() { result = this.getCase(_) }
}
/** A case statement */
class Case extends Case_ {
/* syntax: case pattern if guard: */
/** Gets the guard of this case statement, as its sub-expression. */
override Expr getASubExpression() { result = this.getGuard() }
/** Gets one of the statements in the body of this case statement. */
override Stmt getASubStatement() { result = this.getStmt(_) }
/** Gets the pattern of this case statement. */
override Pattern getASubPattern() { result = this.getPattern() }
}
/** A plain text used in a template is wrapped in a TemplateWrite statement */
class TemplateWrite extends TemplateWrite_ {
override Expr getASubExpression() { result = this.getValue() }

View File

@@ -57,7 +57,7 @@ class LocalVariable extends Variable {
override string toString() { result = "Local Variable " + this.getId() }
/** Whether this variable is a parameter */
override predicate isParameter() { exists(Parameter p | this.getAnAccess() = p) }
override predicate isParameter() { this.getAnAccess() instanceof Parameter }
/** Holds if this variable is the first parameter of a method. It is not necessarily called "self" */
override predicate isSelf() {
@@ -87,7 +87,7 @@ class NameLocalVariable extends LocalVariable {
/** A global (module-level) variable */
class GlobalVariable extends Variable {
GlobalVariable() { exists(Module m | m = this.getScope()) }
GlobalVariable() { this.getScope() instanceof Module }
override string toString() { result = "Global Variable " + this.getId() }
}

View File

@@ -1,55 +1,10 @@
/**
* Provides classes modeling cryptographic algorithms, separated into strong and weak variants.
*
* The classification into strong and weak are based on Wikipedia, OWASP and google (2017).
* The classification into strong and weak is based on Wikipedia, OWASP and Google (2021).
*/
/**
* Names of cryptographic algorithms, separated into strong and weak variants.
*
* The names are normalized: upper-case, no spaces, dashes or underscores.
*
* The names are inspired by the names used in real world crypto libraries.
*
* The classification into strong and weak are based on Wikipedia, OWASP and google (2017).
*/
private module AlgorithmNames {
predicate isStrongHashingAlgorithm(string name) {
name =
[
"DSA", "ED25519", "ES256", "ECDSA256", "ES384", "ECDSA384", "ES512", "ECDSA512", "SHA2",
"SHA224", "SHA256", "SHA384", "SHA512", "SHA3", "SHA3224", "SHA3256", "SHA3384", "SHA3512"
]
}
predicate isWeakHashingAlgorithm(string name) {
name =
[
"HAVEL128", "MD2", "MD4", "MD5", "PANAMA", "RIPEMD", "RIPEMD128", "RIPEMD256", "RIPEMD160",
"RIPEMD320", "SHA0", "SHA1"
]
}
predicate isStrongEncryptionAlgorithm(string name) {
name = ["AES", "AES128", "AES192", "AES256", "AES512", "RSA", "RABBIT", "BLOWFISH"]
}
predicate isWeakEncryptionAlgorithm(string name) {
name =
[
"DES", "3DES", "TRIPLEDES", "TDEA", "TRIPLEDEA", "ARC2", "RC2", "ARC4", "RC4", "ARCFOUR",
"ARC5", "RC5"
]
}
predicate isStrongPasswordHashingAlgorithm(string name) {
name = ["ARGON2", "PBKDF2", "BCRYPT", "SCRYPT"]
}
predicate isWeakPasswordHashingAlgorithm(string name) { none() }
}
private import AlgorithmNames
private import internal.CryptoAlgorithmNames
/**
* A cryptographic algorithm.
@@ -85,11 +40,13 @@ abstract class CryptographicAlgorithm extends TCryptographicAlgorithm {
/**
* Holds if the name of this algorithm matches `name` modulo case,
* white space, dashes, and underscores.
* white space, dashes, underscores, and anything after a dash in the name
* (to ignore modes of operation, such as CBC or ECB).
*/
bindingset[name]
predicate matchesName(string name) {
name.toUpperCase().regexpReplaceAll("[-_ ]", "") = getName()
[name.toUpperCase(), name.toUpperCase().regexpCapture("^(\\w+)(?:-.*)?$", 1)]
.regexpReplaceAll("[-_ ]", "") = getName()
}
/**

View File

@@ -0,0 +1,72 @@
/**
* Names of cryptographic algorithms, separated into strong and weak variants.
*
* The names are normalized: upper-case, no spaces, dashes or underscores.
*
* The names are inspired by the names used in real world crypto libraries.
*
* The classification into strong and weak is based on Wikipedia, OWASP and Google (2021).
*/
/**
* Holds if `name` corresponds to a strong hashing algorithm.
*/
predicate isStrongHashingAlgorithm(string name) {
name =
[
"DSA", "ED25519", "ES256", "ECDSA256", "ES384", "ECDSA384", "ES512", "ECDSA512", "SHA2",
"SHA224", "SHA256", "SHA384", "SHA512", "SHA3", "SHA3224", "SHA3256", "SHA3384", "SHA3512"
]
}
/**
* Holds if `name` corresponds to a weak hashing algorithm.
*/
predicate isWeakHashingAlgorithm(string name) {
name =
[
"HAVEL128", "MD2", "MD4", "MD5", "PANAMA", "RIPEMD", "RIPEMD128", "RIPEMD256", "RIPEMD160",
"RIPEMD320", "SHA0", "SHA1"
]
}
/**
* Holds if `name` corresponds to a strong encryption algorithm.
*
* NOTE(review): the dashed entries (for example `AES-128` and `CAMELLIA-256`) do not
* follow the "no spaces, dashes or underscores" normalization stated in this file's
* header comment. Confirm whether any consumer compares against un-normalized
* names; otherwise these entries look redundant next to `AES128`, `CAMELLIA256`, etc.
*/
predicate isStrongEncryptionAlgorithm(string name) {
name =
[
"AES", "AES128", "AES192", "AES256", "AES512", "AES-128", "AES-192", "AES-256", "AES-512",
"ARIA", "BLOWFISH", "BF", "ECIES", "CAST", "CAST5", "CAMELLIA", "CAMELLIA128", "CAMELLIA192",
"CAMELLIA256", "CAMELLIA-128", "CAMELLIA-192", "CAMELLIA-256", "CHACHA", "GOST", "GOST89",
"IDEA", "RABBIT", "RSA", "SEED", "SM4"
]
}
/**
* Holds if `name` corresponds to a weak encryption algorithm.
*/
predicate isWeakEncryptionAlgorithm(string name) {
name =
[
"DES", "3DES", "DES3", "TRIPLEDES", "DESX", "TDEA", "TRIPLEDEA", "ARC2", "RC2", "ARC4", "RC4",
"ARCFOUR", "ARC5", "RC5"
]
}
/**
* Holds if `name` corresponds to a strong password hashing algorithm.
*/
predicate isStrongPasswordHashingAlgorithm(string name) {
name = ["ARGON2", "PBKDF2", "BCRYPT", "SCRYPT"]
}
/**
* Holds if `name` corresponds to a weak password hashing algorithm.
*/
predicate isWeakPasswordHashingAlgorithm(string name) { name = "EVPKDF" }
/**
* Holds if `name` corresponds to a weak block cipher mode of operation.
*/
predicate isWeakBlockMode(string name) { name = "ECB" }

View File

@@ -0,0 +1,93 @@
/** Provides predicates for reasoning about built-ins in Python. */
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.internal.ImportStar
module Builtins {
/** Gets the name of a known built-in. */
string getBuiltinName() {
// These lists were created by inspecting the `builtins` and `__builtin__` modules in
// Python 3 and 2 respectively, using the `dir` built-in.
// Built-in functions and exceptions shared between Python 2 and 3
result in [
"abs", "all", "any", "bin", "bool", "bytearray", "callable", "chr", "classmethod",
"compile", "complex", "delattr", "dict", "dir", "divmod", "enumerate", "eval", "filter",
"float", "format", "frozenset", "getattr", "globals", "hasattr", "hash", "help", "hex",
"id", "input", "int", "isinstance", "issubclass", "iter", "len", "list", "locals", "map",
"max", "memoryview", "min", "next", "object", "oct", "open", "ord", "pow", "print",
"property", "range", "repr", "reversed", "round", "set", "setattr", "slice", "sorted",
"staticmethod", "str", "sum", "super", "tuple", "type", "vars", "zip", "__import__",
// Exceptions
"ArithmeticError", "AssertionError", "AttributeError", "BaseException", "BufferError",
"BytesWarning", "DeprecationWarning", "EOFError", "EnvironmentError", "Exception",
"FloatingPointError", "FutureWarning", "GeneratorExit", "IOError", "ImportError",
"ImportWarning", "IndentationError", "IndexError", "KeyError", "KeyboardInterrupt",
"LookupError", "MemoryError", "NameError", "NotImplemented", "NotImplementedError",
"OSError", "OverflowError", "PendingDeprecationWarning", "ReferenceError", "RuntimeError",
"RuntimeWarning", "StandardError", "StopIteration", "SyntaxError", "SyntaxWarning",
"SystemError", "SystemExit", "TabError", "TypeError", "UnboundLocalError",
"UnicodeDecodeError", "UnicodeEncodeError", "UnicodeError", "UnicodeTranslateError",
"UnicodeWarning", "UserWarning", "ValueError", "Warning", "ZeroDivisionError",
// Added for compatibility
"exec"
]
or
// Built-in constants shared between Python 2 and 3
result in ["False", "True", "None", "NotImplemented", "Ellipsis", "__debug__"]
or
// Python 3 only
result in [
"ascii", "breakpoint", "bytes", "exec",
// Exceptions
"BlockingIOError", "BrokenPipeError", "ChildProcessError", "ConnectionAbortedError",
"ConnectionError", "ConnectionRefusedError", "ConnectionResetError", "FileExistsError",
"FileNotFoundError", "InterruptedError", "IsADirectoryError", "ModuleNotFoundError",
"NotADirectoryError", "PermissionError", "ProcessLookupError", "RecursionError",
"ResourceWarning", "StopAsyncIteration", "TimeoutError"
]
or
// Python 2 only
result in [
"basestring", "cmp", "execfile", "file", "long", "raw_input", "reduce", "reload", "unichr",
"unicode", "xrange"
]
}
/**
 * Gets a data-flow node that probably refers to the built-in called `name`.
 *
 * This is currently an over-approximation: a global load of a built-in name is
 * reported unless the enclosing module defines a global of the same name, so it
 * may not account for things like overwriting a built-in with a different value.
 */
DataFlow::Node likelyBuiltin(string name) {
  exists(NameNode access, Module mod |
    possible_builtin_accessed_in_module(access, name, mod) and
    not possible_builtin_defined_in_module(name, mod) and
    result.asCfgNode() = access
  )
}
/**
* Holds if a global variable called `name` (which is also the name of a built-in) is assigned
* a value in the module `m`.
*/
private predicate possible_builtin_defined_in_module(string name, Module m) {
ImportStar::globalNameDefinedInModule(name, m) and
name = getBuiltinName()
}
/**
* Holds if `n` is an access of a global variable called `name` (which is also the name of a
* built-in) inside the module `m`.
*/
private predicate possible_builtin_accessed_in_module(NameNode n, string name, Module m) {
n.isGlobal() and
n.isLoad() and
name = n.getId() and
name = getBuiltinName() and
m = n.getEnclosingModule()
}
}

View File

@@ -2,6 +2,53 @@ private import DataFlowImplSpecific::Private
private import DataFlowImplSpecific::Public
import Cached
module DataFlowImplCommonPublic {
/** A state value to track during data flow. */
class FlowState = string;
/**
* The default state, which is used when the state is unspecified for a source
* or a sink.
*/
class FlowStateEmpty extends FlowState {
FlowStateEmpty() { this = "" }
}
private newtype TFlowFeature =
TFeatureHasSourceCallContext() or
TFeatureHasSinkCallContext() or
TFeatureEqualSourceSinkCallContext()
/** A flow configuration feature for use in `Configuration::getAFeature()`. */
class FlowFeature extends TFlowFeature {
string toString() { none() }
}
/**
* A flow configuration feature that implies that sources have some existing
* call context.
*/
class FeatureHasSourceCallContext extends FlowFeature, TFeatureHasSourceCallContext {
override string toString() { result = "FeatureHasSourceCallContext" }
}
/**
* A flow configuration feature that implies that sinks have some existing
* call context.
*/
class FeatureHasSinkCallContext extends FlowFeature, TFeatureHasSinkCallContext {
override string toString() { result = "FeatureHasSinkCallContext" }
}
/**
* A flow configuration feature that implies that source-sink pairs have some
* shared existing call context.
*/
class FeatureEqualSourceSinkCallContext extends FlowFeature, TFeatureEqualSourceSinkCallContext {
override string toString() { result = "FeatureEqualSourceSinkCallContext" }
}
}
/**
* The cost limits for the `AccessPathFront` to `AccessPathApprox` expansion.
*
@@ -26,6 +73,18 @@ predicate accessPathCostLimits(int apLimit, int tupleLimit) {
tupleLimit = 1000
}
/**
* Holds if `arg` is an argument of `call` with an argument position that matches
* parameter position `ppos`.
*/
pragma[noinline]
predicate argumentPositionMatch(DataFlowCall call, ArgNode arg, ParameterPosition ppos) {
exists(ArgumentPosition apos |
arg.argumentOf(call, apos) and
parameterMatch(ppos, apos)
)
}
/**
* Provides a simple data-flow analysis for resolving lambda calls. The analysis
* currently excludes read-steps, store-steps, and flow-through.
@@ -35,25 +94,27 @@ predicate accessPathCostLimits(int apLimit, int tupleLimit) {
* calls. For this reason, we cannot reuse the code from `DataFlowImpl.qll` directly.
*/
private module LambdaFlow {
private predicate viableParamNonLambda(DataFlowCall call, int i, ParamNode p) {
p.isParameterOf(viableCallable(call), i)
pragma[noinline]
private predicate viableParamNonLambda(DataFlowCall call, ParameterPosition ppos, ParamNode p) {
p.isParameterOf(viableCallable(call), ppos)
}
private predicate viableParamLambda(DataFlowCall call, int i, ParamNode p) {
p.isParameterOf(viableCallableLambda(call, _), i)
pragma[noinline]
private predicate viableParamLambda(DataFlowCall call, ParameterPosition ppos, ParamNode p) {
p.isParameterOf(viableCallableLambda(call, _), ppos)
}
private predicate viableParamArgNonLambda(DataFlowCall call, ParamNode p, ArgNode arg) {
exists(int i |
viableParamNonLambda(call, i, p) and
arg.argumentOf(call, i)
exists(ParameterPosition ppos |
viableParamNonLambda(call, ppos, p) and
argumentPositionMatch(call, arg, ppos)
)
}
private predicate viableParamArgLambda(DataFlowCall call, ParamNode p, ArgNode arg) {
exists(int i |
viableParamLambda(call, i, p) and
arg.argumentOf(call, i)
exists(ParameterPosition ppos |
viableParamLambda(call, ppos, p) and
argumentPositionMatch(call, arg, ppos)
)
}
@@ -251,7 +312,7 @@ private module Cached {
predicate forceCachingInSameStage() { any() }
cached
predicate nodeEnclosingCallable(Node n, DataFlowCallable c) { c = n.getEnclosingCallable() }
predicate nodeEnclosingCallable(Node n, DataFlowCallable c) { c = nodeGetEnclosingCallable(n) }
cached
predicate callEnclosingCallable(DataFlowCall call, DataFlowCallable c) {
@@ -286,7 +347,7 @@ private module Cached {
or
exists(ArgNode arg |
result.(PostUpdateNode).getPreUpdateNode() = arg and
arg.argumentOf(call, k.(ParamUpdateReturnKind).getPosition())
arg.argumentOf(call, k.(ParamUpdateReturnKind).getAMatchingArgumentPosition())
)
}
@@ -294,7 +355,7 @@ private module Cached {
predicate returnNodeExt(Node n, ReturnKindExt k) {
k = TValueReturn(n.(ReturnNode).getKind())
or
exists(ParamNode p, int pos |
exists(ParamNode p, ParameterPosition pos |
parameterValueFlowsToPreUpdate(p, n) and
p.isParameterOf(_, pos) and
k = TParamUpdate(pos)
@@ -316,13 +377,13 @@ private module Cached {
}
cached
predicate parameterNode(Node n, DataFlowCallable c, int i) {
n.(ParameterNode).isParameterOf(c, i)
predicate parameterNode(Node p, DataFlowCallable c, ParameterPosition pos) {
isParameterNode(p, c, pos)
}
cached
predicate argumentNode(Node n, DataFlowCall call, int pos) {
n.(ArgumentNode).argumentOf(call, pos)
predicate argumentNode(Node n, DataFlowCall call, ArgumentPosition pos) {
isArgumentNode(n, call, pos)
}
/**
@@ -340,12 +401,12 @@ private module Cached {
}
/**
* Holds if `p` is the `i`th parameter of a viable dispatch target of `call`.
* The instance parameter is considered to have index `-1`.
* Holds if `p` is the parameter of a viable dispatch target of `call`,
* and `p` has position `ppos`.
*/
pragma[nomagic]
private predicate viableParam(DataFlowCall call, int i, ParamNode p) {
p.isParameterOf(viableCallableExt(call), i)
private predicate viableParam(DataFlowCall call, ParameterPosition ppos, ParamNode p) {
p.isParameterOf(viableCallableExt(call), ppos)
}
/**
@@ -354,9 +415,9 @@ private module Cached {
*/
cached
predicate viableParamArg(DataFlowCall call, ParamNode p, ArgNode arg) {
exists(int i |
viableParam(call, i, p) and
arg.argumentOf(call, i) and
exists(ParameterPosition ppos |
viableParam(call, ppos, p) and
argumentPositionMatch(call, arg, ppos) and
compatibleTypes(getNodeDataFlowType(arg), getNodeDataFlowType(p))
)
}
@@ -828,7 +889,7 @@ private module Cached {
cached
newtype TReturnKindExt =
TValueReturn(ReturnKind kind) or
TParamUpdate(int pos) { exists(ParamNode p | p.isParameterOf(_, pos)) }
TParamUpdate(ParameterPosition pos) { exists(ParamNode p | p.isParameterOf(_, pos)) }
cached
newtype TBooleanOption =
@@ -1020,9 +1081,9 @@ class ParamNode extends Node {
/**
* Holds if this node is the parameter of callable `c` at the specified
* (zero-based) position.
* position.
*/
predicate isParameterOf(DataFlowCallable c, int i) { parameterNode(this, c, i) }
predicate isParameterOf(DataFlowCallable c, ParameterPosition pos) { parameterNode(this, c, pos) }
}
/** A data-flow node that represents a call argument. */
@@ -1030,7 +1091,9 @@ class ArgNode extends Node {
ArgNode() { argumentNode(this, _, _) }
/** Holds if this argument occurs at the given position in the given call. */
final predicate argumentOf(DataFlowCall call, int pos) { argumentNode(this, call, pos) }
final predicate argumentOf(DataFlowCall call, ArgumentPosition pos) {
argumentNode(this, call, pos)
}
}
/**
@@ -1076,11 +1139,14 @@ class ValueReturnKind extends ReturnKindExt, TValueReturn {
}
class ParamUpdateReturnKind extends ReturnKindExt, TParamUpdate {
private int pos;
private ParameterPosition pos;
ParamUpdateReturnKind() { this = TParamUpdate(pos) }
int getPosition() { result = pos }
ParameterPosition getPosition() { result = pos }
pragma[nomagic]
ArgumentPosition getAMatchingArgumentPosition() { parameterMatch(pos, result) }
override string toString() { result = "param update " + pos }
}

View File

@@ -9,6 +9,31 @@ private import tainttracking1.TaintTrackingParameter::Private
private import tainttracking1.TaintTrackingParameter::Public
module Consistency {
private newtype TConsistencyConfiguration = MkConsistencyConfiguration()
/**
 * A configuration hook for the consistency queries.
 *
 * Every `...Exclude` predicate is empty here. A consistency query skips a node
 * as soon as the corresponding predicate holds for any `ConsistencyConfiguration`.
 */
class ConsistencyConfiguration extends TConsistencyConfiguration {
  /** Gets a textual representation of this configuration; this has no result. */
  string toString() { none() }

  /** Holds if `n` should be excluded from the consistency test `missingLocation`. */
  predicate missingLocationExclude(Node n) { none() }

  /** Holds if `n` should be excluded from the consistency test `uniqueNodeLocation`. */
  predicate uniqueNodeLocationExclude(Node n) { none() }

  /** Holds if `n` should be excluded from the consistency test `uniqueEnclosingCallable`. */
  predicate uniqueEnclosingCallableExclude(Node n) { none() }

  /** Holds if `n` should be excluded from the consistency test `reverseRead`. */
  predicate reverseReadExclude(Node n) { none() }

  /** Holds if `n` should be excluded from the consistency test `argHasPostUpdate`. */
  predicate argHasPostUpdateExclude(ArgumentNode n) { none() }

  /** Holds if `n` should be excluded from the consistency test `postWithInFlow`. */
  predicate postWithInFlowExclude(Node n) { none() }
}
private class RelevantNode extends Node {
RelevantNode() {
this instanceof ArgumentNode or
@@ -31,8 +56,9 @@ module Consistency {
query predicate uniqueEnclosingCallable(Node n, string msg) {
exists(int c |
n instanceof RelevantNode and
c = count(n.getEnclosingCallable()) and
c = count(nodeGetEnclosingCallable(n)) and
c != 1 and
not any(ConsistencyConfiguration conf).uniqueEnclosingCallableExclude(n) and
msg = "Node should have one enclosing callable but has " + c + "."
)
}
@@ -53,6 +79,7 @@ module Consistency {
n.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
) and
c != 1 and
not any(ConsistencyConfiguration conf).uniqueNodeLocationExclude(n) and
msg = "Node should have one location but has " + c + "."
)
}
@@ -63,7 +90,8 @@ module Consistency {
strictcount(Node n |
not exists(string filepath, int startline, int startcolumn, int endline, int endcolumn |
n.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
)
) and
not any(ConsistencyConfiguration conf).missingLocationExclude(n)
) and
msg = "Nodes without location: " + c
)
@@ -85,13 +113,13 @@ module Consistency {
}
query predicate parameterCallable(ParameterNode p, string msg) {
exists(DataFlowCallable c | p.isParameterOf(c, _) and c != p.getEnclosingCallable()) and
exists(DataFlowCallable c | isParameterNode(p, c, _) and c != nodeGetEnclosingCallable(p)) and
msg = "Callable mismatch for parameter."
}
query predicate localFlowIsLocal(Node n1, Node n2, string msg) {
simpleLocalFlowStep(n1, n2) and
n1.getEnclosingCallable() != n2.getEnclosingCallable() and
nodeGetEnclosingCallable(n1) != nodeGetEnclosingCallable(n2) and
msg = "Local flow step does not preserve enclosing callable."
}
@@ -106,7 +134,7 @@ module Consistency {
query predicate unreachableNodeCCtx(Node n, DataFlowCall call, string msg) {
isUnreachableInCall(n, call) and
exists(DataFlowCallable c |
c = n.getEnclosingCallable() and
c = nodeGetEnclosingCallable(n) and
not viableCallable(call) = c
) and
msg = "Call context for isUnreachableInCall is inconsistent with call graph."
@@ -120,7 +148,7 @@ module Consistency {
n.(ArgumentNode).argumentOf(call, _) and
msg = "ArgumentNode and call does not share enclosing callable."
) and
n.getEnclosingCallable() != call.getEnclosingCallable()
nodeGetEnclosingCallable(n) != call.getEnclosingCallable()
}
// This predicate helps the compiler forget that in some languages
@@ -151,7 +179,7 @@ module Consistency {
}
query predicate postIsInSameCallable(PostUpdateNode n, string msg) {
n.getEnclosingCallable() != n.getPreUpdateNode().getEnclosingCallable() and
nodeGetEnclosingCallable(n) != nodeGetEnclosingCallable(n.getPreUpdateNode()) and
msg = "PostUpdateNode does not share callable with its pre-update node."
}
@@ -159,12 +187,13 @@ module Consistency {
/**
 * Reports a node `n` that is the origin of a read step whose target has a
 * post-update node, while `n` itself has none. Nodes excluded through
 * `ConsistencyConfiguration::reverseReadExclude` are not reported.
 */
query predicate reverseRead(Node n, string msg) {
  msg = "Origin of readStep is missing a PostUpdateNode." and
  not hasPost(n) and
  exists(Node target | hasPost(target) and readStep(n, _, target)) and
  not exists(ConsistencyConfiguration conf | conf.reverseReadExclude(n))
}
/**
 * Reports an argument node that has no post-update node, unless the node is
 * immutable/unobservable or has been excluded through
 * `ConsistencyConfiguration::argHasPostUpdateExclude`.
 */
query predicate argHasPostUpdate(ArgumentNode n, string msg) {
  msg = "ArgumentNode is missing PostUpdateNode." and
  not exists(ConsistencyConfiguration c | c.argHasPostUpdateExclude(n)) and
  not isImmutableOrUnobservable(n) and
  not hasPost(n)
}
@@ -177,6 +206,7 @@ module Consistency {
isPostUpdateNode(n) and
not clearsContent(n, _) and
simpleLocalFlowStep(_, n) and
not any(ConsistencyConfiguration c).postWithInFlowExclude(n) and
msg = "PostUpdateNode should not be the target of local flow."
}
}

View File

@@ -2,6 +2,34 @@ private import python
private import DataFlowPublic
import semmle.python.SpecialMethods
private import semmle.python.essa.SsaCompute
private import semmle.python.dataflow.new.internal.ImportStar
/** Gets the callable that encloses the node `n`, as reported by `n.getEnclosingCallable()`. */
DataFlowCallable nodeGetEnclosingCallable(Node n) { n.getEnclosingCallable() = result }
/**
 * A parameter position, represented by an integer.
 *
 * An integer qualifies if some callable has a parameter at that index.
 */
class ParameterPosition extends int {
  ParameterPosition() { exists(DataFlowCallable c | exists(c.getParameter(this))) }
}
/**
 * An argument position, represented by an integer.
 *
 * An integer qualifies if some call has an argument at that index.
 */
class ArgumentPosition extends int {
  ArgumentPosition() { exists(DataFlowCall c | exists(c.getArg(this))) }
}
/**
 * Holds if an argument at position `apos` is passed to the parameter at
 * position `ppos`. Both are plain integers, so they match when they are equal.
 */
pragma[inline]
predicate parameterMatch(ParameterPosition ppos, ArgumentPosition apos) { apos = ppos }
/**
 * Holds if `p` is a `ParameterNode` of `c` with position `pos`.
 *
 * This is a free-standing wrapper that simply delegates to `p.isParameterOf(c, pos)`.
 */
predicate isParameterNode(ParameterNode p, DataFlowCallable c, ParameterPosition pos) {
p.isParameterOf(c, pos)
}
/**
 * Holds if `arg` is an `ArgumentNode` of the call `c` with position `pos`.
 *
 * This is a free-standing wrapper that simply delegates to `arg.argumentOf(c, pos)`.
 */
predicate isArgumentNode(ArgumentNode arg, DataFlowCall c, ArgumentPosition pos) {
arg.argumentOf(c, pos)
}
//--------
// Data flow graph
@@ -172,9 +200,23 @@ module EssaFlow {
// see `with_flow` in `python/ql/src/semmle/python/dataflow/Implementation.qll`
with.getContextExpr() = contextManager.getNode() and
with.getOptionalVars() = var.getNode() and
not with.isAsync() and
contextManager.strictlyDominates(var)
)
or
// Async with var definition
// `async with f(42) as x:`
// nodeFrom is `x`, cfg node
// nodeTo is `x`, essa var
//
// This makes the cfg node the local source of the awaited value.
exists(With with, ControlFlowNode var |
nodeFrom.(CfgNode).getNode() = var and
nodeTo.(EssaNode).getVar().getDefinition().(WithDefinition).getDefiningNode() = var and
with.getOptionalVars() = var.getNode() and
with.isAsync()
)
or
// Parameter definition
// `def foo(x):`
// nodeFrom is `x`, cfgNode
@@ -207,6 +249,8 @@ module EssaFlow {
// Flow inside an unpacking assignment
iterableUnpackingFlowStep(nodeFrom, nodeTo)
or
matchFlowStep(nodeFrom, nodeTo)
or
// Overflow keyword argument
exists(CallNode call, CallableValue callable |
call = callable.getACall() and
@@ -343,7 +387,7 @@ private Node update(Node node) {
* ```python
* f(0, 1, 2, a=3)
* ```
* will be modelled as
* will be modeled as
* ```python
* f(0, 1, [*t], [**d])
* ```
@@ -356,7 +400,7 @@ private Node update(Node node) {
* ```python
* f(0, **{"y": 1, "a": 3})
* ```
* no tuple argument is synthesized. It is modelled as
* no tuple argument is synthesized. It is modeled as
* ```python
* f(0, [y=1], [**d])
* ```
@@ -907,7 +951,7 @@ predicate jumpStep(Node nodeFrom, Node nodeTo) {
private predicate module_export(Module m, string name, CfgNode defn) {
exists(EssaVariable v |
v.getName() = name and
v.getAUse() = m.getANormalExit()
v.getAUse() = ImportStar::getStarImported*(m).getANormalExit()
|
defn.getNode() = v.getDefinition().(AssignmentDefinition).getValue()
or
@@ -940,6 +984,8 @@ predicate storeStep(Node nodeFrom, Content c, Node nodeTo) {
posOverflowStoreStep(nodeFrom, c, nodeTo)
or
kwOverflowStoreStep(nodeFrom, c, nodeTo)
or
matchStoreStep(nodeFrom, c, nodeTo)
}
/** Data flows from an element of a list to the list. */
@@ -1082,6 +1128,8 @@ predicate readStep(Node nodeFrom, Content c, Node nodeTo) {
or
iterableUnpackingReadStep(nodeFrom, c, nodeTo)
or
matchReadStep(nodeFrom, c, nodeTo)
or
popReadStep(nodeFrom, c, nodeTo)
or
forReadStep(nodeFrom, c, nodeTo)
@@ -1345,10 +1393,8 @@ module IterableUnpacking {
}
/** A (possibly recursive) target of an unpacking assignment which is also a sequence. */
class UnpackingAssignmentSequenceTarget extends UnpackingAssignmentTarget {
UnpackingAssignmentSequenceTarget() { this instanceof SequenceNode }
ControlFlowNode getElement(int i) { result = this.(SequenceNode).getElement(i) }
class UnpackingAssignmentSequenceTarget extends UnpackingAssignmentTarget instanceof SequenceNode {
ControlFlowNode getElement(int i) { result = super.getElement(i) }
ControlFlowNode getAnElement() { result = this.getElement(_) }
}
@@ -1513,6 +1559,318 @@ module IterableUnpacking {
import IterableUnpacking
/**
* There are a number of patterns available for the match statement.
* Each one transfers data and content differently to its parts.
*
* Furthermore, given a successful match, we can infer some data about
* the subject. Consider the example:
* ```python
* match choice:
* case 'Y':
* ...body
* ```
* Inside `body`, we know that `choice` has the value `'Y'`.
*
* A similar thing happens with the "as pattern". Consider the example:
* ```python
* match choice:
* case ('y'|'Y') as c:
* ...body
* ```
* By the binding rules, there is data flow from `choice` to `c`. But we
* can infer the value of `c` to be either `'y'` or `'Y'` if the match succeeds.
*
* We will treat such inferences separately as guards. First we will model the data flow
* stemming from the bindings and the matching of shape. Below, 'subject' is not necessarily the
* top-level subject of the match, but rather the part recursively matched by the current pattern.
* For instance, in the example:
* ```python
* match command:
* case ('quit' as c) | ('go', ('up'|'down') as c):
* ...body
* ```
* `command` is the subject of the first as-pattern, while the second component of `command`
* is the subject of the second as-pattern. As such, 'subject' refers to the pattern under evaluation.
*
* - as pattern: subject flows to alias as well as to the interior pattern
* - or pattern: subject flows to each alternative
* - literal pattern: flow from the literal to the pattern, to add information
* - capture pattern: subject flows to the variable
* - wildcard pattern: no flow
* - value pattern: flow from the value to the pattern, to add information
* - sequence pattern: each element reads from subject at the associated index
* - star pattern: subject flows to the variable, possibly via a conversion
* - mapping pattern: each value reads from subject at the associated key
* - double star pattern: subject flows to the variable, possibly via a conversion
* - key-value pattern: the value reads from the subject at the key (see mapping pattern)
* - class pattern: all keywords read the appropriate attribute from the subject
* - keyword pattern: the appropriate attribute is read from the subject (see class pattern)
*
* Inside the class pattern, we also find positional arguments. They are converted to
* keyword arguments using the `__match_args__` attribute on the class. We do not
* currently model this.
*/
module MatchUnpacking {
/**
 * Holds if `nodeFrom` is the subject expression of a `match` statement and
 * `nodeTo` is the pattern of one of its `case` clauses.
 *
 * Each top-level pattern receives the subject directly. An alternative model,
 * closer to use-use flow, would let the subject flow only to the first
 * top-level pattern and from each pattern on to the next one.
 */
predicate matchSubjectFlowStep(Node nodeFrom, Node nodeTo) {
  exists(MatchStmt match, Expr subject, Pattern target |
    match.getSubject() = subject and
    match.getCase(_).(Case).getPattern() = target and
    nodeTo.asCfgNode().getNode() = target and
    nodeFrom.asExpr() = subject
  )
}
/**
 * as pattern: the subject flows to the interior pattern, and the interior
 * pattern flows on to the alias.
 * syntax (toplevel): `case pattern as alias:`
 */
predicate matchAsFlowStep(Node nodeFrom, Node nodeTo) {
  exists(MatchAsPattern asPattern, Name alias |
    asPattern.getAlias() = alias and
    (
      // Step 1: the as-pattern (which holds the subject) flows to its interior pattern.
      nodeTo.asCfgNode().getNode() = asPattern.getPattern() and
      nodeFrom.asCfgNode().getNode() = asPattern
      or
      // Step 2: the interior pattern flows to the definition of the alias.
      // The subject thus reaches the alias via the interior pattern, so that
      // information attached to the interior pattern also propagates to the alias.
      nodeTo.asVar().getDefinition().(PatternAliasDefinition).getDefiningNode().getNode() = alias and
      nodeFrom.asCfgNode().getNode() = asPattern.getPattern()
    )
  )
}
/**
 * or pattern: the subject (the or-pattern node itself) flows to every alternative.
 * syntax (toplevel): `case alt1 | alt2:`
 */
predicate matchOrFlowStep(Node nodeFrom, Node nodeTo) {
  exists(MatchOrPattern orPattern, Pattern alternative |
    orPattern.getAPattern() = alternative and
    nodeTo.asCfgNode().getNode() = alternative and
    nodeFrom.asCfgNode().getNode() = orPattern
  )
}
/**
 * literal pattern: the literal expression flows to the pattern node, which
 * adds information about the matched value.
 * syntax (toplevel): `case literal:`
 */
predicate matchLiteralFlowStep(Node nodeFrom, Node nodeTo) {
  exists(MatchLiteralPattern literalPattern, Expr literal |
    literalPattern.getLiteral() = literal and
    nodeTo.asCfgNode().getNode() = literalPattern and
    nodeFrom.asExpr() = literal
  )
}
/**
 * capture pattern: the subject (the capture pattern node) flows to the
 * definition of the captured variable.
 * syntax (toplevel): `case var:`
 */
predicate matchCaptureFlowStep(Node nodeFrom, Node nodeTo) {
  exists(MatchCapturePattern capture, Name var |
    var = capture.getVariable() and
    nodeTo.asVar().getDefinition().(PatternCaptureDefinition).getDefiningNode().getNode() = var and
    nodeFrom.asCfgNode().getNode() = capture
  )
}
/**
 * value pattern: the value expression flows to the pattern node, which adds
 * information about the matched value.
 * syntax (toplevel): `case Dotted.value:`
 */
predicate matchValueFlowStep(Node nodeFrom, Node nodeTo) {
  exists(MatchValuePattern valuePattern, Expr value |
    valuePattern.getValue() = value and
    nodeTo.asCfgNode().getNode() = valuePattern and
    nodeFrom.asExpr() = value
  )
}
/**
 * sequence pattern: each element pattern reads from the subject (the sequence
 * pattern node) at the index the element has inside the pattern.
 * syntax (toplevel): `case [a, b]:`
 *
 * Tuple content is read at exactly that index; list content is read
 * regardless of the index.
 */
predicate matchSequenceReadStep(Node nodeFrom, Content c, Node nodeTo) {
  exists(MatchSequencePattern sequence, int position, Pattern element |
    sequence.getPattern(position) = element and
    nodeTo.asCfgNode().getNode() = element and
    nodeFrom.asCfgNode().getNode() = sequence and
    (
      // list content
      c instanceof ListElementContent
      or
      // tuple content
      c.(TupleElementContent).getIndex() = position
      // set content is excluded from sequence patterns,
      // see https://www.python.org/dev/peps/pep-0635/#sequence-patterns
    )
  )
}
/**
 * star pattern: the subject flows to the starred variable, possibly via a conversion.
 * syntax: a star pattern occurring as an element of a sequence pattern, such as `*var`.
 *
 * The flow is split into a read step and a store step. The read step accepts
 * both tuple and list content, whereas the store step only produces list
 * content; together they convert all content to list content.
 *
 * This is the read step: it goes from the enclosing sequence pattern to the
 * synthetic `TStarPatternElementNode` of the star pattern. Tuple content is
 * read at every index from the position of the star pattern onwards.
 */
predicate matchStarReadStep(Node nodeFrom, Content c, Node nodeTo) {
  exists(MatchSequencePattern sequence, int position, MatchStarPattern star |
    sequence.getPattern(position) = star and
    nodeTo = TStarPatternElementNode(star) and
    nodeFrom.asCfgNode().getNode() = sequence and
    (
      // list content
      c instanceof ListElementContent
      or
      // tuple content
      c.(TupleElementContent).getIndex() >= position
      // set content is excluded from sequence patterns,
      // see https://www.python.org/dev/peps/pep-0635/#sequence-patterns
    )
  )
}
/**
 * star pattern: the subject flows to the starred variable, possibly via a conversion.
 * syntax: a star pattern occurring as an element of a sequence pattern, such as `*var`.
 *
 * The flow is split into a read step and a store step. The read step accepts
 * both tuple and list content, whereas the store step only produces list
 * content; together they convert all content to list content.
 *
 * This is the store step: it goes from the synthetic `TStarPatternElementNode`
 * of the star pattern to the target of the star pattern, storing list content.
 */
predicate matchStarStoreStep(Node nodeFrom, Content c, Node nodeTo) {
  c instanceof ListElementContent and
  exists(MatchStarPattern star |
    nodeTo.asCfgNode().getNode() = star.getTarget() and
    nodeFrom = TStarPatternElementNode(star)
  )
}
/**
 * mapping pattern: each value pattern reads from the subject (the mapping
 * pattern node) at the associated key.
 * syntax (toplevel): `case {"color": c, "height": x}:`
 *
 * Only keys given as literal patterns whose literal is a string constant are handled.
 */
predicate matchMappingReadStep(Node nodeFrom, Content c, Node nodeTo) {
  exists(
    MatchMappingPattern mapping, MatchKeyValuePattern entry, MatchLiteralPattern key,
    Pattern value
  |
    mapping.getAMapping() = entry and
    entry.getKey() = key and
    entry.getValue() = value and
    c.(DictionaryElementContent).getKey() = key.getLiteral().(StrConst).getText() and
    nodeTo.asCfgNode().getNode() = value and
    nodeFrom.asCfgNode().getNode() = mapping
  )
}
/**
 * double star pattern: the subject (the mapping pattern node) flows to the
 * target of the double star pattern, possibly via a conversion.
 * syntax (toplevel): `case {**var}:`
 *
 * Dictionary content flows along with the subject; the keys mentioned
 * explicitly in the mapping pattern should be cleared again, which is what
 * `matchMappingClearStep` expresses.
 */
predicate matchMappingFlowStep(Node nodeFrom, Node nodeTo) {
  exists(MatchMappingPattern mapping, MatchDoubleStarPattern dstar |
    mapping.getAMapping() = dstar and
    nodeTo.asCfgNode().getNode() = dstar.getTarget() and
    nodeFrom.asCfgNode().getNode() = mapping
  )
}
/**
 * Holds if content `c` is cleared at node `n`, where `n` is the target of a
 * double star pattern and `c` is the dictionary content for a string key that
 * is matched explicitly in the same mapping pattern.
 *
 * Keys mentioned in a mapping pattern are not available to a double star
 * pattern in that same mapping pattern.
 */
predicate matchMappingClearStep(Node n, Content c) {
  exists(
    MatchMappingPattern mapping, MatchKeyValuePattern entry, MatchLiteralPattern key,
    MatchDoubleStarPattern dstar
  |
    mapping.getAMapping() = dstar and
    mapping.getAMapping() = entry and
    entry.getKey() = key and
    c.(DictionaryElementContent).getKey() = key.getLiteral().(StrConst).getText() and
    n.asCfgNode().getNode() = dstar.getTarget()
  )
}
/**
 * class pattern: the value pattern of each keyword reads the attribute named
 * by that keyword from the subject (the class pattern node).
 * syntax (toplevel): `case ClassName(attr = val):`
 */
predicate matchClassReadStep(Node nodeFrom, Content c, Node nodeTo) {
  exists(MatchClassPattern classPattern, MatchKeywordPattern keyword, Name attr, Pattern value |
    classPattern.getKeyword(_) = keyword and
    keyword.getAttribute() = attr and
    keyword.getValue() = value and
    c.(AttributeContent).getAttribute() = attr.getId() and
    nodeTo.asCfgNode().getNode() = value and
    nodeFrom.asCfgNode().getNode() = classPattern
  )
}
/**
 * Holds if there is a flow step from `nodeFrom` to `nodeTo` that stems from a
 * `match` statement. This is the union of the subject, as, or, literal,
 * capture, value, and mapping (double star) flow steps.
 */
predicate matchFlowStep(Node nodeFrom, Node nodeTo) {
  matchSubjectFlowStep(nodeFrom, nodeTo) or
  matchAsFlowStep(nodeFrom, nodeTo) or
  matchOrFlowStep(nodeFrom, nodeTo) or
  matchCaptureFlowStep(nodeFrom, nodeTo) or
  matchLiteralFlowStep(nodeFrom, nodeTo) or
  matchValueFlowStep(nodeFrom, nodeTo) or
  matchMappingFlowStep(nodeFrom, nodeTo)
}
/**
 * Holds if there is a read step of content `c` from `nodeFrom` to `nodeTo`
 * that stems from a `match` statement. This is the union of the sequence,
 * star, mapping, and class read steps.
 */
predicate matchReadStep(Node nodeFrom, Content c, Node nodeTo) {
  matchSequenceReadStep(nodeFrom, c, nodeTo) or
  matchStarReadStep(nodeFrom, c, nodeTo) or
  matchMappingReadStep(nodeFrom, c, nodeTo) or
  matchClassReadStep(nodeFrom, c, nodeTo)
}
/**
 * Holds if there is a store step of content `c` from `nodeFrom` to `nodeTo`
 * that stems from a `match` statement. Currently only the star pattern
 * contributes a store step.
 */
predicate matchStoreStep(Node nodeFrom, Content c, Node nodeTo) {
matchStarStoreStep(nodeFrom, c, nodeTo)
}
/**
 * Holds if content `c` is cleared at node `n` because of a `match` statement.
 * Currently only the mapping pattern contributes a clear step.
 */
predicate matchClearStep(Node n, Content c) { matchMappingClearStep(n, c) }
}
import MatchUnpacking
/** Data flows from a sequence to a call to `pop` on the sequence. */
predicate popReadStep(CfgNode nodeFrom, Content c, CfgNode nodeTo) {
// set.pop or list.pop
@@ -1595,11 +1953,10 @@ predicate kwUnpackReadStep(CfgNode nodeFrom, DictionaryElementContent c, Node no
}
/**
* Holds if values stored inside content `c` are cleared at node `n`. For example,
* any value stored inside `f` is cleared at the pre-update node associated with `x`
* in `x.f = newValue`.
* Clear content at key `name` of the synthesized dictionary `TKwOverflowNode(call, callable)`,
* whenever `call` unpacks `name`.
*/
predicate clearsContent(Node n, Content c) {
predicate kwOverflowClearStep(Node n, Content c) {
exists(CallNode call, CallableValue callable, string name |
call_unpacks(call, _, callable, name, _) and
n = TKwOverflowNode(call, callable) and
@@ -1607,6 +1964,17 @@ predicate clearsContent(Node n, Content c) {
)
}
/**
 * Holds if values stored inside content `c` are cleared at node `n`. For example,
 * any value stored inside `f` is cleared at the pre-update node associated with `x`
 * in `x.f = newValue`.
 *
 * The clear steps modeled here are those for `match` statements and those for
 * the synthesized keyword-overflow dictionary.
 */
predicate clearsContent(Node n, Content c) {
  matchClearStep(n, c)
  or
  kwOverflowClearStep(n, c)
}
//--------
// Fancy context-sensitive guards
//--------
@@ -1631,18 +1999,6 @@ DataFlowCallable viableImplInCallContext(DataFlowCall call, DataFlowCall ctx) {
*/
predicate mayBenefitFromCallContext(DataFlowCall call, DataFlowCallable c) { none() }
//--------
// Misc
//--------
/**
* Holds if `n` does not require a `PostUpdateNode` as it either cannot be
* modified or its modification cannot be observed, for example if it is a
* freshly created object that is not saved in a variable.
*
* This predicate is only used for consistency checks.
*/
predicate isImmutableOrUnobservable(Node n) { none() }
int accessPathLimit() { result = 5 }
/**

View File

@@ -8,6 +8,7 @@ import semmle.python.dataflow.new.TypeTracker
import Attributes
import LocalSources
private import semmle.python.essa.SsaCompute
private import semmle.python.dataflow.new.internal.ImportStar
/**
* IPA type for data flow nodes.
@@ -24,13 +25,25 @@ newtype TNode =
/** A node corresponding to an SSA variable. */
TEssaNode(EssaVariable var) or
/** A node corresponding to a control flow node. */
TCfgNode(ControlFlowNode node) { isExpressionNode(node) } or
TCfgNode(ControlFlowNode node) {
isExpressionNode(node)
or
node.getNode() instanceof Pattern
} or
/** A synthetic node representing the value of an object before a state change */
TSyntheticPreUpdateNode(NeedsSyntheticPreUpdateNode post) or
/** A synthetic node representing the value of an object after a state change. */
TSyntheticPostUpdateNode(NeedsSyntheticPostUpdateNode pre) or
/** A node representing a global (module-level) variable in a specific module. */
TModuleVariableNode(Module m, GlobalVariable v) { v.getScope() = m and v.escapes() } or
TModuleVariableNode(Module m, GlobalVariable v) {
v.getScope() = m and
(
v.escapes()
or
isAccessedThroughImportStar(m) and
ImportStar::globalNameDefinedInModule(v.getId(), m)
)
} or
/**
* A node representing the overflow positional arguments to a call.
* That is, `call` contains more positional arguments than there are
@@ -70,7 +83,11 @@ newtype TNode =
* A synthetic node representing that there may be an iterable element
* for `consumer` to consume.
*/
TIterableElementNode(UnpackingAssignmentTarget consumer)
TIterableElementNode(UnpackingAssignmentTarget consumer) or
/**
* A synthetic node representing element content in a star pattern.
*/
TStarPatternElementNode(MatchStarPattern target)
/** Helper for `Node::getEnclosingCallable`. */
private DataFlowCallable getCallableScope(Scope s) {
@@ -346,6 +363,8 @@ class ModuleVariableNode extends Node, TModuleVariableNode {
result.asCfgNode() = var.getALoad().getAFlowNode() and
// Ignore reads that happen when the module is imported. These are only executed once.
not result.getScope() = mod
or
this = import_star_read(result)
}
/** Gets an `EssaNode` that corresponds to an assignment of this global variable. */
@@ -358,6 +377,13 @@ class ModuleVariableNode extends Node, TModuleVariableNode {
override Location getLocation() { result = mod.getLocation() }
}
/** Holds if some module imports the module `m` via `import *`. */
private predicate isAccessedThroughImportStar(Module m) {
  exists(Module importer | ImportStar::getStarImported(importer) = m)
}
/**
 * Gets the module variable node that the name access `n` may read through a
 * chain of `import *` imports: the variable has the same name as `n`, and its
 * module is one that `n` resolves to according to `ImportStar::importStarResolvesTo`.
 */
private ModuleVariableNode import_star_read(Node n) {
  result.getVariable().getId() = n.asCfgNode().(NameNode).getId() and
  ImportStar::importStarResolvesTo(n.asCfgNode(), result.getModule())
}
/**
* The node holding the extra positional arguments to a call. This node is passed as a tuple
* to the starred parameter of the callable.
@@ -458,6 +484,21 @@ class IterableElementNode extends Node, TIterableElementNode {
override Location getLocation() { result = consumer.getLocation() }
}
/**
 * A synthetic node representing the element content of a star pattern.
 *
 * The enclosing callable and the location are taken from `consumer`, the
 * control-flow based node of the star pattern itself.
 */
class StarPatternElementNode extends Node, TStarPatternElementNode {
  CfgNode consumer;

  StarPatternElementNode() { TStarPatternElementNode(consumer.getNode().getNode()) = this }

  override Location getLocation() { consumer.getLocation() = result }

  override DataFlowCallable getEnclosingCallable() { consumer.getEnclosingCallable() = result }

  override string toString() { result = "StarPatternElement" }
}
/**
* A node that controls whether other nodes are evaluated.
*/

View File

@@ -0,0 +1,95 @@
/** Provides predicates for reasoning about uses of `import *` in Python. */
private import python
private import semmle.python.dataflow.new.internal.Builtins
cached
module ImportStar {
/**
 * Holds if `n` is a load of a variable called `name` that is not defined
 * locally (see `isDefinedLocally`), and `s` is the scope of `n` or one of its
 * enclosing scopes, where `s` contains an `import *`.
 */
cached
predicate namePossiblyDefinedInImportStar(NameNode n, string name, Scope s) {
  // `s` directly contains at least one `from ... import *`.
  exists(potentialImportStarBase(s)) and
  n.getScope().getEnclosingScope*() = s and
  n.getId() = name and
  n.isLoad() and
  // The name has no other, more local, meaning.
  not isDefinedLocally(n.getNode())
}
/**
 * Holds if the name `n` already has a meaning that does not depend on any
 * `import *`: it is defined in an enclosing scope, it is the name of a
 * built-in, it is a global assigned in the enclosing module, or it is one of
 * the special names `__name__` and `__package__`.
 */
cached
private predicate isDefinedLocally(Name n) {
  exists(string id | id = n.getId() |
    // A non-built-in that still has file-specific meaning
    id in ["__name__", "__package__"]
    or
    // Defined as a built-in
    id = Builtins::getBuiltinName()
    or
    // Defined as a global in this module
    globalNameDefinedInModule(id, n.getEnclosingModule())
    or
    // Defined in an enclosing scope
    enclosing_scope_defines_name(n.getScope(), id)
  )
}
/**
 * Holds if `s`, or a scope enclosing `s`, has a local variable called `name`.
 * A local variable directly in a scope is only counted when `name` is not
 * the name of a built-in.
 */
pragma[nomagic]
private predicate enclosing_scope_defines_name(Scope s, string name) {
  // Recursive case: look in the scope enclosing `s`.
  enclosing_scope_defines_name(s.getEnclosingScope(), name)
  or
  // Base case: `s` itself has such a local variable.
  not name = Builtins::getBuiltinName() and
  exists(LocalVariable local | local.getScope() = s and local.getId() = name)
}
/**
 * Holds if a global variable called `name` is assigned a value in the module `m`.
 *
 * This looks for a store to `name` inside `m` that does not define a local variable.
 */
cached
predicate globalNameDefinedInModule(string name, Module m) {
  exists(NameNode store |
    store.isStore() and
    store.getEnclosingModule() = m and
    store.getId() = name and
    not store.defines(any(LocalVariable v))
  )
}
/**
 * Holds if `n` may refer to a global variable of the same name in the module `m`,
 * where `m` is reachable from the enclosing module of `n` through a chain of one
 * or more `import *` imports, and `n` is not defined locally.
 */
cached
predicate importStarResolvesTo(NameNode n, Module m) {
  not isDefinedLocally(n.getNode()) and
  getStarImported+(n.getEnclosingModule()) = m and
  globalNameDefinedInModule(n.getId(), m)
}
/**
 * Gets a module that `m` imports via `import *`.
 *
 * The imported module is found through points-to analysis of the module
 * expression of an `import *` whose scope is `m`.
 */
cached
Module getStarImported(Module m) {
  exists(ImportStar star, ModuleValue imported |
    star.getScope() = m and
    imported = star.getModule().pointsTo() and
    imported.getScope() = result
  )
}
/**
 * Gets the control-flow node for a module imported with `from ... import *` inside the scope `s`.
 *
 * For example, given
 *
 * ```python
 * from foo.bar import *
 * from quux import *
 * ```
 *
 * this would return the control-flow nodes corresponding to `foo.bar` and `quux`.
 */
cached
ControlFlowNode potentialImportStarBase(Scope s) {
  exists(ImportStarNode star | star.getScope() = s and star.getModule() = result)
}
}

View File

@@ -67,6 +67,6 @@ string prettyNodeForInlineTest(DataFlow::Node node) {
)
or
not exists(node.asExpr()) and
not exists(Expr e | e = node.(DataFlow::PostUpdateNode).getPreUpdateNode().asExpr()) and
not exists(node.(DataFlow::PostUpdateNode).getPreUpdateNode().asExpr()) and
result = node.toString()
}

View File

@@ -10,6 +10,12 @@ private import semmle.python.ApiGraphs
*/
predicate defaultTaintSanitizer(DataFlow::Node node) { none() }
/**
 * Holds if `guard` should be a sanitizer guard in all global taint flow configurations
 * but not in local taint.
 *
 * No guard is a default sanitizer guard here: the body is `none()`.
 */
predicate defaultTaintSanitizerGuard(DataFlow::BarrierGuard guard) { none() }
/**
* Holds if default `TaintTracking::Configuration`s should allow implicit reads
* of `c` at sinks and inputs to additional taint steps.
@@ -53,6 +59,8 @@ private module Cached {
DataFlowPrivate::iterableUnpackingStoreStep(nodeFrom, _, nodeTo)
or
awaitStep(nodeFrom, nodeTo)
or
asyncWithStep(nodeFrom, nodeTo)
}
}
@@ -211,3 +219,24 @@ predicate copyStep(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeTo) {
predicate awaitStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
nodeTo.asExpr().(Await).getValue() = nodeFrom.asExpr()
}
/**
 * Holds if taint can flow from `nodeFrom` to `nodeTo` inside an `async with` statement,
 * that is, from the context expression to the variable bound by `as`.
 *
 * For example in
 * ```python
 * async with open("foo") as f:
 * ```
 * the variable `f` is tainted if the result of `open("foo")` is tainted.
 */
predicate asyncWithStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
  exists(With with, ControlFlowNode contextManager, ControlFlowNode var |
    with.isAsync() and
    // see `with_flow` in `python/ql/src/semmle/python/dataflow/Implementation.qll`
    contextManager.getNode() = with.getContextExpr() and
    var.getNode() = with.getOptionalVars() and
    contextManager.strictlyDominates(var) and
    nodeFrom.(DataFlow::CfgNode).getNode() = contextManager and
    nodeTo.(DataFlow::EssaNode).getVar().getDefinition().(WithDefinition).getDefiningNode() = var
  )
}

View File

@@ -52,6 +52,24 @@ private module Cached {
)
}
/**
 * Gets the summary resulting from prepending `step` to the backwards
 * type-tracking summary `tbt`.
 *
 * - A level step leaves `tbt` unchanged.
 * - A call step leaves `tbt` unchanged, but is only possible if `tbt` has no return.
 * - A return step yields a summary that has a return.
 * - A store step of the tracked content yields a summary with empty content.
 * - A load step is only possible if the tracked content is empty, and yields a
 *   summary tracking the loaded content.
 */
cached
TypeBackTracker prepend(TypeBackTracker tbt, StepSummary step) {
  exists(Boolean returned, string tracked |
    tbt = MkTypeBackTracker(returned, tracked) and
    (
      result = tbt and step = LevelStep()
      or
      result = tbt and step = CallStep() and returned = false
      or
      result = MkTypeBackTracker(true, tracked) and step = ReturnStep()
      or
      result = MkTypeBackTracker(returned, "") and step = StoreStep(tracked)
      or
      tracked = "" and
      exists(string loaded |
        step = LoadStep(loaded) and result = MkTypeBackTracker(returned, loaded)
      )
    )
  )
}
/**
* Gets the summary that corresponds to having taken a forwards
* heap and/or intra-procedural step from `nodeFrom` to `nodeTo`.
@@ -365,19 +383,7 @@ class TypeBackTracker extends TTypeBackTracker {
TypeBackTracker() { this = MkTypeBackTracker(hasReturn, content) }
/** Gets the summary resulting from prepending `step` to this type-tracking summary. */
TypeBackTracker prepend(StepSummary step) {
step = LevelStep() and result = this
or
step = CallStep() and hasReturn = false and result = this
or
step = ReturnStep() and result = MkTypeBackTracker(true, content)
or
exists(string p |
step = LoadStep(p) and content = "" and result = MkTypeBackTracker(hasReturn, p)
)
or
step = StoreStep(content) and result = MkTypeBackTracker(hasReturn, "")
}
TypeBackTracker prepend(StepSummary step) { result = prepend(this, step) }
/** Gets a textual representation of this summary. */
string toString() {
@@ -459,6 +465,19 @@ class TypeBackTracker extends TTypeBackTracker {
simpleLocalFlowStep(nodeFrom, nodeTo) and
this = result
}
/**
 * Gets a forwards summary that is compatible with this backwards summary.
 *
 * Suppose this summary describes the steps needed to back-track a value from
 * `sink` to `mid`, and the result is a valid summary of the steps needed to
 * track a value from `source` to `mid`. Then the value from `source` may also
 * flow to `sink`.
 *
 * The result tracks the same content as this summary. It may only have a call
 * when this summary has no return.
 */
TypeTracker getACompatibleTypeTracker() {
  exists(boolean called |
    result = MkTypeTracker(called, content) and
    (this.hasReturn() = false or called = false)
  )
}
}
/** Provides predicates for implementing custom `TypeBackTracker`s. */

View File

@@ -61,7 +61,7 @@ abstract class Configuration extends DataFlow::Configuration {
* The smaller this predicate is, the faster `hasFlow()` will converge.
*/
// overridden to provide taint-tracking specific qldoc
abstract override predicate isSource(DataFlow::Node source);
override predicate isSource(DataFlow::Node source) { none() }
/**
* Holds if `sink` is a relevant taint sink.
@@ -69,7 +69,7 @@ abstract class Configuration extends DataFlow::Configuration {
* The smaller this predicate is, the faster `hasFlow()` will converge.
*/
// overridden to provide taint-tracking specific qldoc
abstract override predicate isSink(DataFlow::Node sink);
override predicate isSink(DataFlow::Node sink) { none() }
/** Holds if the node `node` is a taint sanitizer. */
predicate isSanitizer(DataFlow::Node node) { none() }
@@ -93,7 +93,7 @@ abstract class Configuration extends DataFlow::Configuration {
predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { none() }
final override predicate isBarrierGuard(DataFlow::BarrierGuard guard) {
this.isSanitizerGuard(guard)
this.isSanitizerGuard(guard) or defaultTaintSanitizerGuard(guard)
}
/**

View File

@@ -61,7 +61,7 @@ abstract class Configuration extends DataFlow::Configuration {
* The smaller this predicate is, the faster `hasFlow()` will converge.
*/
// overridden to provide taint-tracking specific qldoc
abstract override predicate isSource(DataFlow::Node source);
override predicate isSource(DataFlow::Node source) { none() }
/**
* Holds if `sink` is a relevant taint sink.
@@ -69,7 +69,7 @@ abstract class Configuration extends DataFlow::Configuration {
* The smaller this predicate is, the faster `hasFlow()` will converge.
*/
// overridden to provide taint-tracking specific qldoc
abstract override predicate isSink(DataFlow::Node sink);
override predicate isSink(DataFlow::Node sink) { none() }
/** Holds if the node `node` is a taint sanitizer. */
predicate isSanitizer(DataFlow::Node node) { none() }
@@ -93,7 +93,7 @@ abstract class Configuration extends DataFlow::Configuration {
predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { none() }
final override predicate isBarrierGuard(DataFlow::BarrierGuard guard) {
this.isSanitizerGuard(guard)
this.isSanitizerGuard(guard) or defaultTaintSanitizerGuard(guard)
}
/**

View File

@@ -61,7 +61,7 @@ abstract class Configuration extends DataFlow::Configuration {
* The smaller this predicate is, the faster `hasFlow()` will converge.
*/
// overridden to provide taint-tracking specific qldoc
abstract override predicate isSource(DataFlow::Node source);
override predicate isSource(DataFlow::Node source) { none() }
/**
* Holds if `sink` is a relevant taint sink.
@@ -69,7 +69,7 @@ abstract class Configuration extends DataFlow::Configuration {
* The smaller this predicate is, the faster `hasFlow()` will converge.
*/
// overridden to provide taint-tracking specific qldoc
abstract override predicate isSink(DataFlow::Node sink);
override predicate isSink(DataFlow::Node sink) { none() }
/** Holds if the node `node` is a taint sanitizer. */
predicate isSanitizer(DataFlow::Node node) { none() }
@@ -93,7 +93,7 @@ abstract class Configuration extends DataFlow::Configuration {
predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { none() }
final override predicate isBarrierGuard(DataFlow::BarrierGuard guard) {
this.isSanitizerGuard(guard)
this.isSanitizerGuard(guard) or defaultTaintSanitizerGuard(guard)
}
/**

View File

@@ -61,7 +61,7 @@ abstract class Configuration extends DataFlow::Configuration {
* The smaller this predicate is, the faster `hasFlow()` will converge.
*/
// overridden to provide taint-tracking specific qldoc
abstract override predicate isSource(DataFlow::Node source);
override predicate isSource(DataFlow::Node source) { none() }
/**
* Holds if `sink` is a relevant taint sink.
@@ -69,7 +69,7 @@ abstract class Configuration extends DataFlow::Configuration {
* The smaller this predicate is, the faster `hasFlow()` will converge.
*/
// overridden to provide taint-tracking specific qldoc
abstract override predicate isSink(DataFlow::Node sink);
override predicate isSink(DataFlow::Node sink) { none() }
/** Holds if the node `node` is a taint sanitizer. */
predicate isSanitizer(DataFlow::Node node) { none() }
@@ -93,7 +93,7 @@ abstract class Configuration extends DataFlow::Configuration {
predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { none() }
final override predicate isBarrierGuard(DataFlow::BarrierGuard guard) {
this.isSanitizerGuard(guard)
this.isSanitizerGuard(guard) or defaultTaintSanitizerGuard(guard)
}
/**

View File

@@ -503,7 +503,7 @@ class TaintTrackingImplementation extends string {
TaintKind kind, string edgeLabel
) {
exists(PythonFunctionValue init, EssaVariable self, TaintTrackingContext callee |
instantiationCall(node.asCfgNode(), src, init, context, callee) and
this.instantiationCall(node.asCfgNode(), src, init, context, callee) and
this.(EssaTaintTracking).taintedDefinition(_, self.getDefinition(), callee, path, kind) and
self.getSourceVariable().(Variable).isSelf() and
BaseFlow::reaches_exit(self) and
@@ -789,9 +789,9 @@ private class EssaTaintTracking extends string {
TaintTrackingNode src, PyEdgeRefinement defn, TaintTrackingContext context, AttributePath path,
TaintKind kind
) {
taintedPiNodeOneway(src, defn, context, path, kind)
this.taintedPiNodeOneway(src, defn, context, path, kind)
or
taintedPiNodeBothways(src, defn, context, path, kind)
this.taintedPiNodeBothways(src, defn, context, path, kind)
}
pragma[noinline]
@@ -802,7 +802,7 @@ private class EssaTaintTracking extends string {
exists(DataFlow::Node srcnode, ControlFlowNode use |
src = TTaintTrackingNode_(srcnode, context, path, kind, this) and
not this.(TaintTracking::Configuration).isBarrierTest(defn.getTest(), defn.getSense()) and
defn.getSense() = testEvaluates(defn, defn.getTest(), use, src)
defn.getSense() = this.testEvaluates(defn, defn.getTest(), use, src)
)
}
@@ -898,7 +898,7 @@ private class EssaTaintTracking extends string {
)
)
or
result = testEvaluates(defn, not_operand(test), use, src).booleanNot()
result = this.testEvaluates(defn, not_operand(test), use, src).booleanNot()
}
/**
@@ -911,7 +911,7 @@ private class EssaTaintTracking extends string {
use = test
or
exists(ControlFlowNode notuse |
boolean_filter(test, notuse) and
this.boolean_filter(test, notuse) and
use = not_operand(notuse)
)
)

View File

@@ -639,16 +639,14 @@ module DataFlow {
}
}
deprecated private class ConfigurationAdapter extends TaintTracking::Configuration {
ConfigurationAdapter() { this instanceof Configuration }
deprecated private class ConfigurationAdapter extends TaintTracking::Configuration instanceof Configuration {
override predicate isSource(DataFlow::Node node, TaintKind kind) {
this.(Configuration).isSource(node.asCfgNode()) and
Configuration.super.isSource(node.asCfgNode()) and
kind instanceof DataFlowType
}
override predicate isSink(DataFlow::Node node, TaintKind kind) {
this.(Configuration).isSink(node.asCfgNode()) and
Configuration.super.isSink(node.asCfgNode()) and
kind instanceof DataFlowType
}
}

View File

@@ -14,16 +14,14 @@ string munge(File sourceFile, ExternalPackage package) {
result = "/" + sourceFile.getRelativePath() + "<|>" + package.getName() + "<|>unknown"
}
abstract class ExternalPackage extends Object {
ExternalPackage() { this instanceof ModuleObject }
abstract class ExternalPackage extends Object instanceof ModuleObject {
abstract string getName();
abstract string getVersion();
Object getAttribute(string name) { result = this.(ModuleObject).attr(name) }
Object getAttribute(string name) { result = super.attr(name) }
PackageObject getPackage() { result = this.(ModuleObject).getPackage() }
PackageObject getPackage() { result = super.getPackage() }
}
bindingset[text]

View File

@@ -70,6 +70,10 @@ abstract class SsaSourceVariable extends @py_variable {
SsaSource::exception_capture(this, def)
or
SsaSource::with_definition(this, def)
or
SsaSource::pattern_capture_definition(this, def)
or
SsaSource::pattern_alias_definition(this, def)
}
/**
@@ -152,17 +156,17 @@ class NonLocalVariable extends SsaSourceVariable {
}
override ControlFlowNode getAnImplicitUse() {
result.(CallNode).getScope().getScope*() = this.(LocalVariable).getScope()
result.(CallNode).getScope().getScope*() = this.scope_as_local_variable()
}
override ControlFlowNode getScopeEntryDefinition() {
exists(Function f |
f.getScope+() = this.(LocalVariable).getScope() and
f.getScope+() = this.scope_as_local_variable() and
f.getEntryNode() = result
)
or
not this.(LocalVariable).isParameter() and
this.(LocalVariable).getScope().getEntryNode() = result
this.scope_as_local_variable().getEntryNode() = result
}
pragma[noinline]
@@ -215,13 +219,16 @@ class ModuleVariable extends SsaSourceVariable {
)
}
pragma[nomagic]
private Scope scope_as_global_variable() { result = this.(GlobalVariable).getScope() }
pragma[noinline]
CallNode global_variable_callnode() { result.getScope() = this.(GlobalVariable).getScope() }
CallNode global_variable_callnode() { result.getScope() = this.scope_as_global_variable() }
pragma[noinline]
ImportMemberNode global_variable_import() {
result.getScope() = this.(GlobalVariable).getScope() and
import_from_dot_in_init(result.(ImportMemberNode).getModule(this.getName()))
result.getScope() = this.scope_as_global_variable() and
import_from_dot_in_init(result.getModule(this.getName()))
}
override ControlFlowNode getAnImplicitUse() {
@@ -250,7 +257,7 @@ class ModuleVariable extends SsaSourceVariable {
override ControlFlowNode getScopeEntryDefinition() {
exists(Scope s | s.getEntryNode() = result |
/* Module entry point */
this.(GlobalVariable).getScope() = s
this.scope_as_global_variable() = s
or
/* For implicit use of __metaclass__ when constructing class */
class_with_global_metaclass(s, this)
@@ -286,13 +293,13 @@ class EscapingGlobalVariable extends ModuleVariable {
override ControlFlowNode getAnImplicitUse() {
result = ModuleVariable.super.getAnImplicitUse()
or
result.(CallNode).getScope().getScope+() = this.(GlobalVariable).getScope()
result.(CallNode).getScope().getScope+() = this.scope_as_global_variable()
or
result = this.innerScope().getANormalExit()
}
private Scope innerScope() {
result.getScope+() = this.(GlobalVariable).getScope() and
result.getScope+() = this.scope_as_global_variable() and
not result instanceof ImportTimeScope
}
@@ -306,7 +313,7 @@ class EscapingGlobalVariable extends ModuleVariable {
Scope scope_as_global_variable() { result = this.(GlobalVariable).getScope() }
override CallNode redefinedAtCallSite() {
result.(CallNode).getScope().getScope*() = this.scope_as_global_variable()
result.getScope().getScope*() = this.scope_as_global_variable()
}
}
@@ -332,7 +339,7 @@ class SpecialSsaSourceVariable extends SsaSourceVariable {
Scope scope_as_global_variable() { result = this.(GlobalVariable).getScope() }
override CallNode redefinedAtCallSite() {
result.(CallNode).getScope().getScope*() = this.scope_as_global_variable()
result.getScope().getScope*() = this.scope_as_global_variable()
}
}

View File

@@ -8,7 +8,7 @@ import semmle.python.essa.Definitions
/** An (enhanced) SSA variable derived from `SsaSourceVariable`. */
class EssaVariable extends TEssaDefinition {
/** Gets the (unique) definition of this variable. */
/** Gets the (unique) definition of this variable. */
EssaDefinition getDefinition() { this = result }
/**
@@ -545,6 +545,24 @@ class WithDefinition extends EssaNodeDefinition {
override string getRepresentation() { result = "with" }
}
/**
* A definition of a variable via a capture pattern.
*
* Membership is decided entirely by `SsaSource::pattern_capture_definition`:
* an `EssaNodeDefinition` belongs to this class when that predicate holds for
* its source variable and its defining node.
*/
class PatternCaptureDefinition extends EssaNodeDefinition {
PatternCaptureDefinition() {
SsaSource::pattern_capture_definition(this.getSourceVariable(), this.getDefiningNode())
}
/** Gets the fixed label `"pattern capture"` used to represent this kind of definition. */
override string getRepresentation() { result = "pattern capture" }
}
/**
* A definition of a variable via a pattern alias.
*
* Membership is decided entirely by `SsaSource::pattern_alias_definition`:
* an `EssaNodeDefinition` belongs to this class when that predicate holds for
* its source variable and its defining node.
*/
class PatternAliasDefinition extends EssaNodeDefinition {
PatternAliasDefinition() {
SsaSource::pattern_alias_definition(this.getSourceVariable(), this.getDefiningNode())
}
/** Gets the fixed label `"pattern alias"` used to represent this kind of definition. */
override string getRepresentation() { result = "pattern alias" }
}
/** A definition of a variable by declaring it as a parameter */
class ParameterDefinition extends EssaNodeDefinition {
ParameterDefinition() {

View File

@@ -478,12 +478,11 @@ private module SsaComputeImpl {
predicate adjacentUseUse(ControlFlowNode use1, ControlFlowNode use2) {
adjacentUseUseSameVar(use1, use2)
or
exists(SsaSourceVariable v, EssaDefinition def, BasicBlock b1, int i1, BasicBlock b2, int i2 |
exists(SsaSourceVariable v, PhiFunction def, BasicBlock b1, int i1, BasicBlock b2, int i2 |
adjacentVarRefs(v, b1, i1, b2, i2) and
variableUse(v, use1, b1, i1) and
definesAt(def, v, b2, i2) and
firstUse(def, use2) and
def instanceof PhiFunction
variableUse(pragma[only_bind_into](v), use1, b1, i1) and
definesAt(def, pragma[only_bind_into](v), b2, i2) and
firstUse(def, use2)
)
}

View File

@@ -40,6 +40,28 @@ module SsaSource {
)
}
/**
 * Holds if `v` is defined by a capture pattern at `defn`.
 *
 * That is, `defn` is a flow node of a name that is both the variable of some
 * `MatchCapturePattern` and a store to `v`.
 */
cached
predicate pattern_capture_definition(Variable v, ControlFlowNode defn) {
  exists(Name boundName |
    boundName = any(MatchCapturePattern capture).getVariable() and
    boundName = v.getAStore() and
    defn = boundName.getAFlowNode()
  )
}
/**
 * Holds if `v` is defined as the alias of an as-pattern at `defn`.
 *
 * That is, `defn` is a flow node of a name that is both the alias of some
 * `MatchAsPattern` and a store to `v`.
 */
cached
predicate pattern_alias_definition(Variable v, ControlFlowNode defn) {
  exists(Name aliasName |
    aliasName = any(MatchAsPattern pattern).getAlias() and
    aliasName = v.getAStore() and
    defn = aliasName.getAFlowNode()
  )
}
/** Holds if `v` is defined by multiple assignment at `defn`. */
cached
predicate multi_assignment_definition(Variable v, ControlFlowNode defn, int n, SequenceNode lhs) {
@@ -127,7 +149,7 @@ module SsaSource {
not test_contains(_, call)
}
/** Holds if an attribute is deleted at `def` and `use` is the use of `v` for that deletion */
/** Holds if an attribute is deleted at `def` and `use` is the use of `v` for that deletion */
cached
predicate attribute_deletion_refinement(Variable v, NameNode use, DeletionNode def) {
use.uses(v) and

View File

@@ -330,8 +330,8 @@ module AiohttpWebModel {
exists(Await await, DataFlow::CallCfgNode call, DataFlow::AttrRead read |
this.asExpr() = await
|
read.(DataFlow::AttrRead).getObject() = Request::instance() and
read.(DataFlow::AttrRead).getAttributeName() = "post" and
read.getObject() = Request::instance() and
read.getAttributeName() = "post" and
call.getFunction() = read and
await.getValue() = call.asExpr()
)

View File

@@ -0,0 +1,145 @@
/**
* Provides classes modeling security-relevant aspects of the `aiomysql` PyPI package.
* See
* - https://aiomysql.readthedocs.io/en/stable/index.html
* - https://pypi.org/project/aiomysql/
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
/** Provides models for the `aiomysql` PyPI package. */
private module Aiomysql {
  private import semmle.python.internal.Awaited

  /**
   * Gets a `ConnectionPool`, which is created when the result of `aiomysql.create_pool()` is awaited.
   * See https://aiomysql.readthedocs.io/en/stable/pool.html
   */
  API::Node connectionPool() {
    result = API::moduleImport("aiomysql").getMember("create_pool").getReturn().getAwaited()
  }

  /**
   * Gets a `Connection`, which is created when
   * - the result of `aiomysql.connect()` is awaited.
   * - the result of calling `acquire` on a `ConnectionPool` is awaited.
   * See https://aiomysql.readthedocs.io/en/stable/connection.html#connection
   */
  API::Node connection() {
    result = API::moduleImport("aiomysql").getMember("connect").getReturn().getAwaited()
    or
    result = connectionPool().getMember("acquire").getReturn().getAwaited()
  }

  /**
   * Gets a `Cursor`, which is created when
   * - the result of calling `cursor` on a `ConnectionPool` is awaited.
   * - the result of calling `cursor` on a `Connection` is awaited.
   * See https://aiomysql.readthedocs.io/en/stable/cursors.html
   *
   * NOTE(review): confirm against the pool documentation that an `aiomysql`
   * pool actually exposes a `cursor` method; the pool branch is kept unchanged here.
   */
  API::Node cursor() {
    result = connectionPool().getMember("cursor").getReturn().getAwaited()
    or
    result = connection().getMember("cursor").getReturn().getAwaited()
  }

  /**
   * A call to `execute` on a `Cursor`, which constructs a query.
   * See https://aiomysql.readthedocs.io/en/stable/cursors.html#Cursor.execute
   */
  class CursorExecuteCall extends SqlConstruction::Range, DataFlow::CallCfgNode {
    CursorExecuteCall() { this = cursor().getMember("execute").getACall() }

    override DataFlow::Node getSql() {
      // The documented signature is `Cursor.execute(query, args=None)`, so the statement is the
      // first positional argument or the `query` keyword argument. The keyword `operation` is
      // still accepted so that nothing matched by the previous version of this model is lost.
      result in [this.getArg(0), this.getArgByName(["query", "operation"])]
    }
  }

  /**
   * Gets a reference to the result of a `CursorExecuteCall` whose SQL argument is `sql`.
   *
   * This is only needed to connect the argument to the execute call with the subsequent awaiting.
   * It should be obsolete once we have `API::CallNode` available.
   */
  private DataFlow::TypeTrackingNode cursorExecuteCall(DataFlow::TypeTracker t, DataFlow::Node sql) {
    // start at the `execute` call itself, remembering its SQL argument
    t.start() and
    sql = result.(CursorExecuteCall).getSql()
    or
    exists(DataFlow::TypeTracker t2 | result = cursorExecuteCall(t2, sql).track(t2, t))
  }

  /** Gets a reference to the result of a `CursorExecuteCall` whose SQL argument is `sql`. */
  DataFlow::Node cursorExecuteCall(DataFlow::Node sql) {
    cursorExecuteCall(DataFlow::TypeTracker::end(), sql).flowsTo(result)
  }

  /**
   * An awaiting of the result of calling `execute` on a `Cursor`, which executes the query.
   * See https://aiomysql.readthedocs.io/en/stable/cursors.html#Cursor.execute
   */
  class AwaitedCursorExecuteCall extends SqlExecution::Range {
    DataFlow::Node sql;

    AwaitedCursorExecuteCall() { this = awaited(cursorExecuteCall(sql)) }

    override DataFlow::Node getSql() { result = sql }
  }

  /**
   * Gets an `Engine`, which is created when the result of calling `aiomysql.sa.create_engine` is awaited.
   * See https://aiomysql.readthedocs.io/en/stable/sa.html#engine
   */
  API::Node engine() {
    result =
      API::moduleImport("aiomysql")
          .getMember("sa")
          .getMember("create_engine")
          .getReturn()
          .getAwaited()
  }

  /**
   * Gets an `SAConnection`, which is created when the result of calling `acquire` on an `Engine` is awaited.
   * See https://aiomysql.readthedocs.io/en/stable/sa.html#connection
   */
  API::Node saConnection() { result = engine().getMember("acquire").getReturn().getAwaited() }

  /**
   * A call to `execute` on an `SAConnection`, which constructs a query.
   * See https://aiomysql.readthedocs.io/en/stable/sa.html#aiomysql.sa.SAConnection.execute
   */
  class SAConnectionExecuteCall extends SqlConstruction::Range, DataFlow::CallCfgNode {
    SAConnectionExecuteCall() { this = saConnection().getMember("execute").getACall() }

    override DataFlow::Node getSql() { result in [this.getArg(0), this.getArgByName("query")] }
  }

  /**
   * Gets a reference to the result of an `SAConnectionExecuteCall` whose SQL argument is `sql`.
   *
   * This is only needed to connect the argument to the execute call with the subsequent awaiting.
   * It should be obsolete once we have `API::CallNode` available.
   */
  private DataFlow::TypeTrackingNode saConnectionExecuteCall(
    DataFlow::TypeTracker t, DataFlow::Node sql
  ) {
    // start at the `execute` call itself, remembering its SQL argument
    t.start() and
    sql = result.(SAConnectionExecuteCall).getSql()
    or
    exists(DataFlow::TypeTracker t2 | result = saConnectionExecuteCall(t2, sql).track(t2, t))
  }

  /** Gets a reference to the result of an `SAConnectionExecuteCall` whose SQL argument is `sql`. */
  DataFlow::Node saConnectionExecuteCall(DataFlow::Node sql) {
    saConnectionExecuteCall(DataFlow::TypeTracker::end(), sql).flowsTo(result)
  }

  /**
   * An awaiting of the result of calling `execute` on an `SAConnection`, which executes the query.
   * See https://aiomysql.readthedocs.io/en/stable/sa.html#aiomysql.sa.SAConnection.execute
   */
  class AwaitedSAConnectionExecuteCall extends SqlExecution::Range {
    DataFlow::Node sql;

    AwaitedSAConnectionExecuteCall() { this = awaited(saConnectionExecuteCall(sql)) }

    override DataFlow::Node getSql() { result = sql }
  }
}

View File

@@ -0,0 +1,141 @@
/**
* Provides classes modeling security-relevant aspects of the `aiopg` PyPI package.
* See
* - https://aiopg.readthedocs.io/en/stable/index.html
* - https://pypi.org/project/aiopg/
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
/** Provides models for the `aiopg` PyPI package. */
private module Aiopg {
  private import semmle.python.internal.Awaited

  /**
   * Gets a `ConnectionPool`, that is, the awaited result of `aiopg.create_pool()`.
   * See https://aiopg.readthedocs.io/en/stable/core.html#pool
   */
  API::Node connectionPool() {
    exists(API::Node createPool |
      createPool = API::moduleImport("aiopg").getMember("create_pool")
    |
      result = createPool.getReturn().getAwaited()
    )
  }

  /**
   * Gets a `Connection`, that is, the awaited result of either
   * - `aiopg.connect()`, or
   * - calling `acquire` on a `ConnectionPool`.
   * See https://aiopg.readthedocs.io/en/stable/core.html#connection
   */
  API::Node connection() {
    result =
      [API::moduleImport("aiopg").getMember("connect"), connectionPool().getMember("acquire")]
          .getReturn()
          .getAwaited()
  }

  /**
   * Gets a `Cursor`, that is, the awaited result of calling `cursor` on
   * either a `ConnectionPool` or a `Connection`.
   * See https://aiopg.readthedocs.io/en/stable/core.html#cursor
   */
  API::Node cursor() {
    result = [connectionPool(), connection()].getMember("cursor").getReturn().getAwaited()
  }

  /**
   * A call to `execute` on a `Cursor`; such a call constructs a query.
   * See https://aiopg.readthedocs.io/en/stable/core.html#aiopg.Cursor.execute
   */
  class CursorExecuteCall extends SqlConstruction::Range, DataFlow::CallCfgNode {
    CursorExecuteCall() { this = cursor().getMember("execute").getACall() }

    override DataFlow::Node getSql() {
      result = this.getArg(0)
      or
      result = this.getArgByName("operation")
    }
  }

  /**
   * Gets a reference to the result of a `CursorExecuteCall` whose SQL argument is `sql`.
   *
   * The tracking exists only to link the argument of the `execute` call to the
   * place where the call result is subsequently awaited. It should become
   * obsolete once `API::CallNode` is available.
   */
  private DataFlow::TypeTrackingNode cursorExecuteCall(DataFlow::TypeTracker t, DataFlow::Node sql) {
    // base case: the `execute` call itself
    t.start() and
    exists(CursorExecuteCall call | result = call and sql = call.getSql())
    or
    // recursive case: one more type-tracking step
    exists(DataFlow::TypeTracker prev | result = cursorExecuteCall(prev, sql).track(prev, t))
  }

  /** Gets a reference to the result of a `CursorExecuteCall` whose SQL argument is `sql`. */
  DataFlow::Node cursorExecuteCall(DataFlow::Node sql) {
    exists(DataFlow::TypeTrackingNode origin |
      origin = cursorExecuteCall(DataFlow::TypeTracker::end(), sql)
    |
      origin.flowsTo(result)
    )
  }

  /**
   * An awaiting of the result of calling `execute` on a `Cursor`; this executes the query.
   * See https://aiopg.readthedocs.io/en/stable/core.html#aiopg.Cursor.execute
   */
  class AwaitedCursorExecuteCall extends SqlExecution::Range {
    DataFlow::Node sql;

    AwaitedCursorExecuteCall() { this = awaited(cursorExecuteCall(sql)) }

    override DataFlow::Node getSql() { result = sql }
  }

  /**
   * Gets an `Engine`, that is, the awaited result of calling `aiopg.sa.create_engine`.
   * See https://aiopg.readthedocs.io/en/stable/sa.html#engine
   */
  API::Node engine() {
    exists(API::Node sa | sa = API::moduleImport("aiopg").getMember("sa") |
      result = sa.getMember("create_engine").getReturn().getAwaited()
    )
  }

  /**
   * Gets an `SAConnection`, that is, the awaited result of calling `acquire` on an `Engine`.
   * See https://aiopg.readthedocs.io/en/stable/sa.html#connection
   */
  API::Node saConnection() {
    exists(API::Node acquire | acquire = engine().getMember("acquire") |
      result = acquire.getReturn().getAwaited()
    )
  }

  /**
   * A call to `execute` on an `SAConnection`; such a call constructs a query.
   * See https://aiopg.readthedocs.io/en/stable/sa.html#aiopg.sa.SAConnection.execute
   */
  class SAConnectionExecuteCall extends SqlConstruction::Range, DataFlow::CallCfgNode {
    SAConnectionExecuteCall() { this = saConnection().getMember("execute").getACall() }

    override DataFlow::Node getSql() {
      result = this.getArg(0)
      or
      result = this.getArgByName("query")
    }
  }

  /**
   * Gets a reference to the result of an `SAConnectionExecuteCall` whose SQL argument is `sql`.
   *
   * The tracking exists only to link the argument of the `execute` call to the
   * place where the call result is subsequently awaited. It should become
   * obsolete once `API::CallNode` is available.
   */
  private DataFlow::TypeTrackingNode saConnectionExecuteCall(
    DataFlow::TypeTracker t, DataFlow::Node sql
  ) {
    // base case: the `execute` call itself
    t.start() and
    exists(SAConnectionExecuteCall call | result = call and sql = call.getSql())
    or
    // recursive case: one more type-tracking step
    exists(DataFlow::TypeTracker prev | result = saConnectionExecuteCall(prev, sql).track(prev, t))
  }

  /** Gets a reference to the result of an `SAConnectionExecuteCall` whose SQL argument is `sql`. */
  DataFlow::Node saConnectionExecuteCall(DataFlow::Node sql) {
    exists(DataFlow::TypeTrackingNode origin |
      origin = saConnectionExecuteCall(DataFlow::TypeTracker::end(), sql)
    |
      origin.flowsTo(result)
    )
  }

  /**
   * An awaiting of the result of calling `execute` on an `SAConnection`; this executes the query.
   * See https://aiopg.readthedocs.io/en/stable/sa.html#aiopg.sa.SAConnection.execute
   */
  class AwaitedSAConnectionExecuteCall extends SqlExecution::Range {
    DataFlow::Node sql;

    AwaitedSAConnectionExecuteCall() { this = awaited(saConnectionExecuteCall(sql)) }

    override DataFlow::Node getSql() { result = sql }
  }
}

View File

@@ -0,0 +1,162 @@
/**
* Provides classes modeling security-relevant aspects of the `asyncpg` PyPI package.
* See https://magicstack.github.io/asyncpg/.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
/** Provides models for the `asyncpg` PyPI package. */
private module Asyncpg {
  private import semmle.python.internal.Awaited

  /** Gets a `ConnectionPool`, that is, the awaited result of `asyncpg.create_pool()`. */
  API::Node connectionPool() {
    exists(API::Node createPool |
      createPool = API::moduleImport("asyncpg").getMember("create_pool")
    |
      result = createPool.getReturn().getAwaited()
    )
  }

  /**
   * Gets a `Connection`, that is, the awaited result of either
   * - `asyncpg.connect()`, or
   * - calling `acquire` on a `ConnectionPool`.
   */
  API::Node connection() {
    result =
      [API::moduleImport("asyncpg").getMember("connect"), connectionPool().getMember("acquire")]
          .getReturn()
          .getAwaited()
  }

  /** A call to one of the methods on a `Connection` or `ConnectionPool` that execute SQL. */
  class SqlExecutionOnConnection extends SqlExecution::Range, DataFlow::MethodCallNode {
    string methodName;

    SqlExecutionOnConnection() {
      methodName = ["copy_from_query", "execute", "fetch", "fetchrow", "fetchval", "executemany"] and
      this.calls([connectionPool(), connection()].getAUse(), methodName)
    }

    override DataFlow::Node getSql() {
      // The SQL is always the first positional argument; only its keyword name differs.
      exists(string keyword |
        methodName = "executemany" and keyword = "command"
        or
        methodName != "executemany" and keyword = "query"
      |
        result = this.getArg(0)
        or
        result = this.getArgByName(keyword)
      )
    }
  }

  /** A call to one of the methods on a `Connection` or `ConnectionPool` that access the file system. */
  class FileAccessOnConnection extends FileSystemAccess::Range, DataFlow::MethodCallNode {
    string methodName;

    FileAccessOnConnection() {
      methodName = ["copy_from_query", "copy_from_table", "copy_to_table"] and
      this.calls([connectionPool(), connection()].getAUse(), methodName)
    }

    // The path argument is keyword only.
    override DataFlow::Node getAPathArgument() {
      exists(string keyword |
        if methodName = "copy_to_table" then keyword = "source" else keyword = "output"
      |
        result = this.getArgByName(keyword)
      )
    }
  }

  /**
   * Provides models of the `PreparedStatement` class in `asyncpg`.
   *
   * A `PreparedStatement` is created when the result of calling `prepare(query)` on a
   * connection is awaited. The call `prepare(query)` itself yields a `PreparedStatementFactory`,
   * so the `query` argument has to be tracked first to the place where the `PreparedStatement`
   * is created and then further to any executing method. Hence the two type trackers.
   *
   * TODO: Rewrite this, once we have `API::CallNode` available.
   */
  module PreparedStatement {
    /** A call to `prepare` on a `Connection`; such a call constructs a query. */
    class PreparedStatementConstruction extends SqlConstruction::Range, DataFlow::CallCfgNode {
      PreparedStatementConstruction() { this = connection().getMember("prepare").getACall() }

      override DataFlow::Node getSql() {
        result = this.getArg(0)
        or
        result = this.getArgByName("query")
      }
    }

    /** Gets a reference to the result of a `prepare` call whose SQL argument is `sql`. */
    private DataFlow::TypeTrackingNode preparedStatementFactory(
      DataFlow::TypeTracker t, DataFlow::Node sql
    ) {
      // base case: the `prepare` call itself
      t.start() and
      exists(PreparedStatementConstruction call | result = call and sql = call.getSql())
      or
      // recursive case: one more type-tracking step
      exists(DataFlow::TypeTracker prev |
        result = preparedStatementFactory(prev, sql).track(prev, t)
      )
    }

    /** Gets a reference to the result of a `prepare` call whose SQL argument is `sql`. */
    DataFlow::Node preparedStatementFactory(DataFlow::Node sql) {
      exists(DataFlow::TypeTrackingNode origin |
        origin = preparedStatementFactory(DataFlow::TypeTracker::end(), sql)
      |
        origin.flowsTo(result)
      )
    }

    /** Gets a reference to the awaited result of a `prepare` call whose SQL argument is `sql`. */
    private DataFlow::TypeTrackingNode preparedStatement(DataFlow::TypeTracker t, DataFlow::Node sql) {
      // base case: the awaiting of a prepared statement factory
      t.start() and
      result = awaited(preparedStatementFactory(sql))
      or
      // recursive case: one more type-tracking step
      exists(DataFlow::TypeTracker prev | result = preparedStatement(prev, sql).track(prev, t))
    }

    /** Gets a reference to the awaited result of a `prepare` call whose SQL argument is `sql`. */
    DataFlow::Node preparedStatement(DataFlow::Node sql) {
      exists(DataFlow::TypeTrackingNode origin |
        origin = preparedStatement(DataFlow::TypeTracker::end(), sql)
      |
        origin.flowsTo(result)
      )
    }

    /** A call to an executing method on a prepared statement. */
    class PreparedStatementExecution extends SqlExecution::Range, DataFlow::MethodCallNode {
      DataFlow::Node sql;

      PreparedStatementExecution() {
        exists(string executingMethod |
          executingMethod = ["executemany", "fetch", "fetchrow", "fetchval"]
        |
          this.calls(preparedStatement(sql), executingMethod)
        )
      }

      override DataFlow::Node getSql() { result = sql }
    }
  }

  /**
   * Provides models of the `Cursor` class in `asyncpg`.
   *
   * A `Cursor` is created
   * - when the result of calling `cursor(query)` on a connection is awaited.
   * - when the result of calling `cursor()` on a prepared statement is awaited.
   * In either case the `cursor` call yields a `CursorFactory`, so the `query` argument has to be
   * tracked to the place where the `Cursor` is created, hence the type tracker.
   * The creation of the `Cursor` executes the query.
   *
   * TODO: Rewrite this, once we have `API::CallNode` available.
   */
  module Cursor {
    /** A call to `cursor` on a `Connection`; such a call constructs a query. */
    class CursorConstruction extends SqlConstruction::Range, DataFlow::CallCfgNode {
      CursorConstruction() { this = connection().getMember("cursor").getACall() }

      override DataFlow::Node getSql() {
        result = this.getArg(0)
        or
        result = this.getArgByName("query")
      }
    }

    /** Gets a reference to the result of a `cursor` call associated with the SQL `sql`. */
    private DataFlow::TypeTrackingNode cursorFactory(DataFlow::TypeTracker t, DataFlow::Node sql) {
      t.start() and
      (
        // cursor created from connection
        exists(CursorConstruction call | result = call and sql = call.getSql())
        or
        // cursor created from prepared statement
        result.(DataFlow::MethodCallNode).calls(PreparedStatement::preparedStatement(sql), "cursor")
      )
      or
      exists(DataFlow::TypeTracker prev | result = cursorFactory(prev, sql).track(prev, t))
    }

    /** Gets a reference to the result of a `cursor` call associated with the SQL `sql`. */
    DataFlow::Node cursorFactory(DataFlow::Node sql) {
      exists(DataFlow::TypeTrackingNode origin |
        origin = cursorFactory(DataFlow::TypeTracker::end(), sql)
      |
        origin.flowsTo(result)
      )
    }

    /** The creation of a `Cursor` executes the associated query. */
    class CursorCreation extends SqlExecution::Range {
      DataFlow::Node sql;

      CursorCreation() { this = awaited(cursorFactory(sql)) }

      override DataFlow::Node getSql() { result = sql }
    }
  }
}

View File

@@ -17,10 +17,12 @@ private import semmle.python.frameworks.internal.SelfRefMixin
private import semmle.python.frameworks.internal.InstanceTaintStepsHelper
/**
* INTERNAL: Do not use.
*
* Provides models for the `django` PyPI package.
* See https://www.djangoproject.com/.
*/
private module Django {
module Django {
/** Provides models for the `django.views` module */
module Views {
/**
@@ -367,6 +369,52 @@ private module Django {
}
}
/**
 * Provides models for the `django.contrib.auth.models.User` class.
 *
 * See https://docs.djangoproject.com/en/3.2/ref/contrib/auth/#user-model.
 */
module User {
  /**
   * A source of instances of `django.contrib.auth.models.User`. Extend this class to
   * model new instances.
   *
   * Such a source can be an instantiation of the class, the return value of a function
   * call, or a special parameter that is set when a function is called by an external
   * library.
   *
   * To get references to instances of `django.contrib.auth.models.User`, use the
   * predicate `User::instance()`.
   */
  abstract class InstanceSource extends DataFlow::LocalSourceNode { }

  /** Gets a reference to an instance of `django.contrib.auth.models.User`. */
  private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
    // base case: any modeled instance source
    result instanceof InstanceSource and
    t.start()
    or
    // recursive case: one more type-tracking step
    exists(DataFlow::TypeTracker prev | result = instance(prev).track(prev, t))
  }

  /** Gets a reference to an instance of `django.contrib.auth.models.User`. */
  DataFlow::Node instance() {
    exists(DataFlow::TypeTrackingNode origin | origin = instance(DataFlow::TypeTracker::end()) |
      origin.flowsTo(result)
    )
  }

  /**
   * Taint propagation for `django.contrib.auth.models.User`.
   *
   * Only attribute reads are modeled; no method names are listed.
   */
  private class InstanceTaintSteps extends InstanceTaintStepsHelper {
    InstanceTaintSteps() { this = "django.contrib.auth.models.User" }

    override DataFlow::Node getInstance() { result = instance() }

    override string getAttributeName() {
      result = ["username", "first_name", "last_name", "email"]
    }

    override string getMethodName() { none() }

    override string getAsyncMethodName() { none() }
  }
}
/**
* Provides models for the `django.core.files.uploadedfile.UploadedFile` class
*
@@ -466,10 +514,12 @@ private module Django {
}
/**
* INTERNAL: Do not use.
*
* Provides models for the `django` PyPI package (that we are not quite ready to publicly expose yet).
* See https://www.djangoproject.com/.
*/
private module PrivateDjango {
module PrivateDjango {
// ---------------------------------------------------------------------------
// django
// ---------------------------------------------------------------------------
@@ -496,6 +546,7 @@ private module PrivateDjango {
/** Gets a reference to the `django.db.connection` object. */
API::Node connection() {
  exists(API::Node dbModule | dbModule = db() | result = dbModule.getMember("connection"))
}
/**
 * A use of `django.db.connection`, modeled as a PEP249 compliant DB connection.
 */
class DjangoDbConnection extends PEP249::Connection::InstanceSource {
  DjangoDbConnection() { connection().getAUse() = this }
}
@@ -692,6 +743,7 @@ private module PrivateDjango {
/** Provides models for the `django.conf` module */
module conf {
/** Provides models for the `django.conf.urls` module */
module conf_urls {
// -------------------------------------------------------------------------
// django.conf.urls
@@ -890,6 +942,7 @@ private module PrivateDjango {
* See https://docs.djangoproject.com/en/3.1/ref/request-response/#django.http.HttpResponse.
*/
module HttpResponse {
/** Gets a reference to the `django.http.response.HttpResponse` class. */
API::Node baseClassRef() {
result = response().getMember("HttpResponse")
or
@@ -897,7 +950,7 @@ private module PrivateDjango {
result = http().getMember("HttpResponse")
}
/** Gets a reference to the `django.http.response.HttpResponse` class. */
/** Gets a reference to the `django.http.response.HttpResponse` class or any subclass. */
API::Node classRef() { result = baseClassRef().getASubclass*() }
/**
@@ -1893,14 +1946,11 @@ private module PrivateDjango {
* with the django framework.
*
* Most functions take a django HttpRequest as a parameter (but not all).
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `DjangoRouteHandler::Range` instead.
*/
private class DjangoRouteHandler extends Function {
DjangoRouteHandler() {
exists(DjangoRouteSetup route | route.getViewArg() = poorMansFunctionTracker(this))
or
any(DjangoViewClass vc).getARequestHandler() = this
}
class DjangoRouteHandler extends Function instanceof DjangoRouteHandler::Range {
/**
* Gets the index of the parameter where the first routed parameter can be passed --
* that is, the one just after any possible `self` or HttpRequest parameters.
@@ -1920,6 +1970,24 @@ private module PrivateDjango {
Parameter getRequestParam() { result = this.getArg(this.getRequestParamIndex()) }
}
/** Provides a class for modeling new django route handlers. */
module DjangoRouteHandler {
  /**
   * A function that is a django route handler.
   *
   * Extend this class to model new APIs. If you want to refine existing API models,
   * extend `DjangoRouteHandler` instead.
   */
  abstract class Range extends Function { }

  /** Route handlers from normal usage of django. */
  private class StandardDjangoRouteHandlers extends Range {
    StandardDjangoRouteHandlers() {
      // `poorMansFunctionTracker(this)` is the view argument of some route setup
      poorMansFunctionTracker(this) = any(DjangoRouteSetup route).getViewArg()
      or
      // the function is a request handler of some django view class
      exists(DjangoViewClass vc | this = vc.getARequestHandler())
    }
  }
}
/**
* A method named `get_redirect_url` on a django view class.
*
@@ -1941,7 +2009,7 @@ private module PrivateDjango {
}
/** A data-flow node that sets up a route on a server, using the django framework. */
abstract private class DjangoRouteSetup extends HTTP::Server::RouteSetup::Range, DataFlow::CfgNode {
abstract class DjangoRouteSetup extends HTTP::Server::RouteSetup::Range, DataFlow::CfgNode {
/** Gets the data-flow node that is used as the argument for the view handler. */
abstract DataFlow::Node getViewArg();

View File

@@ -0,0 +1,366 @@
/**
* Provides classes modeling security-relevant aspects of the `fastapi` PyPI package.
* See https://fastapi.tiangolo.com/.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
private import semmle.python.frameworks.Pydantic
private import semmle.python.frameworks.Starlette
/**
* Provides models for the `fastapi` PyPI package.
* See https://fastapi.tiangolo.com/.
*/
private module FastApi {
/**
 * Provides models for FastAPI applications (an instance of `fastapi.FastAPI`).
 */
module App {
  /** Gets a reference to a FastAPI application (an instance of `fastapi.FastAPI`). */
  API::Node instance() {
    exists(API::Node fastApiClass |
      fastApiClass = API::moduleImport("fastapi").getMember("FastAPI") and
      result = fastApiClass.getReturn()
    )
  }
}
/**
 * Provides models for the `fastapi.APIRouter` class
 *
 * See https://fastapi.tiangolo.com/tutorial/bigger-applications/.
 */
module APIRouter {
  /**
   * Gets a reference to an instance of `fastapi.APIRouter`, or of any
   * (transitive) subclass of it.
   */
  API::Node instance() {
    exists(API::Node routerClass |
      routerClass = API::moduleImport("fastapi").getMember("APIRouter").getASubclass*() and
      result = routerClass.getReturn()
    )
  }
}
// ---------------------------------------------------------------------------
// routing modeling
// ---------------------------------------------------------------------------
/**
 * A call to a method like `get` or `post` on a FastAPI application.
 *
 * Calls to `api_route` and `websocket`, and calls on `fastapi.APIRouter`
 * instances, are covered as well.
 *
 * See https://fastapi.tiangolo.com/tutorial/first-steps/#define-a-path-operation-decorator
 */
private class FastApiRouteSetup extends HTTP::Server::RouteSetup::Range, DataFlow::CallCfgNode {
  FastApiRouteSetup() {
    exists(string routeAddingMethod, API::Node routeOwner |
      routeAddingMethod = [HTTP::httpVerbLower(), "api_route", "websocket"] and
      routeOwner = [App::instance(), APIRouter::instance()]
    |
      this = routeOwner.getMember(routeAddingMethod).getACall()
    )
  }

  override Parameter getARoutedParameter() {
    // this will need to be refined a bit, since you can add special parameters to
    // your request handler functions that are used to pass in the response. There
    // might be other special cases as well, but as a start this is not too far off
    // the mark.
    exists(Function handler | handler = this.getARequestHandler() |
      result = handler.getArgByName(_)
    ) and
    // exclude parameters that are type-annotated with `Response`
    not exists(Response::RequestHandlerParam responseParam | responseParam.asExpr() = result)
  }

  override DataFlow::Node getUrlPatternArg() {
    result = this.getArg(0)
    or
    result = this.getArgByName("path")
  }

  // the handler is the function that this call decorates
  override Function getARequestHandler() { node = result.getADecorator().getAFlowNode() }

  override string getFramework() { result = "FastAPI" }

  /** Gets the argument specifying the response class to use, if any. */
  DataFlow::Node getResponseClassArg() { result = this.getArgByName("response_class") }
}
/**
 * A parameter of a FastAPI request handler whose type-annotation is a class that
 * is a Pydantic model.
 */
private class PydanticModelRequestHandlerParam extends Pydantic::BaseModel::InstanceSource,
  DataFlow::ParameterNode {
  PydanticModelRequestHandlerParam() {
    exists(Parameter param | param = this.getParameter() |
      // must be a named parameter of some FastAPI request handler ...
      param = any(FastApiRouteSetup rs).getARequestHandler().getArgByName(_) and
      // ... annotated with a use of a pydantic model class
      param.getAnnotation() = Pydantic::BaseModel::subclassRef().getAUse().asExpr()
    )
  }
}
// ---------------------------------------------------------------------------
// Response modeling
// ---------------------------------------------------------------------------
/**
 * A parameter of a FastAPI request handler that has a WebSocket type-annotation.
 */
private class WebSocketRequestHandlerParam extends Starlette::WebSocket::InstanceSource,
  DataFlow::ParameterNode {
  WebSocketRequestHandlerParam() {
    exists(Parameter param | param = this.getParameter() |
      // must be a named parameter of some FastAPI request handler ...
      param = any(FastApiRouteSetup rs).getARequestHandler().getArgByName(_) and
      // ... annotated with a use of the Starlette `WebSocket` class
      param.getAnnotation() = Starlette::WebSocket::classRef().getAUse().asExpr()
    )
  }
}
/**
* Provides models for the `fastapi.Response` class and subclasses.
*
* See https://fastapi.tiangolo.com/advanced/custom-response/#response.
*/
module Response {
/**
 * Gets the `API::Node` for the manually modeled response class called `name`.
 *
 * `Response` is available both as `fastapi.Response` and as
 * `fastapi.responses.Response`; the other classes are only modeled under
 * `fastapi.responses`.
 */
private API::Node getModeledResponseClass(string name) {
  exists(API::Node fastapi | fastapi = API::moduleImport("fastapi") |
    name = "Response" and
    result = fastapi.getMember(name)
    or
    // see https://github.com/tiangolo/fastapi/blob/master/fastapi/responses.py
    name =
      [
        "Response", "HTMLResponse", "PlainTextResponse", "JSONResponse", "UJSONResponse",
        "ORJSONResponse", "RedirectResponse", "StreamingResponse", "FileResponse"
      ] and
    result = fastapi.getMember("responses").getMember(name)
  )
}
/**
 * Gets the default MIME type for a FastAPI response class (defined with the
 * `media_type` class-attribute).
 *
 * Also models user-defined classes and tries to take inheritance into account.
 *
 * TODO: build easy way to solve problems like this, like we used to have the
 * `ClassValue.lookup` predicate.
 */
private string getDefaultMimeType(API::Node responseClass) {
  // Manually modeled classes. Some of them deliberately have no result:
  // - `Response`, `RedirectResponse`, and `StreamingResponse` have no defaults.
  // - For `FileResponse` the code will guess what mimetype to use, or fall back to
  //   "text/plain", but claiming that all responses will have "text/plain" per
  //   default is also highly inaccurate, so nothing is modeled for it.
  exists(string modeledName | responseClass = getModeledResponseClass(modeledName) |
    modeledName = "HTMLResponse" and
    result = "text/html"
    or
    modeledName = "PlainTextResponse" and
    result = "text/plain"
    or
    modeledName = ["JSONResponse", "UJSONResponse", "ORJSONResponse"] and
    result = "application/json"
  )
  or
  // user-defined subclasses
  exists(Class cls, API::Node base |
    base = getModeledResponseClass(_).getASubclass*() and
    cls.getABase() = base.getAUse().asExpr() and
    responseClass.getAnImmediateUse().asExpr() = cls.getParent()
  |
    if
      exists(Assign anyAssign | anyAssign = cls.getAStmt() |
        anyAssign.getATarget().(Name).getId() = "media_type"
      )
    then
      // the class body assigns `media_type`: only a string constant gives a result
      exists(Assign mediaTypeAssign | mediaTypeAssign = cls.getAStmt() |
        mediaTypeAssign.getATarget().(Name).getId() = "media_type" and
        result = mediaTypeAssign.getValue().(StrConst).getText()
      )
    else
      // TODO: this should use a proper MRO calculation instead
      result = getDefaultMimeType(base)
  )
}
/**
 * A source of instances of `fastapi.Response` and its subclasses, extend this class to model new instances.
 *
 * This can include instantiations of the class, return values from function
 * calls, or a special parameter that will be set when functions are called by an external
 * library.
 *
 * Use the predicate `Response::instance()` to get references to instances of `fastapi.Response`.
 */
abstract class InstanceSource extends DataFlow::LocalSourceNode { }
/**
 * A direct instantiation of a response class.
 *
 * `baseApiNode` is the manually modeled response class, and `responseClass` is
 * that class or one of its (transitive) subclasses -- the one being called.
 */
private class ResponseInstantiation extends InstanceSource, HTTP::Server::HttpResponse::Range,
  DataFlow::CallCfgNode {
  API::Node baseApiNode;
  API::Node responseClass;

  ResponseInstantiation() {
    this = responseClass.getACall() and
    responseClass = baseApiNode.getASubclass*() and
    baseApiNode = getModeledResponseClass(_)
  }

  override DataFlow::Node getBody() {
    // redirect and file responses have no body argument
    not baseApiNode = getModeledResponseClass("RedirectResponse") and
    not baseApiNode = getModeledResponseClass("FileResponse") and
    result = [this.getArg(0), this.getArgByName("content")]
  }

  override DataFlow::Node getMimetypeOrContentTypeArg() {
    not baseApiNode = getModeledResponseClass("RedirectResponse") and
    result = [this.getArg(3), this.getArgByName("media_type")]
  }

  override string getMimetypeDefault() { result = getDefaultMimeType(responseClass) }
}
/**
 * A direct instantiation of a redirect response (`RedirectResponse` or a subclass).
 */
private class RedirectResponseInstantiation extends ResponseInstantiation,
  HTTP::Server::HttpRedirectResponse::Range {
  RedirectResponseInstantiation() { getModeledResponseClass("RedirectResponse") = baseApiNode }

  override DataFlow::Node getRedirectLocation() {
    result = this.getArg(0)
    or
    result = this.getArgByName("url")
  }
}
/**
 * A direct instantiation of a FileResponse (or a subclass), which is also
 * modeled as a file system access.
 */
private class FileResponseInstantiation extends ResponseInstantiation, FileSystemAccess::Range {
  FileResponseInstantiation() { getModeledResponseClass("FileResponse") = baseApiNode }

  override DataFlow::Node getAPathArgument() {
    result = this.getArg(0)
    or
    result = this.getArgByName("path")
  }
}
/**
 * An implicit response from a return of FastAPI request handler.
 *
 * `routeSetup` is the route setup whose request handler returns this value.
 */
private class FastApiRequestHandlerReturn extends HTTP::Server::HttpResponse::Range,
  DataFlow::CfgNode {
  FastApiRouteSetup routeSetup;

  FastApiRequestHandlerReturn() {
    exists(Function handler | handler = routeSetup.getARequestHandler() |
      node = handler.getAReturnValueFlowNode()
    )
  }

  // the returned value itself is the response body
  override DataFlow::Node getBody() { result = this }

  override DataFlow::Node getMimetypeOrContentTypeArg() { none() }

  override string getMimetypeDefault() {
    // an explicit `response_class` decides the default ...
    result =
      getDefaultMimeType(any(API::Node responseClass |
          responseClass.getAUse() = routeSetup.getResponseClassArg()
        ))
    or
    // ... otherwise "application/json" is used
    not exists(routeSetup.getResponseClassArg()) and
    result = "application/json"
  }
}
/**
 * An implicit response from a return of FastAPI request handler, that has
 * `response_class` set to a `FileResponse` (or a subclass of it).
 */
private class FastApiRequestHandlerFileResponseReturn extends FastApiRequestHandlerReturn,
  FileSystemAccess::Range {
  FastApiRequestHandlerFileResponseReturn() {
    routeSetup.getResponseClassArg() =
      getModeledResponseClass("FileResponse").getASubclass*().getAUse()
  }

  // the returned value is modeled as a path, not as a response body
  override DataFlow::Node getBody() { none() }

  override DataFlow::Node getAPathArgument() { result = this }
}
/**
 * An implicit response from a return of FastAPI request handler, that has
 * `response_class` set to a `RedirectResponse` (or a subclass of it).
 */
private class FastApiRequestHandlerRedirectReturn extends FastApiRequestHandlerReturn,
  HTTP::Server::HttpRedirectResponse::Range {
  FastApiRequestHandlerRedirectReturn() {
    routeSetup.getResponseClassArg() =
      getModeledResponseClass("RedirectResponse").getASubclass*().getAUse()
  }

  // the returned value is modeled as the redirect location, not as a response body
  override DataFlow::Node getBody() { none() }

  override DataFlow::Node getRedirectLocation() { result = this }
}
/**
 * INTERNAL: Do not use.
 *
 * A parameter to a FastAPI request-handler that has a `fastapi.Response`
 * type-annotation (any modeled response class or subclass thereof).
 */
class RequestHandlerParam extends InstanceSource, DataFlow::ParameterNode {
  RequestHandlerParam() {
    exists(Parameter param | param = this.getParameter() |
      // must be a named parameter of some FastAPI request handler ...
      param = any(FastApiRouteSetup rs).getARequestHandler().getArgByName(_) and
      // ... annotated with a use of a response class
      param.getAnnotation() = getModeledResponseClass(_).getASubclass*().getAUse().asExpr()
    )
  }
}
/** Gets a reference to an instance of `fastapi.Response`, tracked with the type tracker `t`. */
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
  // base case: tracking starts at any modeled instance source
  result instanceof InstanceSource and
  t.start()
  or
  // recursive case: extend an already tracked reference by one step
  exists(DataFlow::TypeTracker prev | result = instance(prev).track(prev, t))
}

/** Gets a reference to an instance of `fastapi.Response`. */
DataFlow::Node instance() {
  exists(DataFlow::TypeTrackingNode tracked |
    tracked = instance(DataFlow::TypeTracker::end()) and
    tracked.flowsTo(result)
  )
}
/**
 * A call to `set_cookie` on a FastAPI Response.
 */
private class SetCookieCall extends HTTP::Server::CookieWrite::Range, DataFlow::MethodCallNode {
  SetCookieCall() { this.calls(instance(), "set_cookie") }

  // name and value are passed separately, so there is no raw header argument
  override DataFlow::Node getHeaderArg() { none() }

  override DataFlow::Node getNameArg() {
    result = this.getArg(0)
    or
    result = this.getArgByName("key")
  }

  override DataFlow::Node getValueArg() {
    result = this.getArg(1)
    or
    result = this.getArgByName("value")
  }
}
/**
 * A call to `append` on a `headers` of a FastAPI Response, with the `Set-Cookie`
 * header-key (compared case-insensitively).
 */
private class HeadersAppendCookie extends HTTP::Server::CookieWrite::Range,
  DataFlow::MethodCallNode {
  HeadersAppendCookie() {
    exists(DataFlow::AttrRead headers, DataFlow::Node keyArg |
      headers.accesses(instance(), "headers") and
      this.calls(headers, "append") and
      keyArg = [this.getArg(0), this.getArgByName("key")]
    |
      // the key must originate from a string constant spelling `set-cookie`
      exists(StrConst keyLiteral | keyLiteral = keyArg.getALocalSource().asExpr() |
        keyLiteral.getText().toLowerCase() = "set-cookie"
      )
    )
  }

  // the whole cookie is written as a raw header value
  override DataFlow::Node getHeaderArg() {
    result = this.getArg(1)
    or
    result = this.getArgByName("value")
  }

  override DataFlow::Node getNameArg() { none() }

  override DataFlow::Node getValueArg() { none() }
}
}
}

View File

@@ -11,6 +11,7 @@ private import semmle.python.Concepts
private import semmle.python.frameworks.Werkzeug
private import semmle.python.ApiGraphs
private import semmle.python.frameworks.internal.InstanceTaintStepsHelper
private import semmle.python.security.dataflow.PathInjectionCustomizations
/**
* Provides models for the `flask` PyPI package.
@@ -73,7 +74,11 @@ module Flask {
*/
module Blueprint {
/** Gets a reference to the `flask.Blueprint` class. */
API::Node classRef() { result = API::moduleImport("flask").getMember("Blueprint") }
API::Node classRef() {
result = API::moduleImport("flask").getMember("Blueprint")
or
result = API::moduleImport("flask").getMember("blueprints").getMember("Blueprint")
}
/** Gets a reference to an instance of `flask.Blueprint`. */
API::Node instance() { result = classRef().getReturn() }
@@ -233,7 +238,7 @@ module Flask {
}
/** A route setup made by flask (sharing handling of URL patterns). */
abstract private class FlaskRouteSetup extends HTTP::Server::RouteSetup::Range {
abstract class FlaskRouteSetup extends HTTP::Server::RouteSetup::Range {
override Parameter getARoutedParameter() {
// If we don't know the URL pattern, we simply mark all parameters as a routed
// parameter. This should give us more RemoteFlowSources but could also lead to
@@ -519,4 +524,49 @@ module Flask {
override DataFlow::Node getValueArg() { none() }
}
/**
 * A call to `flask.send_from_directory`.
 *
 * See
 * - https://flask.palletsprojects.com/en/1.1.x/api/#flask.send_from_directory
 * - https://flask.palletsprojects.com/en/2.0.x/api/#flask.send_from_directory
 */
private class FlaskSendFromDirectoryCall extends FileSystemAccess::Range, DataFlow::CallCfgNode {
  FlaskSendFromDirectoryCall() {
    this = API::moduleImport("flask").getMember("send_from_directory").getACall()
  }

  /**
   * Gets the argument naming the file to send, relative to the directory.
   *
   * This is the second positional argument. As a keyword argument it is called
   * `filename` in Flask 1.x and `path` from Flask 2.0 on, so both spellings are
   * accepted.
   */
  DataFlow::Node getRelativePathArg() {
    result in [this.getArg(1), this.getArgByName(["filename", "path"])]
  }

  override DataFlow::Node getAPathArgument() {
    result in [this.getArg(0), this.getArgByName("directory")]
    or
    // as described in the docs, the `filename`/`path` argument is restrained to be
    // within the provided directory, so is not exposed to path-injection. (but is
    // still a path-argument).
    result = this.getRelativePathArg()
  }
}

/**
 * To exclude the `filename` (Flask 1.x) / `path` (Flask 2.x) argument to
 * `flask.send_from_directory` as a path-injection sink.
 */
private class FlaskSendFromDirectoryCallFilenameSanitizer extends PathInjection::Sanitizer {
  FlaskSendFromDirectoryCallFilenameSanitizer() {
    this = any(FlaskSendFromDirectoryCall c).getRelativePathArg()
  }
}
/**
 * A call to `flask.send_file`.
 *
 * See
 * - https://flask.palletsprojects.com/en/1.1.x/api/#flask.send_file
 * - https://flask.palletsprojects.com/en/2.0.x/api/#flask.send_file
 */
private class FlaskSendFileCall extends FileSystemAccess::Range, DataFlow::CallCfgNode {
  FlaskSendFileCall() { this = API::moduleImport("flask").getMember("send_file").getACall() }

  override DataFlow::Node getAPathArgument() {
    // The first parameter is called `filename_or_fp` in Flask 1.x and
    // `path_or_file` from Flask 2.0 on; accept both keyword spellings.
    result in [this.getArg(0), this.getArgByName(["filename_or_fp", "path_or_file"])]
  }
}
}

View File

@@ -0,0 +1,79 @@
/**
* Provides classes modeling security-relevant aspects of the `Flask-Admin` PyPI package
* (imported as `flask_admin`).
*
* See
* - https://flask-admin.readthedocs.io/en/latest/
* - https://pypi.org/project/Flask-Admin/
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.Concepts
private import semmle.python.frameworks.Flask
private import semmle.python.ApiGraphs
/**
* Provides models for the `Flask-Admin` PyPI package (imported as `flask_admin`).
*
* See
* - https://flask-admin.readthedocs.io/en/latest/
* - https://pypi.org/project/Flask-Admin/
*/
private module FlaskAdmin {
/**
 * A call to `flask_admin.expose`, which is used as a decorator to make the
 * function exposed in the admin interface (and make it a request handler)
 *
 * See https://flask-admin.readthedocs.io/en/latest/api/mod_base/#flask_admin.base.expose
 */
private class FlaskAdminExposeCall extends Flask::FlaskRouteSetup, DataFlow::CallCfgNode {
  FlaskAdminExposeCall() {
    exists(API::Node exposeFunction |
      exposeFunction = API::moduleImport("flask_admin").getMember("expose") and
      this = exposeFunction.getACall()
    )
  }

  override DataFlow::Node getUrlPatternArg() {
    result = this.getArg(0)
    or
    result = this.getArgByName("url")
  }

  // the handler is the function that this call decorates
  override Function getARequestHandler() { node = result.getADecorator().getAFlowNode() }
}
/**
 * A call to `flask_admin.expose_plugview`, which is used as a decorator to make the
 * class (which we expect to be a flask View class) exposed in the admin interface.
 *
 * See https://flask-admin.readthedocs.io/en/latest/api/mod_base/#flask_admin.base.expose_plugview
 */
private class FlaskAdminExposePlugviewCall extends Flask::FlaskRouteSetup, DataFlow::CallCfgNode {
  FlaskAdminExposePlugviewCall() {
    exists(API::Node exposePlugview |
      exposePlugview = API::moduleImport("flask_admin").getMember("expose_plugview") and
      this = exposePlugview.getACall()
    )
  }

  override DataFlow::Node getUrlPatternArg() {
    result = this.getArg(0)
    or
    result = this.getArgByName("url")
  }

  override Parameter getARoutedParameter() {
    result = super.getARoutedParameter() and
    (
      if exists(this.getUrlPattern())
      then any()
      else
        // the first argument is `self`, and the second argument `cls` will receive the
        // containing flask_admin View class -- this is only relevant if the URL pattern
        // is not known
        not result = this.getARequestHandler().getArg([0, 1])
    )
  }

  // the handlers are the request handlers of the flask view class that this call decorates
  override Function getARequestHandler() {
    exists(Flask::FlaskViewClass cls |
      node = cls.getADecorator().getAFlowNode() and
      result = cls.getARequestHandler()
    )
  }
}
}

View File

@@ -0,0 +1,108 @@
/**
* Provides classes modeling security-relevant aspects of the `pydantic` PyPI package.
*
* See
* - https://pypi.org/project/pydantic/
* - https://pydantic-docs.helpmanual.io/
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
/**
* INTERNAL: Do not use.
*
* Provides models for `pydantic` PyPI package.
*
* See
* - https://pypi.org/project/pydantic/
* - https://pydantic-docs.helpmanual.io/
*/
module Pydantic {
/**
 * Provides models for `pydantic.BaseModel` subclasses (a pydantic model).
 *
 * See https://pydantic-docs.helpmanual.io/usage/models/.
 */
module BaseModel {
  /** Gets a reference to a `pydantic.BaseModel` subclass (a pydantic model). */
  API::Node subclassRef() {
    exists(API::Node baseModel |
      baseModel = API::moduleImport("pydantic").getMember("BaseModel") and
      result = baseModel.getASubclass+()
    )
  }

  /**
   * A source of instances of `pydantic.BaseModel` subclasses, extend this class to model new instances.
   *
   * This can include instantiations of the class, return values from function
   * calls, or a special parameter that will be set when functions are called by an external
   * library.
   *
   * Use the predicate `BaseModel::instance()` to get references to instances of `pydantic.BaseModel`.
   */
  abstract class InstanceSource extends DataFlow::LocalSourceNode { }

  /**
   * Gets a reference to an instance of a `pydantic.BaseModel` subclass, tracked
   * with the type tracker `t`.
   */
  private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
    // base cases: tracking starts at a modeled instance source, or at the target
    // of a step from another (possible) pydantic model
    t.start() and
    (
      result instanceof InstanceSource
      or
      instanceStepToPydanticModel(_, result)
    )
    or
    // recursive case: extend an already tracked reference by one step
    exists(DataFlow::TypeTracker prev | result = instance(prev).track(prev, t))
  }

  /** Gets a reference to an instance of a `pydantic.BaseModel` subclass. */
  DataFlow::Node instance() {
    exists(DataFlow::TypeTrackingNode tracked |
      tracked = instance(DataFlow::TypeTracker::end()) and
      tracked.flowsTo(result)
    )
  }

  /**
   * A step from an instance of a `pydantic.BaseModel` subclass, that might result in
   * an instance of a `pydantic.BaseModel` subclass.
   *
   * NOTE: We currently overapproximate, and treat all attributes as containing
   * another pydantic model. For the code below, we _could_ limit this to `main_foo`
   * and members of `other_foos`. IF THIS IS CHANGED, YOU MUST CHANGE THE ADDITIONAL
   * TAINT STEPS BELOW, SUCH THAT SIMPLE ACCESS OF SOMETHING LIKE `str` IS STILL
   * TAINTED.
   *
   *
   * ```py
   * class MyComplexModel(BaseModel):
   *     field: str
   *     main_foo: Foo
   *     other_foos: List[Foo]
   * ```
   */
  private predicate instanceStepToPydanticModel(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
    nodeFrom = instance() and
    (
      // attributes (such as `model.foo`)
      nodeTo.(DataFlow::AttrRead).getObject() = nodeFrom
      or
      // subscripts on attributes (such as `model.foo[0]`). This needs to handle nested
      // lists (such as `model.foo[0][0]`), and access being split into multiple
      // statements (such as `xs = model.foo; xs[0]`).
      //
      // To handle this we overapproximate which things are a Pydantic model, by
      // treating any subscript on anything that originates on a Pydantic model to also
      // be a Pydantic model. So `model[0]` will be an overapproximation, but should not
      // really cause problems (since we don't expect real code to contain such accesses)
      nodeTo.asCfgNode().(SubscriptNode).getObject() = nodeFrom.asCfgNode()
    )
  }

  /**
   * Extra taint propagation for `pydantic.BaseModel` subclasses: taint flows from
   * an instance to its attributes and subscripts (which are themselves treated as
   * possible `pydantic.BaseModel` instances).
   */
  private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
    override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
      // NOTE: if `instanceStepToPydanticModel` is changed to be more precise, these
      // taint steps should be expanded, such that a field that has type `str` is
      // still tainted.
      instanceStepToPydanticModel(nodeFrom, nodeTo)
    }
  }
}
}

View File

@@ -0,0 +1,171 @@
/**
* Provides classes modeling security-relevant aspects of the `requests` PyPI package.
*
* See
* - https://pypi.org/project/requests/
* - https://docs.python-requests.org/en/latest/
*/
private import python
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.frameworks.internal.InstanceTaintStepsHelper
private import semmle.python.frameworks.Stdlib
/**
* INTERNAL: Do not use.
*
* Provides models for the `requests` PyPI package.
*
* See
* - https://pypi.org/project/requests/
* - https://docs.python-requests.org/en/latest/
*/
private module Requests {
/**
 * An outgoing HTTP request made with the `requests` package: a call to
 * `requests.<verb>` / `requests.request`, or to the same-named method on an
 * instance created with `Session` / `session` from `requests` or `requests.sessions`.
 *
 * `methodName` is the name of the called function or method.
 */
private class OutgoingRequestCall extends HTTP::Client::Request::Range, DataFlow::CallCfgNode {
  string methodName;

  OutgoingRequestCall() {
    methodName = [HTTP::httpVerbLower(), "request"] and
    exists(API::Node requestsModule | requestsModule = API::moduleImport("requests") |
      // module-level function, such as `requests.get(...)`
      this = requestsModule.getMember(methodName).getACall()
      or
      // method on a session instance, such as `requests.Session().get(...)`
      exists(API::Node moduleExporting, API::Node sessionInstance |
        moduleExporting = [requestsModule, requestsModule.getMember("sessions")] and
        sessionInstance = moduleExporting.getMember(["Session", "session"]).getReturn()
      |
        this = sessionInstance.getMember(methodName).getACall()
      )
    )
  }

  override DataFlow::Node getAUrlPart() {
    result = this.getArgByName("url")
    or
    // for `request` the first positional argument is skipped, so the URL is the second one
    (
      if methodName = "request"
      then result = this.getArg(1)
      else result = this.getArg(0)
    )
  }

  /** Gets the `verify` argument to this outgoing requests call. */
  DataFlow::Node getVerifyArg() { result = this.getArgByName("verify") }

  override predicate disablesCertificateValidation(
    DataFlow::Node disablingNode, DataFlow::Node argumentOrigin
  ) {
    disablingNode = this.getVerifyArg() and
    argumentOrigin = verifyArgBacktracker(disablingNode) and
    // `None` is explicitly excluded from the literals whose boolean value is false
    not argumentOrigin.asExpr() instanceof None and
    argumentOrigin.asExpr().(ImmutableLiteral).booleanValue() = false
  }

  override string getFramework() { result = "requests" }
}
/**
 * Extra taint propagation for outgoing requests calls,
 * to ensure that responses to user-controlled URL are tainted.
 */
private class OutgoingRequestCallTaintStep extends TaintTracking::AdditionalTaintStep {
  override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
    // from a URL part of the request to the call itself
    exists(OutgoingRequestCall request |
      nodeTo = request and
      nodeFrom = request.getAUrlPart()
    )
  }
}
/** Gets a back-reference to the verify argument `arg`, tracked with the type back-tracker `t`. */
private DataFlow::TypeTrackingNode verifyArgBacktracker(
  DataFlow::TypeBackTracker t, DataFlow::Node arg
) {
  // base case: start at a local source of the `verify` argument of some request call
  arg = any(OutgoingRequestCall c).getVerifyArg() and
  result = arg.getALocalSource() and
  t.start()
  or
  // recursive case: go one step further back
  exists(DataFlow::TypeBackTracker prev |
    result = verifyArgBacktracker(prev, arg).backtrack(prev, t)
  )
}

/** Gets a back-reference to the verify argument `arg`. */
private DataFlow::LocalSourceNode verifyArgBacktracker(DataFlow::Node arg) {
  exists(DataFlow::TypeBackTracker done | done = DataFlow::TypeBackTracker::end() |
    result = verifyArgBacktracker(done, arg)
  )
}
// ---------------------------------------------------------------------------
// Response
// ---------------------------------------------------------------------------
/**
 * Provides models for the `requests.models.Response` class
 *
 * See https://docs.python-requests.org/en/latest/api/#requests.Response.
 */
module Response {
  /**
   * Gets a reference to the `requests.models.Response` class (also available as
   * `requests.Response`).
   */
  private API::Node classRef() {
    exists(API::Node requestsModule | requestsModule = API::moduleImport("requests") |
      result = [requestsModule.getMember("models"), requestsModule].getMember("Response")
    )
  }

  /**
   * A source of instances of `requests.models.Response`, extend this class to model new instances.
   *
   * This can include instantiations of the class, return values from function
   * calls, or a special parameter that will be set when functions are called by an external
   * library.
   *
   * Use the predicate `Response::instance()` to get references to instances of `requests.models.Response`.
   */
  abstract class InstanceSource extends DataFlow::LocalSourceNode { }

  /** A direct instantiation of `requests.models.Response`. */
  private class ClassInstantiation extends InstanceSource, DataFlow::CallCfgNode {
    ClassInstantiation() { classRef().getACall() = this }
  }

  /** Return value from making a request. */
  private class RequestReturnValue extends InstanceSource, DataFlow::Node {
    RequestReturnValue() { this instanceof OutgoingRequestCall }
  }

  /**
   * Gets a reference to an instance of `requests.models.Response`, tracked with
   * the type tracker `t`.
   */
  private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
    // base case: tracking starts at any modeled instance source
    result instanceof InstanceSource and
    t.start()
    or
    // recursive case: extend an already tracked reference by one step
    exists(DataFlow::TypeTracker prev | result = instance(prev).track(prev, t))
  }

  /** Gets a reference to an instance of `requests.models.Response`. */
  DataFlow::Node instance() {
    exists(DataFlow::TypeTrackingNode tracked |
      tracked = instance(DataFlow::TypeTracker::end()) and
      tracked.flowsTo(result)
    )
  }

  /**
   * Taint propagation for `requests.models.Response`.
   */
  private class InstanceTaintSteps extends InstanceTaintStepsHelper {
    InstanceTaintSteps() { this = "requests.models.Response" }

    override DataFlow::Node getInstance() { result = instance() }

    override string getAttributeName() {
      result = ["text", "content", "raw", "links", "cookies", "headers"]
    }

    override string getMethodName() { result = ["json", "iter_content", "iter_lines"] }

    override string getAsyncMethodName() { none() }
  }

  /** An attribute read that is a file-like instance (the `raw` attribute of a response). */
  private class FileLikeInstances extends Stdlib::FileLikeObject::InstanceSource {
    FileLikeInstances() {
      exists(DataFlow::AttrRead read | read = this |
        read.getAttributeName() = "raw" and
        read.getObject() = instance()
      )
    }
  }
}
}

View File

@@ -0,0 +1,369 @@
/**
* Provides classes modeling security-relevant aspects of the `djangorestframework` PyPI package
* (imported as `rest_framework`)
*
* See
* - https://www.django-rest-framework.org/
* - https://pypi.org/project/djangorestframework/
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
private import semmle.python.frameworks.internal.InstanceTaintStepsHelper
private import semmle.python.frameworks.Django
private import semmle.python.frameworks.Stdlib
/**
* INTERNAL: Do not use.
*
* Provides models for the `djangorestframework` PyPI package
* (imported as `rest_framework`)
*
* See
* - https://www.django-rest-framework.org/
* - https://pypi.org/project/djangorestframework/
*/
private module RestFramework {
// ---------------------------------------------------------------------------
// rest_framework.views.APIView handling
// ---------------------------------------------------------------------------
/**
 * An `API::Node` for the `rest_framework.views.APIView` class, or for one of the
 * subclasses/aliases of it that are explicitly listed in the CodeQL libraries.
 */
private class ModeledApiViewClasses extends Django::Views::View::ModeledSubclass {
  ModeledApiViewClasses() {
    exists(API::Node restFramework | restFramework = API::moduleImport("rest_framework") |
      this = restFramework.getMember("views").getMember("APIView")
      or
      // The paths below were generated by python/frameworks/internal/SubclassFinder.qll;
      // they are grouped by the module through which the class is reachable.
      this =
        restFramework
            .getMember("authtoken")
            .getMember("views")
            .getMember(["APIView", "ObtainAuthToken"])
      or
      this = restFramework.getMember("decorators").getMember("APIView")
      or
      this =
        restFramework
            .getMember("generics")
            .getMember([
                "CreateAPIView", "DestroyAPIView", "GenericAPIView", "ListAPIView",
                "ListCreateAPIView", "RetrieveAPIView", "RetrieveDestroyAPIView",
                "RetrieveUpdateAPIView", "RetrieveUpdateDestroyAPIView", "UpdateAPIView"
              ])
      or
      this = restFramework.getMember("routers").getMember(["APIRootView", "SchemaView"])
      or
      this =
        restFramework.getMember("schemas").getMember("views").getMember(["APIView", "SchemaView"])
      or
      this =
        restFramework
            .getMember("viewsets")
            .getMember(["GenericViewSet", "ModelViewSet", "ReadOnlyModelViewSet", "ViewSet"])
    )
  }
}
/**
 * A class that has a rest_framework `APIView` class among its super-types, and is
 * therefore an `APIView` class itself.
 */
class RestFrameworkApiViewClass extends PrivateDjango::DjangoViewClassFromSuperClass {
  RestFrameworkApiViewClass() {
    exists(ModeledApiViewClasses modeled |
      this.getABase() = modeled.getASubclass*().getAUse().asExpr()
    )
  }

  /**
   * Gets a request handler of this class: anything the super-class already considers a
   * request handler, plus any method named like one of the `APIView` hooks listed below.
   */
  override Function getARequestHandler() {
    // TODO: This doesn't handle attribute assignment. Should be OK, but analysis is not as
    // complete as with points-to and `.lookup`, which would handle
    // `post = my_post_handler` inside the class definition.
    result = this.getAMethod() and
    result.getName() =
      [
        // these method names were found by looking through the APIView
        // implementation in
        // https://github.com/encode/django-rest-framework/blob/master/rest_framework/views.py#L104
        "initial", "http_method_not_allowed", "permission_denied", "throttled",
        "get_authenticate_header", "perform_content_negotiation", "perform_authentication",
        "check_permissions", "check_object_permissions", "check_throttles", "determine_version",
        "initialize_request", "finalize_response", "dispatch", "options"
      ]
    or
    result = super.getARequestHandler()
  }
}
// ---------------------------------------------------------------------------
// rest_framework.decorators.api_view handling
// ---------------------------------------------------------------------------
/**
 * A function that is a request handler because one of its decorators is a call to
 * `rest_framework.decorators.api_view`.
 */
class RestFrameworkFunctionBasedView extends PrivateDjango::DjangoRouteHandler::Range {
  RestFrameworkFunctionBasedView() {
    exists(DataFlow::Node apiViewCall |
      apiViewCall =
        API::moduleImport("rest_framework").getMember("decorators").getMember("api_view").getACall()
    |
      this.getADecorator() = apiViewCall.asExpr()
    )
  }
}
/**
 * Ensures that every `RestFrameworkFunctionBasedView` is also a
 * `HTTP::Server::RequestHandler`. This is only needed for the views that don't have a
 * known route setup.
 */
class RestFrameworkFunctionBasedViewWithoutKnownRoute extends HTTP::Server::RequestHandler::Range,
  PrivateDjango::DjangoRouteHandler instanceof RestFrameworkFunctionBasedView {
  RestFrameworkFunctionBasedViewWithoutKnownRoute() {
    not this = any(PrivateDjango::DjangoRouteSetup setup).getARequestHandler()
  }

  override Parameter getARoutedParameter() {
    // Since we don't know the URL pattern, we simply mark all parameters as routed
    // parameters. This should give us more RemoteFlowSources but could also lead to
    // more FPs. If this turns out to be the wrong tradeoff, we can always change our mind.
    (
      result = this.getArg(_)
      or
      result = this.getArgByName(_)
    ) and
    // positional parameters before the first possible routed index are excluded
    not exists(int i | i < this.getFirstPossibleRoutedParamIndex() | result = this.getArg(i))
  }

  override string getFramework() { result = "Django (rest_framework)" }
}
// ---------------------------------------------------------------------------
// request modeling
// ---------------------------------------------------------------------------
/**
 * A parameter that will receive a `rest_framework.request.Request` instance when a
 * request handler is invoked.
 *
 * Covers both the request parameter of request handlers on `APIView` subclasses and the
 * request parameter of functions decorated with `@api_view`.
 */
private class RestFrameworkRequestHandlerRequestParam extends Request::InstanceSource,
  RemoteFlowSource::Range, DataFlow::ParameterNode {
  RestFrameworkRequestHandlerRequestParam() {
    // rest_framework.views.APIView subclass
    exists(RestFrameworkApiViewClass vc |
      this.getParameter() =
        vc.getARequestHandler().(PrivateDjango::DjangoRouteHandler).getRequestParam()
    )
    or
    // annotated with @api_view decorator
    exists(PrivateDjango::DjangoRouteHandler rh | rh instanceof RestFrameworkFunctionBasedView |
      this.getParameter() = rh.getRequestParam()
    )
  }

  // The modeled class is `rest_framework.request.Request` (see the `Request` module);
  // the source type names that class rather than a `HttpRequest` under `rest_framework.request`.
  override string getSourceType() { result = "rest_framework.request.Request" }
}
/**
 * Models of the `rest_framework.request.Request` class.
 *
 * See https://www.django-rest-framework.org/api-guide/requests/.
 */
module Request {
  /** Gets the API graph node for the `rest_framework.request.Request` class. */
  private API::Node classRef() {
    result = API::moduleImport("rest_framework").getMember("request").getMember("Request")
  }

  /**
   * A source of `rest_framework.request.Request` instances. Extend this class to model
   * new instances.
   *
   * Such a source can be an instantiation of the class, the return value of a function
   * call, or a special parameter that an external library sets when it calls a function.
   *
   * To get references to instances of `rest_framework.request.Request`, use the
   * predicate `Request::instance()`.
   */
  abstract class InstanceSource extends PrivateDjango::django::http::request::HttpRequest::InstanceSource {
  }

  /** A call that directly instantiates `rest_framework.request.Request`. */
  private class ClassInstantiation extends InstanceSource, DataFlow::CallCfgNode {
    ClassInstantiation() { classRef().getACall() = this }
  }

  /** Gets a reference to an instance of `rest_framework.request.Request`, tracked with `t`. */
  private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
    result instanceof InstanceSource and
    t.start()
    or
    exists(DataFlow::TypeTracker prev | result = instance(prev).track(prev, t))
  }

  /** Gets a reference to an instance of `rest_framework.request.Request`. */
  DataFlow::Node instance() {
    exists(DataFlow::TypeTrackingNode tracked |
      tracked = instance(DataFlow::TypeTracker::end())
    |
      tracked.flowsTo(result)
    )
  }

  /**
   * Taint propagation for `rest_framework.request.Request`.
   */
  private class InstanceTaintSteps extends InstanceTaintStepsHelper {
    InstanceTaintSteps() { this = "rest_framework.request.Request" }

    override DataFlow::Node getInstance() { result = instance() }

    override string getAttributeName() {
      result = ["data", "query_params", "user", "auth", "content_type", "stream"]
    }

    override string getMethodName() { none() }

    override string getAsyncMethodName() { none() }
  }

  /** A read of the `query_params` attribute, modeled as a `MultiValueDict` instance. */
  private class MultiValueDictInstances extends Django::MultiValueDict::InstanceSource {
    MultiValueDictInstances() {
      exists(DataFlow::AttrRead read | read = this |
        read.getAttributeName() = "query_params" and
        read.getObject() = instance()
      )
    }
  }

  /** A read of the `user` attribute, modeled as a `User` instance. */
  private class UserInstances extends Django::User::InstanceSource {
    UserInstances() {
      exists(DataFlow::AttrRead read | read = this |
        read.getAttributeName() = "user" and
        read.getObject() = instance()
      )
    }
  }

  /** A read of the `stream` attribute, modeled as a file-like instance. */
  private class FileLikeInstances extends Stdlib::FileLikeObject::InstanceSource {
    FileLikeInstances() {
      exists(DataFlow::AttrRead read | read = this |
        read.getAttributeName() = "stream" and
        read.getObject() = instance()
      )
    }
  }
}
// ---------------------------------------------------------------------------
// response modeling
// ---------------------------------------------------------------------------
/**
 * Models of the `rest_framework.response.Response` class.
 *
 * See https://www.django-rest-framework.org/api-guide/responses/.
 */
module Response {
  /** Gets the API graph node for the `rest_framework.response.Response` class. */
  private API::Node classRef() {
    result = API::moduleImport("rest_framework").getMember("response").getMember("Response")
  }

  /**
   * A source of `rest_framework.response.Response` instances. Extend this class to model
   * new instances.
   *
   * Such a source can be an instantiation of the class, the return value of a function
   * call, or a special parameter that an external library sets when it calls a function.
   */
  abstract class InstanceSource extends DataFlow::LocalSourceNode { }

  /**
   * A call that directly instantiates `rest_framework.response.Response`, treated as a
   * Django HTTP response.
   */
  private class ClassInstantiation extends PrivateDjango::django::http::response::HttpResponse::InstanceSource,
    DataFlow::CallCfgNode {
    ClassInstantiation() { classRef().getACall() = this }

    // the response body is the `data` argument (first positional)
    override DataFlow::Node getBody() {
      result = this.getArg(0)
      or
      result = this.getArgByName("data")
    }

    // `content_type` is accepted by keyword or as the positional argument at index 5
    override DataFlow::Node getMimetypeOrContentTypeArg() {
      result = this.getArg(5)
      or
      result = this.getArgByName("content_type")
    }

    override string getMimetypeDefault() { none() }
  }
}
// ---------------------------------------------------------------------------
// Exception response modeling
// ---------------------------------------------------------------------------
/**
 * Models of the `rest_framework.exceptions.APIException` class and its subclasses.
 *
 * See https://www.django-rest-framework.org/api-guide/exceptions/#api-reference
 */
module APIException {
  /** A call that directly instantiates `rest_framework.exceptions.APIException` or a subclass. */
  private class ClassInstantiation extends HTTP::Server::HttpResponse::Range,
    DataFlow::CallCfgNode {
    /** The name of the instantiated exception class. */
    string className;

    ClassInstantiation() {
      this =
        API::moduleImport("rest_framework").getMember("exceptions").getMember(className).getACall() and
      className =
        [
          "APIException", "ValidationError", "ParseError", "AuthenticationFailed",
          "NotAuthenticated", "PermissionDenied", "NotFound", "MethodNotAllowed", "NotAcceptable",
          "UnsupportedMediaType", "Throttled"
        ]
    }

    override DataFlow::Node getBody() {
      // `detail` given by keyword, whatever the exception class
      result = this.getArgByName("detail")
      or
      // for these classes the body is the positional argument at index 1
      result = this.getArg(1) and
      className = ["MethodNotAllowed", "UnsupportedMediaType", "Throttled"]
      or
      // for these classes the body is the positional argument at index 0
      result = this.getArg(0) and
      className =
        [
          "APIException", "ValidationError", "ParseError", "AuthenticationFailed",
          "NotAuthenticated", "PermissionDenied", "NotFound", "NotAcceptable"
        ]
    }

    override DataFlow::Node getMimetypeOrContentTypeArg() { none() }

    override string getMimetypeDefault() { none() }
  }
}
}

View File

@@ -211,6 +211,13 @@ module SqlAlchemy {
.getReturn()
.getMember("begin")
.getACall()
or
this =
API::moduleImport("sqlalchemy")
.getMember("orm")
.getMember("scoped_session")
.getReturn()
.getACall()
}
}
@@ -313,9 +320,9 @@ module SqlAlchemy {
* A construction of a `sqlalchemy.sql.expression.TextClause`, which represents a
* textual SQL string directly.
*/
abstract class TextClauseConstruction extends DataFlow::CallCfgNode {
abstract class TextClauseConstruction extends SqlConstruction::Range, DataFlow::CallCfgNode {
/** Gets the argument that specifies the SQL text. */
DataFlow::Node getTextArg() { result in [this.getArg(0), this.getArgByName("text")] }
override DataFlow::Node getSql() { result in [this.getArg(0), this.getArgByName("text")] }
}
/** `TextClause` constructions from the `sqlalchemy` package. */

View File

@@ -0,0 +1,162 @@
/**
* Provides classes modeling security-relevant aspects of the `starlette` PyPI package.
*
* See
* - https://pypi.org/project/starlette/
* - https://www.starlette.io/
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
private import semmle.python.frameworks.internal.InstanceTaintStepsHelper
private import semmle.python.frameworks.Stdlib
/**
* INTERNAL: Do not use.
*
* Provides models for `starlette` PyPI package.
*
* See
* - https://pypi.org/project/starlette/
* - https://www.starlette.io/
*/
module Starlette {
/**
 * Models of the `starlette.websockets.WebSocket` class.
 *
 * See https://www.starlette.io/websockets/.
 */
module WebSocket {
  /** Gets a reference to the `starlette.websockets.WebSocket` class. */
  API::Node classRef() {
    // `fastapi.WebSocket` is modeled as a reference to the same class
    result = API::moduleImport("fastapi").getMember("WebSocket")
    or
    result = API::moduleImport("starlette").getMember("websockets").getMember("WebSocket")
  }

  /**
   * A source of `starlette.websockets.WebSocket` instances. Extend this class to model
   * new instances.
   *
   * Such a source can be an instantiation of the class, the return value of a function
   * call, or a special parameter that an external library sets when it calls a function.
   *
   * To get references to instances of `starlette.websockets.WebSocket`, use the
   * predicate `WebSocket::instance()`.
   */
  abstract class InstanceSource extends DataFlow::LocalSourceNode { }

  /** A call that directly instantiates `starlette.websockets.WebSocket`. */
  private class ClassInstantiation extends InstanceSource, DataFlow::CallCfgNode {
    ClassInstantiation() { classRef().getACall() = this }
  }

  /** Gets a reference to an instance of `starlette.websockets.WebSocket`, tracked with `t`. */
  private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
    result instanceof InstanceSource and
    t.start()
    or
    exists(DataFlow::TypeTracker prev | result = instance(prev).track(prev, t))
  }

  /** Gets a reference to an instance of `starlette.websockets.WebSocket`. */
  DataFlow::Node instance() {
    exists(DataFlow::TypeTrackingNode tracked |
      tracked = instance(DataFlow::TypeTracker::end())
    |
      tracked.flowsTo(result)
    )
  }

  /**
   * Taint propagation for `starlette.websockets.WebSocket`.
   */
  private class InstanceTaintSteps extends InstanceTaintStepsHelper {
    InstanceTaintSteps() { this = "starlette.websockets.WebSocket" }

    override DataFlow::Node getInstance() { result = instance() }

    override string getAttributeName() { result = ["url", "headers", "query_params", "cookies"] }

    override string getMethodName() { none() }

    override string getAsyncMethodName() {
      result =
        [
          "receive", "receive_bytes", "receive_text", "receive_json", "iter_bytes", "iter_text",
          "iter_json"
        ]
    }
  }

  /**
   * A read of the `url` attribute on a `starlette.websockets.WebSocket` instance,
   * modeled as a `starlette.requests.URL` instance.
   */
  private class UrlInstances extends URL::InstanceSource {
    UrlInstances() {
      exists(DataFlow::AttrRead read | read = this |
        read.getAttributeName() = "url" and
        read.getObject() = instance()
      )
    }
  }
}
/**
 * Models of the `starlette.requests.URL` class.
 *
 * See the URL part of https://www.starlette.io/websockets/.
 */
module URL {
  /** Gets the API graph node for the `starlette.requests.URL` class. */
  private API::Node classRef() {
    result = API::moduleImport("starlette").getMember("requests").getMember("URL")
  }

  /**
   * A source of `starlette.requests.URL` instances. Extend this class to model new
   * instances.
   *
   * Such a source can be an instantiation of the class, the return value of a function
   * call, or a special parameter that an external library sets when it calls a function.
   *
   * To get references to instances of `starlette.requests.URL`, use the predicate
   * `URL::instance()`.
   */
  abstract class InstanceSource extends DataFlow::LocalSourceNode { }

  /** A call that directly instantiates `starlette.requests.URL`. */
  private class ClassInstantiation extends InstanceSource, DataFlow::CallCfgNode {
    ClassInstantiation() { classRef().getACall() = this }
  }

  /** Gets a reference to an instance of `starlette.requests.URL`, tracked with `t`. */
  private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
    result instanceof InstanceSource and
    t.start()
    or
    exists(DataFlow::TypeTracker prev | result = instance(prev).track(prev, t))
  }

  /** Gets a reference to an instance of `starlette.requests.URL`. */
  DataFlow::Node instance() {
    exists(DataFlow::TypeTrackingNode tracked |
      tracked = instance(DataFlow::TypeTracker::end())
    |
      tracked.flowsTo(result)
    )
  }

  /**
   * Taint propagation for `starlette.requests.URL`.
   */
  private class InstanceTaintSteps extends InstanceTaintStepsHelper {
    InstanceTaintSteps() { this = "starlette.requests.URL" }

    override DataFlow::Node getInstance() { result = instance() }

    override string getAttributeName() {
      result =
        [
          "components", "netloc", "path", "query", "fragment", "username", "password", "hostname",
          "port"
        ]
    }

    override string getMethodName() { none() }

    override string getAsyncMethodName() { none() }
  }

  /**
   * A read of the `components` attribute on a `starlette.requests.URL` instance, modeled
   * as a `urllib.parse.SplitResult` instance.
   */
  private class UrlSplitInstances extends Stdlib::SplitResult::InstanceSource {
    UrlSplitInstances() {
      exists(DataFlow::AttrRead read | read = this |
        read.getAttributeName() = "components" and
        read.getObject() = instance()
      )
    }
  }
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,101 @@
/**
* Provides classes modeling security-relevant aspects of the `toml` PyPI package.
*
* See
* - https://pypi.org/project/toml/
* - https://github.com/uiri/toml#api-reference
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
/**
* Provides classes modeling security-relevant aspects of the `toml` PyPI package
*
* See
* - https://pypi.org/project/toml/
* - https://github.com/uiri/toml#api-reference
*/
private module Toml {
/**
 * A call to `toml.loads` (also reachable as `toml.decoder.loads`), which decodes TOML
 * from its first argument.
 *
 * See https://github.com/uiri/toml#api-reference
 */
private class TomlLoadsCall extends Decoding::Range, DataFlow::CallCfgNode {
  TomlLoadsCall() {
    exists(API::Node loads |
      loads = API::moduleImport("toml").getMember("loads")
      or
      loads = API::moduleImport("toml").getMember("decoder").getMember("loads")
    |
      this = loads.getACall()
    )
  }

  override predicate mayExecuteInput() { none() }

  override DataFlow::Node getAnInput() {
    result = this.getArg(0)
    or
    result = this.getArgByName("s")
  }

  override DataFlow::Node getOutput() { result = this }

  override string getFormat() { result = "TOML" }
}
/**
 * A call to `toml.load` (also reachable as `toml.decoder.load`), which decodes TOML
 * from its first argument.
 *
 * See https://github.com/uiri/toml#api-reference
 */
private class TomlLoadCall extends Decoding::Range, DataFlow::CallCfgNode {
  TomlLoadCall() {
    exists(API::Node load |
      load = API::moduleImport("toml").getMember("load")
      or
      load = API::moduleImport("toml").getMember("decoder").getMember("load")
    |
      this = load.getACall()
    )
  }

  override predicate mayExecuteInput() { none() }

  override DataFlow::Node getAnInput() {
    result = this.getArg(0)
    or
    result = this.getArgByName("f")
  }

  override DataFlow::Node getOutput() { result = this }

  override string getFormat() { result = "TOML" }
}
/**
 * A call to `toml.dumps` (also reachable as `toml.encoder.dumps`), which encodes its
 * first argument as TOML and returns the result.
 *
 * See https://github.com/uiri/toml#api-reference
 */
private class TomlDumpsCall extends Encoding::Range, DataFlow::CallCfgNode {
  TomlDumpsCall() {
    exists(API::Node dumps |
      dumps = API::moduleImport("toml").getMember("dumps")
      or
      dumps = API::moduleImport("toml").getMember("encoder").getMember("dumps")
    |
      this = dumps.getACall()
    )
  }

  override DataFlow::Node getAnInput() {
    result = this.getArg(0)
    or
    result = this.getArgByName("o")
  }

  override DataFlow::Node getOutput() { result = this }

  override string getFormat() { result = "TOML" }
}
/**
 * A call to `toml.dump` (also reachable as `toml.encoder.dump`), which encodes its
 * first argument as TOML; the output is modeled as the second argument (`f`).
 *
 * See https://github.com/uiri/toml#api-reference
 */
private class TomlDumpCall extends Encoding::Range, DataFlow::CallCfgNode {
  TomlDumpCall() {
    exists(API::Node dump |
      dump = API::moduleImport("toml").getMember("dump")
      or
      dump = API::moduleImport("toml").getMember("encoder").getMember("dump")
    |
      this = dump.getACall()
    )
  }

  override DataFlow::Node getAnInput() {
    result = this.getArg(0)
    or
    result = this.getArgByName("o")
  }

  override DataFlow::Node getOutput() {
    result = this.getArg(1)
    or
    result = this.getArgByName("f")
  }

  override string getFormat() { result = "TOML" }
}
}

View File

@@ -62,6 +62,22 @@ private DataFlow::TypeTrackingNode poorMansFunctionTracker(DataFlow::TypeTracker
exists(DataFlow::TypeTracker t2 | result = poorMansFunctionTracker(t2, func).track(t2, t))
}
/**
* Gets a reference to `func` of the form `<self>.<name of func>`. `func` must be a method
* of some class, and the reference is an attribute read on a value whose local source is
* the first parameter of a method of that same class.
*
* Nothing in this predicate requires the enclosing method to differ from `func`, so a
* method that refers to itself through its first parameter is included as well.
*/
private DataFlow::Node getSimpleMethodReferenceWithinClass(Function func) {
// TODO: Should take MRO into account
exists(Class cls, Function otherFunc, DataFlow::Node selfRefOtherFunc |
// both `func` and `otherFunc` are methods of the same class `cls`
pragma[only_bind_into](cls).getAMethod() = func and
pragma[only_bind_into](cls).getAMethod() = otherFunc
|
// `selfRefOtherFunc` has the first parameter of `otherFunc` (conventionally `self`)
// as its local source
selfRefOtherFunc.getALocalSource().(DataFlow::ParameterNode).getParameter() =
otherFunc.getArg(0) and
// `result` is an attribute read that `accesses` that value with the name of `func`
result.(DataFlow::AttrRead).accesses(selfRefOtherFunc, func.getName())
)
}
/**
* INTERNAL: Do not use.
*
@@ -80,7 +96,20 @@ private DataFlow::TypeTrackingNode poorMansFunctionTracker(DataFlow::TypeTracker
* inst = MyClass()
* print(inst.my_method)
* ```
*
* But it is able to handle simple method calls within a class (but does not take MRO into
* account).
* ```py
* class MyClass:
* def method1(self):
* pass
*
* def method2(self):
* self.method1()
* ```
*/
DataFlow::Node poorMansFunctionTracker(Function func) {
  // references of the form `self.<method>` from within the class defining `func`
  result = getSimpleMethodReferenceWithinClass(func)
  or
  // references found by type-tracking `func`
  exists(DataFlow::TypeTrackingNode tracked |
    tracked = poorMansFunctionTracker(DataFlow::TypeTracker::end(), func)
  |
    tracked.flowsTo(result)
  )
}

View File

@@ -0,0 +1,209 @@
/**
* INTERNAL: Do not use.
*
* Has predicates to help find subclasses in library code. Should only be used to aid in
* the manual library modeling process.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.ApiGraphs
private import semmle.python.filters.Tests
// very much inspired by the draft at https://github.com/github/codeql/pull/5632
private module NotExposed {
// Instructions:
// This needs to be automated better, but for this prototype, here are some rough instructions:
// 0) get a database of the library you are about to model
// 1) fill out the `getAlreadyModeledClass` body below
// 2) quick-eval the `quickEvalMe` predicate below, and copy the output to your modeling predicate
/**
* The spec used by `quickEvalMe`. This is a template: fill in `getAlreadyModeledClass`
* locally with the classes that are already modeled, and do not commit that change.
*/
class MySpec extends FindSubclassesSpec {
MySpec() { this = "MySpec" }
/** Gets a class that is already modeled; has no results until filled in. */
override API::Node getAlreadyModeledClass() {
// FILL ME OUT ! (but don't commit with any changes)
none()
// for example
// result = API::moduleImport("rest_framework").getMember("views").getMember("APIView")
}
}
/**
 * Holds if `newImport` is the QL snippet to paste into a modeling predicate: a header
 * comment followed by one `this = API::<path>` disjunct (joined with `or`) for every
 * new model found for `MySpec`.
 */
predicate quickEvalMe(string newImport) {
  exists(string header, string disjuncts |
    header = "// imports generated by python/frameworks/internal/SubclassFinder.qll\n" and
    disjuncts =
      concat(string newModelFullyQualified |
        newModel(any(MySpec spec), newModelFullyQualified, _, _, _)
      |
        fullyQualifiedToAPIGraphPath(newModelFullyQualified), " or this = API::"
      )
  |
    newImport = header + "this = API::" + disjuncts
  )
}
// ---------------------------------------------------------------------------
// Implementation below
// ---------------------------------------------------------------------------
//
// We are looking to find all subclasses of the already modelled classes, and ideally
// we would identify an `API::Node` for each (then `toString` would give the API
// path).
//
// An inherent problem with API graphs is that there doesn't need to exist a result
// for the API graph path that we want to add to our modeling (the path to the new
// subclass). As an example, the following query has no results when evaluated against
// a django/django DB.
//
// select API::moduleImport("django") .getMember("contrib") .getMember("admin")
// .getMember("views") .getMember("main") .getMember("ChangeListSearchForm")
//
//
// Since it is a Form subclass that we would want to capture for our Django modeling,
// we want to extend our modeling (that is written in a qll file) with exactly that
// piece of code, but since the API::Node doesn't exist, we can't select that from a
// predicate and print its path. We need a different approach, and for that we use
// fully qualified names to capture new classes/new aliases, and transform these into
// API paths (to be included in the modeling that is inserted into the `.qll` files),
// see `fullyQualifiedToAPIGraphPath`.
//
// NOTE: this implementation was originally created with automatic modeling of
// packages in mind, and has since been adjusted to help with manual library
// modeling. See https://github.com/github/codeql/pull/5632 for more discussion.
//
//
/**
 * Gets the API graph path expression (without the leading `API::`) for the fully
 * qualified name `fullyQualified`. For example, `a.b.C` becomes
 * `moduleImport("a").getMember("b").getMember("C")`.
 */
bindingset[fullyQualified]
string fullyQualifiedToAPIGraphPath(string fullyQualified) {
  exists(string memberChain |
    // every `.` separator closes the previous call and opens a `getMember` call
    memberChain = fullyQualified.replaceAll(".", "\").getMember(\"")
  |
    result = "moduleImport(\"" + memberChain + "\")"
  )
}
/**
* A named specification of which classes are already modeled; the predicates in this
* module look for new subclasses and aliases of those classes.
*/
bindingset[this]
abstract class FindSubclassesSpec extends string {
/** Gets an `API::Node` for a class that is already modeled for this spec. */
abstract API::Node getAlreadyModeledClass();
}
/**
 * Holds if `newModelFullyQualified` describes a new subclass or a new alias belonging
 * to `spec` that we should include in our automated modeling.
 * The new element is defined by `ast`, located at `loc` in the module `mod`.
 */
query predicate newModel(
  FindSubclassesSpec spec, string newModelFullyQualified, AstNode ast, Module mod, Location loc
) {
  // alias introduced by `from <module> import *`
  newImportStar(spec, newModelFullyQualified, ast, mod, _, _, loc)
  or
  // alias introduced by `from <module> import <member>`
  newDirectAlias(spec, newModelFullyQualified, ast, mod, loc)
  or
  // class definition
  newSubclass(spec, newModelFullyQualified, ast, mod, loc)
}
/**
 * Gets an `API::Node` that belongs to `spec`: either a class that is already modeled,
 * or a node whose path matches a newly found model.
 */
API::Node newOrExistingModeling(FindSubclassesSpec spec) {
  exists(string newSubclassName, string apiPath |
    newModel(spec, newSubclassName, _, _, _) and
    apiPath = fullyQualifiedToAPIGraphPath(newSubclassName)
  |
    result.getPath() = apiPath
  )
  or
  result = spec.getAlreadyModeledClass()
}
/**
 * Holds if the API graph path for `fullyQualifiedName` is the path of a class that
 * `spec` lists as already modeled.
 */
bindingset[fullyQualifiedName]
predicate alreadyModeled(FindSubclassesSpec spec, string fullyQualifiedName) {
  exists(API::Node modeled | modeled = spec.getAlreadyModeledClass() |
    modeled.getPath() = fullyQualifiedToAPIGraphPath(fullyQualifiedName)
  )
}
/**
 * Holds if `ast` is in a file that has a relative path not starting with `tests/`, and
 * `ast` is not (transitively) inside a `TestScope`.
 */
predicate isNonTestProjectCode(AstNode ast) {
  exists(ast.getLocation().getFile().getRelativePath()) and
  not ast.getLocation().getFile().getRelativePath().matches("tests/%") and
  not exists(TestScope testScope | testScope = ast.getScope*())
}
/** Holds if `mod` contains an assignment that defines the global variable `__all__`. */
predicate hasAllStatement(Module mod) {
  exists(GlobalVariable allVar, AssignStmt assignment |
    allVar.getId() = "__all__" and
    assignment.getScope() = mod and
    assignment.defines(allVar)
  )
}
/**
 * Holds if `newAliasFullyQualified` describes a new alias that originates from the import
 * `from <module> import <member> [as <new-name>]`, where `<module>.<member>` belongs to
 * `spec`.
 * If the import happens in module `foo.bar`, `newAliasFullyQualified` is
 * `foo.bar.<member>` (or `foo.bar.<new-name>`).
 *
 * This predicate currently respects `__all__` in a somewhat backwards fashion:
 * - if `__all__` is defined in module `foo.bar`, only aliases whose member name is also
 *   in `__all__` are allowed (this doesn't map 100% to the semantics of imports);
 * - if `__all__` is not defined, no limitation is imposed.
 *
 * Deletion of module attributes is not considered at all. In the snippet below,
 * `my_module.foo` is therefore treated as a reference to `django.foo`, although
 * `my_module.foo` isn't even available at runtime. (There currently also isn't any code
 * to discover that `my_module.bar` is an alias of `django.foo`.)
 * ```py
 * # module my_module
 * from django import foo
 * bar = foo
 * del foo
 * ```
 */
predicate newDirectAlias(
  FindSubclassesSpec spec, string newAliasFullyQualified, ImportMember importMember, Module mod,
  Location loc
) {
  isNonTestProjectCode(importMember) and
  importMember.getScope() = mod and
  importMember.getLocation() = loc and
  importMember = newOrExistingModeling(spec).getAUse().asExpr() and
  // the alias lives under the package name for an `__init__` module, and under the
  // module name otherwise
  exists(string containerName |
    mod.isPackageInit() and containerName = mod.getPackageName()
    or
    not mod.isPackageInit() and containerName = mod.getName()
  |
    newAliasFullyQualified = containerName + "." + importMember.getName()
  ) and
  (
    mod.declaredInAll(importMember.getName())
    or
    not hasAllStatement(mod)
  ) and
  not alreadyModeled(spec, newAliasFullyQualified)
}
/**
* Same as the `newDirectAlias` predicate, but handling `from <module> import *`, considering
* all `<member>`, where `<module>.<member>` belongs to `spec`.
*
* `relevantClass` is the node belonging to `spec` that becomes available through the star
* import, and `relevantName` is the member name it has on its predecessor.
*/
predicate newImportStar(
FindSubclassesSpec spec, string newAliasFullyQualified, ImportStar importStar, Module mod,
API::Node relevantClass, string relevantName, Location loc
) {
relevantClass = newOrExistingModeling(spec) and
loc = importStar.getLocation() and
importStar.getScope() = mod and
// WHAT A HACK :D :D
// `relevantName` is recovered by comparing path strings: the path of `relevantClass` must
// be the path of a predecessor followed by `.getMember("<relevantName>")`.
// NOTE(review): the two `getAPredecessor()` calls below are not tied to the same
// predecessor node; presumably they coincide in practice -- confirm.
relevantClass.getPath() =
relevantClass.getAPredecessor().getPath() + ".getMember(\"" + relevantName + "\")" and
// a predecessor must be used as the module of the star import
relevantClass.getAPredecessor().getAUse().asExpr() = importStar.getModule() and
// the alias lives under the package name for an `__init__` module, and under the module
// name otherwise
(
mod.isPackageInit() and
newAliasFullyQualified = mod.getPackageName() + "." + relevantName
or
not mod.isPackageInit() and
newAliasFullyQualified = mod.getName() + "." + relevantName
) and
// if the importing module defines `__all__`, the name must be listed in it
(
not hasAllStatement(mod)
or
mod.declaredInAll(relevantName)
) and
not alreadyModeled(spec, newAliasFullyQualified) and
isNonTestProjectCode(importStar)
}
/**
 * Holds if `classExpr`, located at `loc` in module `mod`, defines a new subclass that
 * belongs to `spec` and has the fully qualified name `newSubclassQualified`.
 */
predicate newSubclass(
  FindSubclassesSpec spec, string newSubclassQualified, ClassExpr classExpr, Module mod,
  Location loc
) {
  isNonTestProjectCode(classExpr) and
  classExpr.getScope() = mod and
  classExpr.getLocation() = loc and
  classExpr = newOrExistingModeling(spec).getASubclass*().getAUse().asExpr() and
  newSubclassQualified = mod.getName() + "." + classExpr.getName() and
  not alreadyModeled(spec, newSubclassQualified)
}
}

View File

@@ -0,0 +1,47 @@
/**
* INTERNAL: Do not use.
*
* Provides helper class for defining additional API graph edges.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
/**
 * INTERNAL: Do not use.
 *
 * Gets a node that holds the result of awaiting `awaitedValue`.
 */
cached
DataFlow::Node awaited(DataFlow::Node awaitedValue) {
  // `async with x as y`
  // - `awaitedValue` is `x`
  // - `result` is `x`, and also `y` if it exists
  exists(AsyncWith withStmt | withStmt.getContextExpr() = awaitedValue.asExpr() |
    // `x`
    result.asExpr() = withStmt.getContextExpr()
    or
    // `y`, if it exists
    result.asExpr() = withStmt.getOptionalVars()
  )
  or
  // `async for x in l`
  // - `awaitedValue` is `l`
  // - `result` is `l` (`x` is behind a read step)
  exists(AsyncFor loop |
    // To consider `x` the result of awaiting, we would use the loop target instead of the
    // iterable, but that is behind a read step rather than a flow step.
    result.asExpr() = loop.getIter() and
    awaitedValue.asExpr() = loop.getIter()
  )
  or
  // `await x`
  // - `awaitedValue` is `x`
  // - `result` is `await x`
  exists(Await awaitExpr |
    result.asExpr() = awaitExpr and
    awaitExpr.getValue() = awaitedValue.asExpr()
  )
}

View File

@@ -89,7 +89,7 @@ class PythonFunctionObjectInternal extends CallableObjectInternal, TPythonFuncti
origin = CfgOrigin::fromCfgNode(forigin)
)
or
procedureReturnsNone(callee, obj, origin)
this.procedureReturnsNone(callee, obj, origin)
}
private predicate procedureReturnsNone(
@@ -382,7 +382,7 @@ class BuiltinMethodObjectInternal extends CallableObjectInternal, TBuiltinMethod
/**
* Class representing bound-methods.
* Note that built-in methods, such as `[].append` are also represented as bound-methods.
* Although built-in methods and bound-methods are distinct classes in CPython, their behaviour
* Although built-in methods and bound-methods are distinct classes in CPython, their behavior
* is the same and we treat them identically.
*/
class BoundMethodObjectInternal extends CallableObjectInternal, TBoundMethod {

View File

@@ -248,7 +248,7 @@ class UnicodeObjectInternal extends ConstantObjectInternal, TUnicode {
override ObjectInternal getClass() { result = TBuiltinClassObject(Builtin::special("unicode")) }
override Builtin getBuiltin() {
result.(Builtin).strValue() = this.strValue() and
result.strValue() = this.strValue() and
result.getClass() = Builtin::special("unicode")
}
@@ -281,7 +281,7 @@ class BytesObjectInternal extends ConstantObjectInternal, TBytes {
override ObjectInternal getClass() { result = TBuiltinClassObject(Builtin::special("bytes")) }
override Builtin getBuiltin() {
result.(Builtin).strValue() = this.strValue() and
result.strValue() = this.strValue() and
result.getClass() = Builtin::special("bytes")
}

View File

@@ -27,7 +27,8 @@ class PropertyInternal extends ObjectInternal, TProperty {
or
// x = property(getter, setter, deleter)
exists(ControlFlowNode setter_arg |
setter_arg = getCallNode().getArg(1) or setter_arg = getCallNode().getArgByName("fset")
setter_arg = this.getCallNode().getArg(1) or
setter_arg = this.getCallNode().getArgByName("fset")
|
PointsToInternal::pointsTo(setter_arg, this.getContext(), result, _)
)
@@ -43,7 +44,8 @@ class PropertyInternal extends ObjectInternal, TProperty {
or
// x = property(getter, setter, deleter)
exists(ControlFlowNode deleter_arg |
deleter_arg = getCallNode().getArg(2) or deleter_arg = getCallNode().getArgByName("fdel")
deleter_arg = this.getCallNode().getArg(2) or
deleter_arg = this.getCallNode().getArgByName("fdel")
|
PointsToInternal::pointsTo(deleter_arg, this.getContext(), result, _)
)

View File

@@ -138,8 +138,8 @@ class Value extends TObject {
* The result can be `none()`, but never both `true` and `false`.
*/
boolean getDefiniteBooleanValue() {
result = getABooleanValue() and
not (getABooleanValue() = true and getABooleanValue() = false)
result = this.getABooleanValue() and
not (this.getABooleanValue() = true and this.getABooleanValue() = false)
}
}
@@ -147,9 +147,7 @@ class Value extends TObject {
* Class representing modules in the Python program
* Each `ModuleValue` represents a module object in the Python program.
*/
class ModuleValue extends Value {
ModuleValue() { this instanceof ModuleObjectInternal }
class ModuleValue extends Value instanceof ModuleObjectInternal {
/**
* Holds if this module "exports" name.
* That is, does it define `name` in `__all__` or is
@@ -159,7 +157,7 @@ class ModuleValue extends Value {
predicate exports(string name) { PointsTo::moduleExports(this, name) }
/** Gets the scope for this module, provided that it is a Python module. */
ModuleScope getScope() { result = this.(ModuleObjectInternal).getSourceModule() }
ModuleScope getScope() { result = super.getSourceModule() }
/**
* Gets the container path for this module. Will be the file for a Python module,
@@ -181,7 +179,7 @@ class ModuleValue extends Value {
predicate isPackage() { this instanceof PackageObjectInternal }
/** Whether the complete set of names "exported" by this module can be accurately determined */
predicate hasCompleteExportInfo() { this.(ModuleObjectInternal).hasCompleteExportInfo() }
predicate hasCompleteExportInfo() { super.hasCompleteExportInfo() }
/** Get a module that this module imports */
ModuleValue getAnImportedModule() { result.importedAs(this.getScope().getAnImportedModuleName()) }
@@ -199,7 +197,7 @@ class ModuleValue extends Value {
/** When used (exclusively) as a script (will not include normal modules that can also be run as a script) */
predicate isUsedAsScript() {
not isUsedAsModule() and
not this.isUsedAsModule() and
(
not this.getPath().getExtension() = "py"
or
@@ -452,23 +450,21 @@ class CallableValue extends Value {
* Class representing bound-methods, such as `o.func`, where `o` is an instance
* of a class that has a callable attribute `func`.
*/
class BoundMethodValue extends CallableValue {
BoundMethodValue() { this instanceof BoundMethodObjectInternal }
class BoundMethodValue extends CallableValue instanceof BoundMethodObjectInternal {
/**
* Gets the callable that will be used when `this` is called.
* The actual callable for `func` in `o.func`.
*/
CallableValue getFunction() { result = this.(BoundMethodObjectInternal).getFunction() }
CallableValue getFunction() { result = super.getFunction() }
/**
* Gets the value that will be used for the `self` parameter when `this` is called.
* The value for `o` in `o.func`.
*/
Value getSelf() { result = this.(BoundMethodObjectInternal).getSelf() }
Value getSelf() { result = super.getSelf() }
/** Gets the parameter node that will be used for `self`. */
NameNode getSelfParameter() { result = this.(BoundMethodObjectInternal).getSelfParameter() }
NameNode getSelfParameter() { result = super.getSelfParameter() }
}
/**
@@ -831,12 +827,10 @@ class BuiltinMethodValue extends FunctionValue {
/**
* A class representing sequence objects with a length and tracked items.
*/
class SequenceValue extends Value {
SequenceValue() { this instanceof SequenceObjectInternal }
class SequenceValue extends Value instanceof SequenceObjectInternal {
Value getItem(int n) { result = super.getItem(n) }
Value getItem(int n) { result = this.(SequenceObjectInternal).getItem(n) }
int length() { result = this.(SequenceObjectInternal).length() }
int length() { result = super.length() }
}
/** A class representing tuple objects */
@@ -887,14 +881,12 @@ class NumericValue extends Value {
* https://docs.python.org/3/howto/descriptor.html#properties
* https://docs.python.org/3/library/functions.html#property
*/
class PropertyValue extends Value {
PropertyValue() { this instanceof PropertyInternal }
class PropertyValue extends Value instanceof PropertyInternal {
CallableValue getGetter() { result = super.getGetter() }
CallableValue getGetter() { result = this.(PropertyInternal).getGetter() }
CallableValue getSetter() { result = super.getSetter() }
CallableValue getSetter() { result = this.(PropertyInternal).getSetter() }
CallableValue getDeleter() { result = this.(PropertyInternal).getDeleter() }
CallableValue getDeleter() { result = super.getDeleter() }
}
/** A method-resolution-order sequence of classes */

View File

@@ -175,7 +175,7 @@ newtype TObject =
not count(instantiation.getAnArg()) = 1 and
Types::getMro(metacls).contains(TType())
} or
/** Represents `sys.version_info`. Acts like a tuple with a range of values depending on the version being analysed. */
/** Represents `sys.version_info`. Acts like a tuple with a range of values depending on the version being analyzed. */
TSysVersionInfo() or
/** Represents a module that is inferred to perhaps exist, but is not present in the database. */
TAbsentModule(string name) { missing_imported_module(_, _, name) } or

View File

@@ -75,9 +75,9 @@ class ClassList extends TClassList {
this = Empty() and result = ""
or
exists(ClassObjectInternal head | head = this.getHead() |
this.getTail() = Empty() and result = className(head)
this.getTail() = Empty() and result = this.className(head)
or
this.getTail() != Empty() and result = className(head) + ", " + this.getTail().contents()
this.getTail() != Empty() and result = this.className(head) + ", " + this.getTail().contents()
)
}
@@ -331,9 +331,9 @@ private class ClassListList extends TClassListList {
ClassObjectInternal bestMergeCandidate(int n) {
exists(ClassObjectInternal head | head = this.getItem(n).getHead() |
legalMergeCandidate(head) and result = head
this.legalMergeCandidate(head) and result = head
or
illegalMergeCandidate(head) and result = this.bestMergeCandidate(n + 1)
this.illegalMergeCandidate(head) and result = this.bestMergeCandidate(n + 1)
)
}

View File

@@ -656,6 +656,7 @@ module PointsToInternal {
builtin_not_in_outer_scope(def, context, value, origin)
}
pragma[nomagic]
private predicate undefined_variable(
ScopeEntryDefinition def, PointsToContext context, ObjectInternal value, ControlFlowNode origin
) {
@@ -674,6 +675,7 @@ module PointsToInternal {
origin = def.getDefiningNode()
}
pragma[nomagic]
private predicate builtin_not_in_outer_scope(
ScopeEntryDefinition def, PointsToContext context, ObjectInternal value, ControlFlowNode origin
) {
@@ -914,7 +916,7 @@ private module InterModulePointsTo {
private predicate exportsSubmodule(Folder folder, string name) {
name.regexpMatch("\\p{L}(\\p{L}|\\d|_)*") and
(
exists(Folder child | child = folder.getChildContainer(name))
folder.getChildContainer(name) instanceof Folder
or
exists(folder.getFile(name + ".py"))
)
@@ -1195,16 +1197,22 @@ module InterProceduralPointsTo {
ControlFlowNode argument, PointsToContext caller, ParameterDefinition param,
PointsToContext callee
) {
PointsToInternal::pointsTo(argument, caller, _, _) and
exists(CallNode call, Function func, int offset |
callsite_calls_function(call, caller, func, callee, offset)
|
exists(string name |
argument = call.getArgByName(name) and
param.getParameter() = func.getArgByName(name)
function_parameter_name(func, param, name)
)
)
}
pragma[nomagic]
private predicate function_parameter_name(Function func, ParameterDefinition param, string name) {
param.getParameter() = func.getArgByName(name)
}
/**
* Holds if the `call` with context `caller` calls the function `scope` in context `callee`
* and the offset from argument to parameter is `parameter_offset`
@@ -1326,13 +1334,13 @@ module InterProceduralPointsTo {
predicate callsite_points_to(
CallsiteRefinement def, PointsToContext context, ObjectInternal value, CfgOrigin origin
) {
exists(SsaSourceVariable srcvar | srcvar = def.getSourceVariable() |
exists(SsaSourceVariable srcvar | pragma[only_bind_into](srcvar) = def.getSourceVariable() |
if srcvar instanceof EscapingAssignmentGlobalVariable
then
/* If global variable can be reassigned, we need to track it through calls */
exists(EssaVariable var, Function func, PointsToContext callee |
callsite_calls_function(def.getCall(), context, func, callee, _) and
var_at_exit(srcvar, func, var) and
var_at_exit(pragma[only_bind_into](srcvar), func, var) and
PointsToInternal::variablePointsTo(var, callee, value, origin)
)
or

View File

@@ -375,7 +375,7 @@ abstract class RegexString extends Expr {
// 32-bit hex value \Uhhhhhhhh
this.getChar(start + 1) = "U" and end = start + 10
or
escapedName(start, end)
this.escapedName(start, end)
or
// escape not handled above, update when adding a new case
not this.getChar(start + 1) in ["x", "u", "U", "N"] and
@@ -437,11 +437,18 @@ abstract class RegexString extends Expr {
}
predicate specialCharacter(int start, int end, string char) {
not this.inCharSet(start) and
this.character(start, end) and
end = start + 1 and
char = this.getChar(start) and
(char = "$" or char = "^" or char = ".") and
not this.inCharSet(start)
(
end = start + 1 and
char = this.getChar(start) and
(char = "$" or char = "^" or char = ".")
or
end = start + 2 and
this.escapingChar(start) and
char = this.getText().substring(start, end) and
char = ["\\A", "\\Z", "\\b", "\\B"]
)
}
/** Whether the text in the range start,end is a group */
@@ -454,6 +461,7 @@ abstract class RegexString extends Expr {
/** Gets the number of the group in start,end */
int getGroupNumber(int start, int end) {
this.group(start, end) and
not this.non_capturing_group_start(start, _) and
result =
count(int i | this.group(i, _) and i < start and not this.non_capturing_group_start(i, _)) + 1
}
@@ -900,7 +908,8 @@ abstract class RegexString extends Expr {
exists(int x | this.firstPart(x, end) |
this.emptyMatchAtStartGroup(x, start) or
this.qualifiedItem(x, start, true, _) or
this.specialCharacter(x, start, "^")
// ^ and \A match the start of the string
this.specialCharacter(x, start, ["^", "\\A"])
)
or
exists(int y | this.firstPart(start, y) |
@@ -925,9 +934,8 @@ abstract class RegexString extends Expr {
or
this.qualifiedItem(end, y, true, _)
or
this.specialCharacter(end, y, "$")
or
y = end + 2 and this.escapingChar(end) and this.getChar(end + 1) = "Z"
// $ and \Z match the end of the string.
this.specialCharacter(end, y, ["$", "\\Z"])
)
or
exists(int x |

View File

@@ -0,0 +1,306 @@
/**
* Provides predicates for reasoning about bad tag filter vulnerabilities.
*/
import performance.ReDoSUtil
/**
* A module for determining if a regexp matches a given string,
* and reasoning about which capture groups are filled by a given string.
*/
private module RegexpMatching {
/**
* A class to test whether a regular expression matches a string.
* Extend this class and override `test`/`testWithGroups` to configure which strings should be tested for acceptance by this regular expression.
* The result can afterwards be read from the `matches` predicate.
*
* Strings in the `testWithGroups` predicate are also tested for which capture groups are filled by the given string.
* The result is available in the `fillsCaptureGroup` predicate.
*/
abstract class MatchedRegExp extends RegExpTerm {
MatchedRegExp() { this.isRootTerm() }
/**
* Holds if it should be tested whether this regular expression matches `str`.
*
* If `ignorePrefix` is true, then a regexp without a start anchor will be treated as if it had a start anchor.
* E.g. a regular expression `/foo$/` will match any string that ends with "foo",
* but if `ignorePrefix` is true, it will only match "foo".
*/
predicate test(string str, boolean ignorePrefix) {
none() // may be overridden in subclasses
}
/**
* Same as `test(..)`, but where the `fillsCaptureGroup` predicate afterwards tells which capture groups were filled by the given string.
*/
predicate testWithGroups(string str, boolean ignorePrefix) {
none() // may be overridden in subclasses
}
/**
* Holds if this RegExp matches `str`, where `str` is either in the `test` or `testWithGroups` predicate.
*/
final predicate matches(string str) {
// a state reached after the last char (index `str.length() - 1`) must lead to
// an `Accept` state through zero or more `epsilonSucc` steps.
exists(State state | state = getAState(this, str.length() - 1, str, _) |
epsilonSucc*(state) = Accept(_)
)
}
/**
* Holds if matching `str` may fill capture group number `g`.
* Only holds if `str` is in the `testWithGroups` predicate.
*/
final predicate fillsCaptureGroup(string str, int g) {
// `g` is the number of a group enclosing the term of some state on an accepting path.
exists(State s |
s = getAStateThatReachesAccept(this, _, str, _) and
g = group(s.getRepr())
)
}
}
/**
* Gets a state the regular expression `reg` can be in after matching the `i`th char in `str`.
* The regular expression is modeled as a non-deterministic finite automaton,
* the regular expression can therefore be in multiple states after matching a character.
*
* It's a forward search to all possible states, and there is thus no guarantee that the state is on a path to an accepting state.
*/
private State getAState(MatchedRegExp reg, int i, string str, boolean ignorePrefix) {
// start state, the -1 position before any chars have been matched
i = -1 and
(
reg.test(str, ignorePrefix)
or
reg.testWithGroups(str, ignorePrefix)
) and
result.getRepr().getRootTerm() = reg and
isStartState(result)
or
// recursive case
result = getAStateAfterMatching(reg, _, str, i, _, ignorePrefix)
}
/**
* Gets the next state after the `prev` state from `reg`.
* `prev` is the state after matching `fromIndex` chars in `str`,
* and the result is the state after matching `toIndex` chars in `str`.
*
* This predicate is used as a step relation in the forwards search (`getAState`),
* and also as a step relation in the later backwards search (`getAStateThatReachesAccept`).
*/
private State getAStateAfterMatching(
MatchedRegExp reg, State prev, string str, int toIndex, int fromIndex, boolean ignorePrefix
) {
// the basic recursive case - outlined into a noopt helper to make performance work out.
result = getAStateAfterMatchingAux(reg, prev, str, toIndex, fromIndex, ignorePrefix)
or
// we can skip past word boundaries if the next char is a non-word char.
// no char is consumed by this step, so `fromIndex` and `toIndex` are equal.
fromIndex = toIndex and
prev.getRepr() instanceof RegExpWordBoundary and
prev = getAState(reg, toIndex, str, ignorePrefix) and
after(prev.getRepr()) = result and
str.charAt(toIndex + 1).regexpMatch("\\W") // \W matches any non-word char.
}
/**
* Gets the state reached from `prev` by consuming the char at index `toIndex` in `str`,
* where `prev` is a state at index `fromIndex = toIndex - 1`.
* Steps for which `discardedPrefixStep` holds are excluded.
*
* This is the basic step of `getAStateAfterMatching`; it is `pragma[noopt]`,
* so the conjuncts are evaluated in the order they are written.
*/
pragma[noopt]
private State getAStateAfterMatchingAux(
MatchedRegExp reg, State prev, string str, int toIndex, int fromIndex, boolean ignorePrefix
) {
prev = getAState(reg, fromIndex, str, ignorePrefix) and
fromIndex = toIndex - 1 and
exists(string char | char = str.charAt(toIndex) | specializedDeltaClosed(prev, char, result)) and
not discardedPrefixStep(prev, result, ignorePrefix)
}
/** Holds if a step from `prev` to `next` should be discarded when the `ignorePrefix` flag is set. */
private predicate discardedPrefixStep(State prev, State next, boolean ignorePrefix) {
// the discarded step is a self-loop on the `mkMatch` state of a root term.
// NOTE(review): presumably this self-loop is what lets an unanchored regexp skip
// an arbitrary prefix - confirm against `ReDoSUtil`.
prev = mkMatch(any(RegExpRoot r)) and
ignorePrefix = true and
next = prev
}
// The `deltaClosed` relation specialized to the chars that exist in strings tested by a `MatchedRegExp`.
private predicate specializedDeltaClosed(State prev, string char, State next) {
deltaClosed(prev, specializedGetAnInputSymbolMatching(char), next)
}
// The `getAnInputSymbolMatching` relation specialized to the chars that exist in strings tested by a `MatchedRegExp`.
pragma[noinline]
private InputSymbol specializedGetAnInputSymbolMatching(string char) {
exists(string s, MatchedRegExp r |
r.test(s, _)
or
r.testWithGroups(s, _)
|
char = s.charAt(_)
) and
result = getAnInputSymbolMatching(char)
}
/**
* Gets the `i`th state on a path to the accepting state when `reg` matches `str`.
* Starts with an accepting state as found by `getAState` and searches backwards
* to the start state through the reachable states (as found by `getAState`).
*
* This predicate holds the invariant that the result state can be reached with `i` steps from a start state,
* and an accepting state can be found after (`str.length() - 1 - i`) steps from the result.
* The result state is therefore always on a valid path where `reg` accepts `str`.
*
* This predicate is only used to find which capture groups a regular expression has filled,
* and thus the search is only performed for the strings in the `testWithGroups(..)` predicate.
*/
private State getAStateThatReachesAccept(
MatchedRegExp reg, int i, string str, boolean ignorePrefix
) {
// base case, reaches an accepting state from the last state in `getAState(..)`
reg.testWithGroups(str, ignorePrefix) and
i = str.length() - 1 and
result = getAState(reg, i, str, ignorePrefix) and
epsilonSucc*(result) = Accept(_)
or
// recursive case. `next` is the next state to be matched after matching `prev`.
// this predicate is doing a backwards search, so `prev` is the result we are looking for.
exists(State next, State prev, int fromIndex, int toIndex |
next = getAStateThatReachesAccept(reg, toIndex, str, ignorePrefix) and
next = getAStateAfterMatching(reg, prev, str, toIndex, fromIndex, ignorePrefix) and
i = fromIndex and
result = prev
)
}
/**
* Gets the capture group number that `term` belongs to.
* `term` belongs to a group if it is that group or is nested (at any depth) inside it,
* so a term inside nested groups has several results.
*/
private int group(RegExpTerm term) {
exists(RegExpGroup grp | grp.getNumber() = result | term.getParent*() = grp)
}
}
/**
 * A root regular expression term that is tested against a fixed set of HTML tag strings.
 *
 * Only regular expressions that explicitly mention both "<" and ">" are included.
 */
class HTMLMatchingRegExp extends RegexpMatching::MatchedRegExp {
  HTMLMatchingRegExp() {
    // for each of "<" and ">", some constant inside this regexp must contain it.
    forall(string bracket | bracket = ["<", ">"] |
      exists(RegExpConstant constant |
        constant.getRootTerm() = this and
        constant.getValue().matches("%" + bracket + "%")
      )
    )
  }

  /** Holds for the strings whose filled capture groups are computed. */
  override predicate testWithGroups(string str, boolean ignorePrefix) {
    str = ["<!-- foo -->", "<!-- foo --!>", "<!- foo ->", "<foo>", "<script>"] and
    ignorePrefix = true
  }

  /** Holds for the strings that are only tested for acceptance. */
  override predicate test(string str, boolean ignorePrefix) {
    str =
      [
        "<!-- foo -->", "<!- foo ->", "<!-- foo --!>", "<!-- foo\n -->", "<script>foo</script>",
        "<script \n>foo</script>", "<script >foo\n</script>", "<foo ></foo>", "<foo>",
        "<foo src=\"foo\"></foo>", "<script>", "<script src=\"foo\"></script>",
        "<script src='foo'></script>", "<SCRIPT>foo</SCRIPT>", "<script\tsrc=\"foo\"/>",
        "<script\tsrc='foo'></script>", "<sCrIpT>foo</ScRiPt>", "<script src=\"foo\">foo</script >",
        "<script src=\"foo\">foo</script foo=\"bar\">", "<script src=\"foo\">foo</script\t\n bar>"
      ] and
    ignorePrefix = true
  }
}
/**
* Holds if `regexp` matches some HTML tags, but misses some HTML tags that it should match.
* `msg` describes which tags are missed or matched inconsistently.
*
* When adding a new case to this predicate, make sure the test strings used in `matches(..)` calls are present in `HTMLMatchingRegExp::test` / `HTMLMatchingRegExp::testWithGroups`.
*/
predicate isBadRegexpFilter(HTMLMatchingRegExp regexp, string msg) {
// CVE-2021-33829 - matching both "<!-- foo -->" and "<!-- foo --!>", but in different capture groups
regexp.matches("<!-- foo -->") and
regexp.matches("<!-- foo --!>") and
exists(int a, int b | a != b |
regexp.fillsCaptureGroup("<!-- foo -->", a) and
// <!-- foo --> might be ambiguously parsed (matching both capture groups), and that is ok here.
regexp.fillsCaptureGroup("<!-- foo --!>", b) and
not regexp.fillsCaptureGroup("<!-- foo --!>", a) and
msg =
"Comments ending with --> are matched differently from comments ending with --!>. The first is matched with capture group "
+ a + " and comments ending with --!> are matched with capture group " +
strictconcat(int i | regexp.fillsCaptureGroup("<!-- foo --!>", i) | i.toString(), ", ") +
"."
)
or
// CVE-2020-17480 - matching "<!-- foo -->" and other tags, but not "<!-- foo --!>".
exists(int group, int other |
group != other and
regexp.fillsCaptureGroup("<!-- foo -->", group) and
regexp.fillsCaptureGroup("<foo>", other) and
not regexp.matches("<!-- foo --!>") and
// `group` must be the only group filled by the comment, and must not be filled by the other test strings.
not regexp.fillsCaptureGroup("<!-- foo -->", any(int i | i != group)) and
not regexp.fillsCaptureGroup("<!- foo ->", group) and
not regexp.fillsCaptureGroup("<foo>", group) and
not regexp.fillsCaptureGroup("<script>", group) and
msg =
"This regular expression only parses --> (capture group " + group +
") and not --!> as a HTML comment end tag."
)
or
// matches a single-line comment, but not a comment containing a newline
// (and none of "<!- foo ->", "<foo>" or "<script>").
regexp.matches("<!-- foo -->") and
not regexp.matches("<!-- foo\n -->") and
not regexp.matches("<!- foo ->") and
not regexp.matches("<foo>") and
not regexp.matches("<script>") and
msg = "This regular expression does not match comments containing newlines."
or
// matches script tags (but not "<foo ></foo>"), and misses a script tag containing a newline.
regexp.matches("<script>foo</script>") and
regexp.matches("<script src=\"foo\"></script>") and
not regexp.matches("<foo ></foo>") and
(
not regexp.matches("<script \n>foo</script>") and
msg = "This regular expression matches <script></script>, but not <script \\n></script>"
or
not regexp.matches("<script >foo\n</script>") and
msg = "This regular expression matches <script>...</script>, but not <script >...\\n</script>"
)
or
// matches a double-quoted attribute, but not a single-quoted one.
regexp.matches("<script>foo</script>") and
regexp.matches("<script src=\"foo\"></script>") and
not regexp.matches("<script src='foo'></script>") and
not regexp.matches("<foo>") and
msg = "This regular expression does not match script tags where the attribute uses single-quotes."
or
// matches a single-quoted attribute, but not a double-quoted one.
regexp.matches("<script>foo</script>") and
regexp.matches("<script src='foo'></script>") and
not regexp.matches("<script src=\"foo\"></script>") and
not regexp.matches("<foo>") and
msg = "This regular expression does not match script tags where the attribute uses double-quotes."
or
// matches an attribute separated by a space, but not one separated by a tab.
regexp.matches("<script>foo</script>") and
regexp.matches("<script src='foo'></script>") and
not regexp.matches("<script\tsrc='foo'></script>") and
not regexp.matches("<foo>") and
not regexp.matches("<foo src=\"foo\"></foo>") and
msg = "This regular expression does not match script tags where tabs are used between attributes."
or
// a case-sensitive regexp that matches lower case script tags, but not upper/mixed case ones.
regexp.matches("<script>foo</script>") and
not RegExpFlags::isIgnoreCase(regexp) and
not regexp.matches("<foo>") and
not regexp.matches("<foo ></foo>") and
(
not regexp.matches("<SCRIPT>foo</SCRIPT>") and
msg = "This regular expression does not match upper case <SCRIPT> tags."
or
not regexp.matches("<sCrIpT>foo</ScRiPt>") and
regexp.matches("<SCRIPT>foo</SCRIPT>") and
msg = "This regular expression does not match mixed case <sCrIpT> tags."
)
or
// matches a script tag with a plain end tag, but not end tags containing whitespace or attributes.
regexp.matches("<script src=\"foo\"></script>") and
not regexp.matches("<foo>") and
not regexp.matches("<foo ></foo>") and
(
not regexp.matches("<script src=\"foo\">foo</script >") and
msg = "This regular expression does not match script end tags like </script >."
or
not regexp.matches("<script src=\"foo\">foo</script foo=\"bar\">") and
msg = "This regular expression does not match script end tags like </script foo=\"bar\">."
or
not regexp.matches("<script src=\"foo\">foo</script\t\n bar>") and
msg = "This regular expression does not match script end tags like </script\\t\\n bar>."
)
}

View File

@@ -40,6 +40,10 @@ module CleartextLogging {
* A source of sensitive data, considered as a flow source.
*/
class SensitiveDataSourceAsSource extends Source, SensitiveDataSource {
SensitiveDataSourceAsSource() {
not SensitiveDataSource.super.getClassification() = SensitiveDataClassification::id()
}
override SensitiveDataClassification getClassification() {
result = SensitiveDataSource.super.getClassification()
}

View File

@@ -39,6 +39,10 @@ module CleartextStorage {
* A source of sensitive data, considered as a flow source.
*/
class SensitiveDataSourceAsSource extends Source, SensitiveDataSource {
SensitiveDataSourceAsSource() {
not SensitiveDataSource.super.getClassification() = SensitiveDataClassification::id()
}
override SensitiveDataClassification getClassification() {
result = SensitiveDataSource.super.getClassification()
}

View File

@@ -26,7 +26,11 @@ class PathNotNormalizedConfiguration extends TaintTracking::Configuration {
override predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
override predicate isSanitizer(DataFlow::Node node) { node instanceof Path::PathNormalization }
override predicate isSanitizer(DataFlow::Node node) {
node instanceof Sanitizer
or
node instanceof Path::PathNormalization
}
override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
guard instanceof SanitizerGuard
@@ -52,6 +56,8 @@ class FirstNormalizationConfiguration extends TaintTracking::Configuration {
override predicate isSink(DataFlow::Node sink) { sink instanceof Path::PathNormalization }
override predicate isSanitizer(DataFlow::Node node) { node instanceof Sanitizer }
override predicate isSanitizerOut(DataFlow::Node node) { node instanceof Path::PathNormalization }
override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
@@ -67,6 +73,8 @@ class NormalizedPathNotCheckedConfiguration extends TaintTracking2::Configuratio
override predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
override predicate isSanitizer(DataFlow::Node node) { node instanceof Sanitizer }
override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
guard instanceof Path::SafeAccessCheck
or

View File

@@ -32,6 +32,16 @@ module PathInjection {
*/
abstract class Sink extends DataFlow::Node { }
/**
* A sanitizer for "path injection" vulnerabilities.
*
* This should only be used for things like calls to library functions that perform their own
* (correct) normalization/escaping of untrusted paths.
*
* Please also see `Path::SafeAccessCheck` and `Path::PathNormalization` Concepts.
*/
abstract class Sanitizer extends DataFlow::Node { }
/**
* A sanitizer guard for "path injection" vulnerabilities.
*/

View File

@@ -0,0 +1,37 @@
/**
* Provides a taint-tracking configuration for detecting regular expression injection
* vulnerabilities.
*
* Note, for performance reasons: only import this file if
* `RegexInjection::Configuration` is needed, otherwise
* `RegexInjectionCustomizations` should be imported instead.
*/
private import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
/**
* Provides a taint-tracking configuration for detecting regular expression injection
* vulnerabilities.
*/
module RegexInjection {
import RegexInjectionCustomizations::RegexInjection
/**
* A taint-tracking configuration for detecting "regular expression injection" vulnerabilities.
*
* Sources, sinks, sanitizers and sanitizer guards are the `Source`, `Sink`, `Sanitizer`
* and `SanitizerGuard` classes imported from `RegexInjectionCustomizations`.
*/
class Configuration extends TaintTracking::Configuration {
Configuration() { this = "RegexInjection" }
override predicate isSource(DataFlow::Node source) { source instanceof Source }
override predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
override predicate isSanitizer(DataFlow::Node node) { node instanceof Sanitizer }
override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
guard instanceof SanitizerGuard
}
}
}

View File

@@ -0,0 +1,62 @@
/**
* Provides default sources, sinks and sanitizers for detecting
* "regular expression injection"
* vulnerabilities, as well as extension points for adding your own.
*/
private import python
private import semmle.python.Concepts
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.dataflow.new.RemoteFlowSources
/**
 * Provides the default sources, sinks and sanitizers used for detecting
 * "regular expression injection" vulnerabilities, together with the
 * extension points for adding your own.
 */
module RegexInjection {
  /** A data flow source for "regular expression injection" vulnerabilities. */
  abstract class Source extends DataFlow::Node { }

  /**
   * A sink for "regular expression injection" vulnerabilities, that is, a node
   * that is the regular expression of some regular expression execution.
   * If you have a custom way to execute regular expressions, you can extend `RegexExecution::Range`.
   */
  class Sink extends DataFlow::Node {
    RegexExecution regexExecution;

    Sink() { regexExecution.getRegex() = this }

    /** Gets the call that executes the regular expression marked by this sink. */
    RegexExecution getRegexExecution() { regexExecution = result }
  }

  /** A sanitizer for "regular expression injection" vulnerabilities. */
  abstract class Sanitizer extends DataFlow::Node { }

  /** A sanitizer guard for "regular expression injection" vulnerabilities. */
  abstract class SanitizerGuard extends DataFlow::BarrierGuard { }

  /** A source of remote user input, considered as a flow source. */
  class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { }

  /** A regex escaping, considered as a sanitizer. */
  class RegexEscapingAsSanitizer extends Sanitizer {
    RegexEscapingAsSanitizer() {
      // The sanitizer is the output of the escaping rather than its input: due to
      // use-use flow, the input must remain able to flow to other sinks.
      exists(RegexEscaping escaping | this = escaping.getOutput())
    }
  }
}

View File

@@ -0,0 +1,84 @@
/**
* Provides a taint-tracking configuration for detecting "Server-side request forgery" vulnerabilities.
*
* Note, for performance reasons: only import this file if
* `ServerSideRequestForgery::Configuration` is needed, otherwise
* `ServerSideRequestForgeryCustomizations` should be imported instead.
*/
private import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import semmle.python.Concepts
/**
* Provides a taint-tracking configuration for detecting "Server-side request forgery" vulnerabilities.
*
* This configuration has a sanitizer to limit results to cases where the attacker has full control of the URL.
* See `PartialServerSideRequestForgery` for a variant without this requirement.
*
* You should use the `fullyControlledRequest` predicate to only select results where all
* URL parts are fully controlled.
*/
module FullServerSideRequestForgery {
import ServerSideRequestForgeryCustomizations::ServerSideRequestForgery
/**
* A taint-tracking configuration for detecting "Server-side request forgery" vulnerabilities.
*/
class Configuration extends TaintTracking::Configuration {
Configuration() { this = "FullServerSideRequestForgery" }
override predicate isSource(DataFlow::Node source) { source instanceof Source }
override predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
override predicate isSanitizer(DataFlow::Node node) {
node instanceof Sanitizer
or
// additionally block flow through nodes that remove full control of the URL.
node instanceof FullUrlControlSanitizer
}
override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
guard instanceof SanitizerGuard
}
}
}
/**
 * Holds if all URL parts of `request` are fully user controlled, that is, if
 * there is a `FullServerSideRequestForgery` configuration with flow to every
 * URL part of `request`.
 *
 * Note that a request without any URL parts satisfies this vacuously.
 */
predicate fullyControlledRequest(HTTP::Client::Request request) {
  exists(FullServerSideRequestForgery::Configuration cfg |
    forall(DataFlow::Node part | request.getAUrlPart() = part | cfg.hasFlow(_, part))
  )
}
/**
* Provides a taint-tracking configuration for detecting "Server-side request forgery" vulnerabilities.
*
* This configuration has results even when the attacker does not have full control over the URL.
* See `FullServerSideRequestForgery` for a variant that has this requirement.
*/
module PartialServerSideRequestForgery {
import ServerSideRequestForgeryCustomizations::ServerSideRequestForgery
/**
* A taint-tracking configuration for detecting "Server-side request forgery" vulnerabilities.
*
* Unlike `FullServerSideRequestForgery::Configuration`, `FullUrlControlSanitizer` nodes
* are not treated as sanitizers here.
*/
class Configuration extends TaintTracking::Configuration {
Configuration() { this = "PartialServerSideRequestForgery" }
override predicate isSource(DataFlow::Node source) { source instanceof Source }
override predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
override predicate isSanitizer(DataFlow::Node node) { node instanceof Sanitizer }
override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
guard instanceof SanitizerGuard
}
}
}

View File

@@ -0,0 +1,143 @@
/**
* Provides default sources, sinks and sanitizers for detecting
* "Server-side request forgery"
* vulnerabilities, as well as extension points for adding your own.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.BarrierGuards
/**
 * Provides default sources, sinks and sanitizers for detecting
 * "Server-side request forgery"
 * vulnerabilities, as well as extension points for adding your own.
 */
module ServerSideRequestForgery {
  /**
   * A data flow source for "Server-side request forgery" vulnerabilities.
   */
  abstract class Source extends DataFlow::Node { }

  /**
   * A data flow sink for "Server-side request forgery" vulnerabilities.
   */
  abstract class Sink extends DataFlow::Node {
    /**
     * Gets the request this sink belongs to.
     */
    abstract HTTP::Client::Request getRequest();
  }

  /**
   * A sanitizer for "Server-side request forgery" vulnerabilities.
   */
  abstract class Sanitizer extends DataFlow::Node { }

  /**
   * A sanitizer for "Server-side request forgery" vulnerabilities
   * that ensures the attacker does not have full control of the URL (that is, the
   * attacker might still be able to control the path or query parameters).
   */
  abstract class FullUrlControlSanitizer extends DataFlow::Node { }

  /**
   * A sanitizer guard for "Server-side request forgery" vulnerabilities.
   */
  abstract class SanitizerGuard extends DataFlow::BarrierGuard { }

  /**
   * A source of remote user input, considered as a flow source.
   */
  class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { }

  /** The URL of an HTTP request, considered as a sink. */
  class HttpRequestUrlAsSink extends Sink {
    HTTP::Client::Request req;

    HttpRequestUrlAsSink() {
      req.getAUrlPart() = this and
      // if we extract the stdlib code for HTTPConnection, we will also find calls that
      // make requests within the HTTPConnection implementation -- for example the
      // `request` method calls the `_send_request` method internally. So without this
      // extra bit of code, we would give alerts within the HTTPConnection
      // implementation as well, which is just annoying.
      //
      // Notice that we're excluding based on the request location, and not the URL part
      // location, since the URL part would be in user code for the scenario above.
      //
      // See comment for command injection sinks for more details.
      not req.getScope().getEnclosingModule().getName() in ["http.client", "httplib"]
    }

    override HTTP::Client::Request getRequest() { result = req }
  }

  /**
   * A comparison with a constant string, considered as a sanitizer-guard.
   */
  class StringConstCompareAsSanitizerGuard extends SanitizerGuard, StringConstCompare { }

  /**
   * A string construction (concat, format, f-string) where the left side is not
   * user-controlled.
   *
   * For all of these cases, we try to allow `http://` or `https://` on the left side
   * since that will still allow full URL control.
   */
  class StringConstructionAsFullUrlControlSanitizer extends FullUrlControlSanitizer {
    StringConstructionAsFullUrlControlSanitizer() {
      // string concat
      exists(BinaryExprNode add |
        add.getOp() instanceof Add and
        add.getRight() = this.asCfgNode() and
        not add.getLeft().getNode().(StrConst).getText().toLowerCase() in ["http://", "https://"]
      )
      or
      // % formatting
      exists(BinaryExprNode fmt |
        fmt.getOp() instanceof Mod and
        fmt.getRight() = this.asCfgNode() and
        // detecting %-formatting is not super easy, so we simplify it to only handle
        // when there is a **single** substitution going on.
        not fmt.getLeft().getNode().(StrConst).getText().regexpMatch("^(?i)https?://%s[^%]*$")
      )
      or
      // arguments to a format call
      exists(DataFlow::MethodCallNode call, string httpPrefixRe |
        // Matches a format string that starts with `http(s)://` immediately followed by
        // a replacement field. Exactly one of the capture groups is bound:
        //   1: the auto-numbered field `{}`
        //   2: the index of an explicitly numbered field, e.g. `{0}`
        //   3: the name of a named field, e.g. `{host}`
        //
        // Group 3 must stop at the first `}`. A greedy `.*` would, for a format string
        // such as `http://{host}/{path}`, capture `host}/{path`, which equals no keyword
        // argument name, and thereby wrongly mark the `host` argument as a sanitizer.
        //
        // NOTE: attribute/index/conversion suffixes (e.g. `{host.name}`, `{host!r}`) are
        // kept as part of the captured name, so such fields are not recognized as the
        // leading argument.
        httpPrefixRe = "^(?i)https?://(?:(\\{\\})|\\{([0-9]+)\\}|\\{([^0-9}][^}]*)\\}).*$"
      |
        call.getMethodName() = "format" and
        (
          if call.getObject().asExpr().(StrConst).getText().regexpMatch(httpPrefixRe)
          then
            exists(string text | text = call.getObject().asExpr().(StrConst).getText() |
              // `http://{}...`
              // the first auto-numbered field consumes positional argument 0, so every
              // other argument is a sanitizer.
              exists(text.regexpCapture(httpPrefixRe, 1)) and
              this in [call.getArg(any(int i | i >= 1)), call.getArgByName(_)]
              or
              // `http://{123}...`
              exists(int safeArgIndex | safeArgIndex = text.regexpCapture(httpPrefixRe, 2).toInt() |
                this in [call.getArg(any(int i | i != safeArgIndex)), call.getArgByName(_)]
              )
              or
              // `http://{abc}...`
              exists(string safeArgName | safeArgName = text.regexpCapture(httpPrefixRe, 3) |
                this in [call.getArg(_), call.getArgByName(any(string s | s != safeArgName))]
              )
            )
          else this in [call.getArg(_), call.getArgByName(_)]
        )
      )
      or
      // f-string
      exists(Fstring fstring |
        // value 0 is the leading string part; when it is just the scheme, value 1 still
        // gives full URL control, so only later values are sanitizers.
        if fstring.getValue(0).(StrConst).getText().toLowerCase() in ["http://", "https://"]
        then fstring.getValue(any(int i | i >= 2)) = this.asExpr()
        else fstring.getValue(any(int i | i >= 1)) = this.asExpr()
      )
    }
  }
}

View File

@@ -42,6 +42,13 @@ module SqlInjection {
*/
class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { }
/**
* A SQL statement of a SQL construction, considered as a flow sink.
*/
class SqlConstructionAsSink extends Sink {
SqlConstructionAsSink() { this = any(SqlConstruction c).getSql() }
}
/**
* A SQL statement of a SQL execution, considered as a flow sink.
*/
@@ -49,13 +56,6 @@ module SqlInjection {
SqlExecutionAsSink() { this = any(SqlExecution e).getSql() }
}
/**
* The text argument of a SQLAlchemy TextClause construction, considered as a flow sink.
*/
class TextArgAsSink extends Sink {
TextArgAsSink() { this = any(SqlAlchemy::TextClause::TextClauseConstruction tcc).getTextArg() }
}
/**
* A comparison with a constant string, considered as a sanitizer-guard.
*/

View File

@@ -1,37 +1,6 @@
/**
* Provides a taint-tracking configuration for detecting regular expression injection
* vulnerabilities.
*
* Note, for performance reasons: only import this file if
* `RegexInjection::Configuration` is needed, otherwise
* `RegexInjectionCustomizations` should be imported instead.
*/
/** DEPRECATED: use semmle.python.security.dataflow.RegexInjection instead. */
private import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
private import semmle.python.security.dataflow.RegexInjection as New
/**
* Provides a taint-tracking configuration for detecting regular expression injection
* vulnerabilities.
*/
module RegexInjection {
import RegexInjectionCustomizations::RegexInjection
/**
* A taint-tracking configuration for detecting "reflected server-side cross-site scripting" vulnerabilities.
*/
class Configuration extends TaintTracking::Configuration {
Configuration() { this = "RegexInjection" }
override predicate isSource(DataFlow::Node source) { source instanceof Source }
override predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
override predicate isSanitizer(DataFlow::Node node) { node instanceof Sanitizer }
override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
guard instanceof SanitizerGuard
}
}
}
/** DEPRECATED: use semmle.python.security.dataflow.RegexInjection instead. */
deprecated module RegexInjection = New::RegexInjection;

View File

@@ -1,62 +1,6 @@
/**
* Provides default sources, sinks and sanitizers for detecting
* "regular expression injection"
* vulnerabilities, as well as extension points for adding your own.
*/
/** DEPRECATED: use semmle.python.security.dataflow.RegexInjectionCustomizations instead. */
private import python
private import semmle.python.Concepts
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.security.dataflow.RegexInjectionCustomizations as New
/**
* Provides default sources, sinks and sanitizers for detecting
* "regular expression injection"
* vulnerabilities, as well as extension points for adding your own.
*/
module RegexInjection {
/**
* A data flow source for "regular expression injection" vulnerabilities.
*/
abstract class Source extends DataFlow::Node { }
/**
* A sink for "regular expression injection" vulnerabilities is the execution of a regular expression.
* If you have a custom way to execute regular expressions, you can extend `RegexExecution::Range`.
*/
class Sink extends DataFlow::Node {
RegexExecution regexExecution;
Sink() { this = regexExecution.getRegex() }
/** Gets the call that executes the regular expression marked by this sink. */
RegexExecution getRegexExecution() { result = regexExecution }
}
/**
* A sanitizer for "regular expression injection" vulnerabilities.
*/
abstract class Sanitizer extends DataFlow::Node { }
/**
* A sanitizer guard for "regular expression injection" vulnerabilities.
*/
abstract class SanitizerGuard extends DataFlow::BarrierGuard { }
/**
* A source of remote user input, considered as a flow source.
*/
class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { }
/**
* A regex escaping, considered as a sanitizer.
*/
class RegexEscapingAsSanitizer extends Sanitizer {
RegexEscapingAsSanitizer() {
// Due to use-use flow, we want the output rather than an input
// (so the input can still flow to other sinks).
this = any(RegexEscaping esc).getOutput()
}
}
}
/** DEPRECATED: use semmle.python.security.dataflow.RegexInjectionCustomizations instead. */
deprecated module RegexInjection = New::RegexInjection;

View File

@@ -58,7 +58,7 @@ module HeuristicNames {
*/
string maybeAccountInfo() {
result = "(?is).*acc(ou)?nt.*" or
result = "(?is).*(puid|username|userid).*" or
result = "(?is).*(puid|username|userid|session(id|key)).*" or
result = "(?s).*([uU]|^|_|[a-z](?=U))([uU][iI][dD]).*"
}

View File

@@ -140,9 +140,9 @@ class RegExpRoot extends RegExpTerm {
// there is at least one repetition
getRoot(any(InfiniteRepetitionQuantifier q)) = this and
// is actually used as a RegExp
isUsedAsRegExp() and
this.isUsedAsRegExp() and
// not excluded for library specific reasons
not isExcluded(getRootTerm().getParent())
not isExcluded(this.getRootTerm().getParent())
}
}
@@ -218,7 +218,7 @@ private newtype TInputSymbol =
recc instanceof RegExpCharacterClass and
not recc.(RegExpCharacterClass).isUniversalClass()
or
recc instanceof RegExpCharacterClassEscape
isEscapeClass(recc, _)
)
} or
/** An input symbol representing all characters matched by `.`. */
@@ -302,7 +302,7 @@ abstract class CharacterClass extends InputSymbol {
/**
* Gets a character matched by this character class.
*/
string choose() { result = getARelevantChar() and matches(result) }
string choose() { result = this.getARelevantChar() and this.matches(result) }
}
/**
@@ -340,13 +340,13 @@ private module CharacterClasses {
char <= hi
)
or
exists(RegExpCharacterClassEscape escape | escape = child |
escape.getValue() = escape.getValue().toLowerCase() and
classEscapeMatches(escape.getValue(), char)
exists(string charClass | isEscapeClass(child, charClass) |
charClass.toLowerCase() = charClass and
classEscapeMatches(charClass, char)
or
char = getARelevantChar() and
escape.getValue() = escape.getValue().toUpperCase() and
not classEscapeMatches(escape.getValue().toLowerCase(), char)
charClass.toUpperCase() = charClass and
not classEscapeMatches(charClass, char)
)
)
}
@@ -409,10 +409,10 @@ private module CharacterClasses {
or
child.(RegExpCharacterRange).isRange(_, result)
or
exists(RegExpCharacterClassEscape escape | child = escape |
result = min(string s | classEscapeMatches(escape.getValue().toLowerCase(), s))
exists(string charClass | isEscapeClass(child, charClass) |
result = min(string s | classEscapeMatches(charClass.toLowerCase(), s))
or
result = max(string s | classEscapeMatches(escape.getValue().toLowerCase(), s))
result = max(string s | classEscapeMatches(charClass.toLowerCase(), s))
)
)
}
@@ -466,33 +466,36 @@ private module CharacterClasses {
* An implementation of `CharacterClass` for \d, \s, and \w.
*/
private class PositiveCharacterClassEscape extends CharacterClass {
RegExpCharacterClassEscape cc;
RegExpTerm cc;
string charClass;
PositiveCharacterClassEscape() {
this = getCanonicalCharClass(cc) and cc.getValue() = ["d", "s", "w"]
isEscapeClass(cc, charClass) and
this = getCanonicalCharClass(cc) and
charClass = ["d", "s", "w"]
}
override string getARelevantChar() {
cc.getValue() = "d" and
charClass = "d" and
result = ["0", "9"]
or
cc.getValue() = "s" and
charClass = "s" and
result = " "
or
cc.getValue() = "w" and
charClass = "w" and
result = ["a", "Z", "_", "0", "9"]
}
override predicate matches(string char) { classEscapeMatches(cc.getValue(), char) }
override predicate matches(string char) { classEscapeMatches(charClass, char) }
override string choose() {
cc.getValue() = "d" and
charClass = "d" and
result = "9"
or
cc.getValue() = "s" and
charClass = "s" and
result = " "
or
cc.getValue() = "w" and
charClass = "w" and
result = "a"
}
}
@@ -501,26 +504,29 @@ private module CharacterClasses {
* An implementation of `CharacterClass` for \D, \S, and \W.
*/
private class NegativeCharacterClassEscape extends CharacterClass {
RegExpCharacterClassEscape cc;
RegExpTerm cc;
string charClass;
NegativeCharacterClassEscape() {
this = getCanonicalCharClass(cc) and cc.getValue() = ["D", "S", "W"]
isEscapeClass(cc, charClass) and
this = getCanonicalCharClass(cc) and
charClass = ["D", "S", "W"]
}
override string getARelevantChar() {
cc.getValue() = "D" and
charClass = "D" and
result = ["a", "Z", "!"]
or
cc.getValue() = "S" and
charClass = "S" and
result = ["a", "9", "!"]
or
cc.getValue() = "W" and
charClass = "W" and
result = [" ", "!"]
}
bindingset[char]
override predicate matches(string char) {
not classEscapeMatches(cc.getValue().toLowerCase(), char)
not classEscapeMatches(charClass.toLowerCase(), char)
}
}
}
@@ -533,6 +539,55 @@ private class EdgeLabel extends TInputSymbol {
}
}
/**
* A RegExp term that acts like a plus.
* Either it's a `RegExpPlus`, or it is a range with lower bound 1 (`{1,X}`) where
* X is >= 30 or where there is no upper bound.
* 30 has been chosen as a threshold because for exponential blowup 2^30 is enough to get a decent DOS attack.
*/
private class EffectivelyPlus extends RegExpTerm {
EffectivelyPlus() {
this instanceof RegExpPlus
or
exists(RegExpRange range |
range.getLowerBound() = 1 and
// a missing upper bound is treated like a large upper bound
(range.getUpperBound() >= 30 or not exists(range.getUpperBound()))
|
this = range
)
}
}
/**
* A RegExp term that acts like a star.
* Either it's a `RegExpStar`, or it is a range with lower bound 0 (`{0,X}`) where
* X is >= 30 or where there is no upper bound.
*/
private class EffectivelyStar extends RegExpTerm {
EffectivelyStar() {
this instanceof RegExpStar
or
exists(RegExpRange range |
range.getLowerBound() = 0 and
// a missing upper bound is treated like a large upper bound
(range.getUpperBound() >= 30 or not exists(range.getUpperBound()))
|
this = range
)
}
}
/**
* A RegExp term that acts like a question mark.
* Either it's a `RegExpOpt`, or it is a range `{0,1}`.
*/
private class EffectivelyQuestion extends RegExpTerm {
EffectivelyQuestion() {
this instanceof RegExpOpt
or
exists(RegExpRange range | range.getLowerBound() = 0 and range.getUpperBound() = 1 |
this = range
)
}
}
/**
* Gets the state before matching `t`.
*/
@@ -542,7 +597,7 @@ private State before(RegExpTerm t) { result = Match(t, 0) }
/**
* Gets a state the NFA may be in after matching `t`.
*/
private State after(RegExpTerm t) {
State after(RegExpTerm t) {
exists(RegExpAlt alt | t = alt.getAChild() | result = after(alt))
or
exists(RegExpSequence seq, int i | t = seq.getChild(i) |
@@ -553,14 +608,14 @@ private State after(RegExpTerm t) {
or
exists(RegExpGroup grp | t = grp.getAChild() | result = after(grp))
or
exists(RegExpStar star | t = star.getAChild() | result = before(star))
exists(EffectivelyStar star | t = star.getAChild() | result = before(star))
or
exists(RegExpPlus plus | t = plus.getAChild() |
exists(EffectivelyPlus plus | t = plus.getAChild() |
result = before(plus) or
result = after(plus)
)
or
exists(RegExpOpt opt | t = opt.getAChild() | result = after(opt))
exists(EffectivelyQuestion opt | t = opt.getAChild() | result = after(opt))
or
exists(RegExpRoot root | t = root | result = AcceptAnySuffix(root))
}
@@ -599,7 +654,7 @@ predicate delta(State q1, EdgeLabel lbl, State q2) {
q2 = after(cc)
)
or
exists(RegExpCharacterClassEscape cc |
exists(RegExpTerm cc | isEscapeClass(cc, _) |
q1 = before(cc) and
lbl = CharClass(cc.getRawValue() + "|" + getCanonicalizationFlags(cc.getRootTerm())) and
q2 = after(cc)
@@ -611,15 +666,17 @@ predicate delta(State q1, EdgeLabel lbl, State q2) {
or
exists(RegExpGroup grp | lbl = Epsilon() | q1 = before(grp) and q2 = before(grp.getChild(0)))
or
exists(RegExpStar star | lbl = Epsilon() |
exists(EffectivelyStar star | lbl = Epsilon() |
q1 = before(star) and q2 = before(star.getChild(0))
or
q1 = before(star) and q2 = after(star)
)
or
exists(RegExpPlus plus | lbl = Epsilon() | q1 = before(plus) and q2 = before(plus.getChild(0)))
exists(EffectivelyPlus plus | lbl = Epsilon() |
q1 = before(plus) and q2 = before(plus.getChild(0))
)
or
exists(RegExpOpt opt | lbl = Epsilon() |
exists(EffectivelyQuestion opt | lbl = Epsilon() |
q1 = before(opt) and q2 = before(opt.getChild(0))
or
q1 = before(opt) and q2 = after(opt)
@@ -671,7 +728,7 @@ RegExpRoot getRoot(RegExpTerm term) {
/**
* A state in the NFA.
*/
private newtype TState =
newtype TState =
/**
* A state representing that the NFA is about to match a term.
* `i` is used to index into multi-char literals.
@@ -801,29 +858,26 @@ InputSymbol getAnInputSymbolMatching(string char) {
result = Any()
}
/**
* Holds if `state` is a start state.
*/
predicate isStartState(State state) {
state = mkMatch(any(RegExpRoot r))
or
exists(RegExpCaret car | state = after(car))
}
/**
* Predicates for constructing a prefix string that leads to a given state.
*/
private module PrefixConstruction {
/**
* Holds if `state` starts the string matched by the regular expression.
*/
private predicate isStartState(State state) {
state instanceof StateInPumpableRegexp and
(
state = Match(any(RegExpRoot r), _)
or
exists(RegExpCaret car | state = after(car))
)
}
/**
* Holds if `state` is the textually last start state for the regular expression.
*/
private predicate lastStartState(State state) {
exists(RegExpRoot root |
state =
max(State s, Location l |
max(StateInPumpableRegexp s, Location l |
isStartState(s) and getRoot(s.getRepr()) = root and l = s.getRepr().getLocation()
|
s

View File

@@ -5,6 +5,14 @@
import python
import semmle.python.RegexTreeView
/**
* Holds if `term` is an escape class representing e.g. `\d`.
* `clazz` is which character class it represents, e.g. "d" for `\d`.
*/
predicate isEscapeClass(RegExpTerm term, string clazz) {
exists(RegExpCharacterClassEscape escape | term = escape | escape.getValue() = clazz)
}
/**
* Holds if the regular expression should not be considered.
*

Some files were not shown because too many files have changed in this diff Show More