Merge branch 'main' into pyloadSsl

This commit is contained in:
Porcupiney Hairs
2024-10-21 20:09:05 +05:30
5970 changed files with 320700 additions and 173930 deletions

View File

@@ -30,6 +30,51 @@ module FastApi {
API::Node instance() { result = cls().getReturn() }
}
/**
 * A call to `app.add_middleware`, where `app` is an `App::instance()`,
 * registering a middleware of any kind.
 */
private class AddMiddlewareCall extends DataFlow::CallCfgNode {
  AddMiddlewareCall() {
    exists(API::Node addMiddleware |
      addMiddleware = App::instance().getMember("add_middleware")
    |
      this = addMiddleware.getACall()
    )
  }

  /**
   * Gets the identifier given as the first positional argument, that is, the name
   * of the middleware being added. There is no result unless that argument is a
   * plain name expression.
   */
  string getMiddlewareName() {
    exists(Name middleware | middleware = this.getArg(0).asExpr() | result = middleware.getId())
  }
}
/**
 * A call to `app.add_middleware`, modeled as the registration of a CORS middleware.
 *
 * NOTE(review): this class declares no characteristic predicate, so it does not itself
 * restrict the first argument to `CORSMiddleware`; presumably consumers filter on
 * `getMiddlewareName()` - confirm.
 */
class AddCorsMiddlewareCall extends Http::Server::CorsMiddleware::Range, AddMiddlewareCall {
  /**
   * Gets the identifier given as the first positional argument, that is, the name
   * of the middleware being added.
   */
  override string getMiddlewareName() {
    exists(Name middleware | middleware = this.getArg(0).asExpr() | result = middleware.getId())
  }

  /** Gets the dataflow node passed as the `allow_origins` keyword argument. */
  override DataFlow::Node getOrigins() { result = this.getArgByName("allow_origins") }

  /**
   * Gets the dataflow node passed as the `allow_credentials` keyword argument,
   * which controls whether CORS credentials are allowed.
   */
  override DataFlow::Node getCredentialsAllowed() {
    result = this.getArgByName("allow_credentials")
  }

  /** Gets the dataflow node passed as the `allow_methods` keyword argument. */
  DataFlow::Node getMethods() { result = this.getArgByName("allow_methods") }

  /** Gets the dataflow node passed as the `allow_headers` keyword argument. */
  DataFlow::Node getHeaders() { result = this.getArgByName("allow_headers") }
}
/**
* Provides models for the `fastapi.APIRouter` class
*

View File

@@ -81,6 +81,24 @@ module PEP249 {
}
}
/**
 * A call to `fetchone`, `fetchmany` or `fetchall` on a database cursor or a
 * database connection, treated as a threat-model source of kind `database`.
 */
private class FetchMethodCall extends ThreatModelSource::Range, API::CallNode {
  FetchMethodCall() {
    exists(API::Node start, string fetchMethod |
      fetchMethod = ["fetchone", "fetchmany", "fetchall"] and
      (
        start instanceof DatabaseCursor
        or
        start instanceof DatabaseConnection
      )
    |
      // note: sources cannot currently carry access paths, so all three methods are
      // treated alike even though fetchmany/fetchall clearly return a list/iterable
      // of rows rather than a single row.
      this = start.getMember(fetchMethod).getACall()
    )
  }

  override string getThreatModel() { result = "database" }

  override string getSourceType() { result = "cursor.fetch*()" }
}
// ---------------------------------------------------------------------------
// asyncio implementations
// ---------------------------------------------------------------------------

View File

@@ -25,6 +25,74 @@ private import semmle.python.frameworks.data.ModelsAsData
* - https://www.starlette.io/
*/
module Starlette {
/**
 * Provides models for Starlette application objects.
 *
 * See https://www.starlette.io/applications/.
 */
module App {
  /**
   * Gets a reference to a Starlette application class.
   *
   * The documented application class is `starlette.applications.Starlette`. The
   * `starlette.app` member is still matched so that nothing recognized by the
   * earlier model is lost.
   */
  API::Node cls() {
    result = API::moduleImport("starlette").getMember("applications").getMember("Starlette")
    or
    result = API::moduleImport("starlette").getMember("app")
  }

  /** Gets a reference to a Starlette application (an instance of a class from `cls()`). */
  API::Node instance() { result = cls().getAnInstance() }
}
/**
 * A call that registers a middleware: either `app.add_middleware(...)` on an
 * `App::instance()`, or an instantiation of `starlette.middleware.Middleware`
 * (see `Middleware::instance()`).
 */
class AddMiddlewareCall extends DataFlow::CallCfgNode {
  AddMiddlewareCall() {
    this = App::instance().getMember("add_middleware").getACall()
    or
    this = Middleware::instance()
  }

  /**
   * Gets the identifier given as the first positional argument, that is, the name
   * of the middleware being added. There is no result unless that argument is a
   * plain name expression.
   */
  string getMiddlewareName() {
    exists(Name middleware | middleware = this.getArg(0).asExpr() | result = middleware.getId())
  }
}
/**
 * A middleware registration (see `AddMiddlewareCall`) modeled as a CORS middleware.
 *
 * NOTE(review): this class declares no characteristic predicate, so it does not itself
 * restrict the first argument to `CORSMiddleware`; presumably consumers filter on
 * `getMiddlewareName()` - confirm.
 */
class AddCorsMiddlewareCall extends AddMiddlewareCall, Http::Server::CorsMiddleware::Range {
  /**
   * Gets the identifier given as the first positional argument, that is, the name
   * of the middleware being added.
   */
  override string getMiddlewareName() {
    exists(Name middleware | middleware = this.getArg(0).asExpr() | result = middleware.getId())
  }

  /** Gets the dataflow node passed as the `allow_origins` keyword argument. */
  override DataFlow::Node getOrigins() { result = this.getArgByName("allow_origins") }

  /**
   * Gets the dataflow node passed as the `allow_credentials` keyword argument,
   * which controls whether CORS credentials are allowed.
   */
  override DataFlow::Node getCredentialsAllowed() {
    result = this.getArgByName("allow_credentials")
  }

  /** Gets the dataflow node passed as the `allow_methods` keyword argument. */
  DataFlow::Node getMethods() { result = this.getArgByName("allow_methods") }

  /** Gets the dataflow node passed as the `allow_headers` keyword argument. */
  DataFlow::Node getHeaders() { result = this.getArgByName("allow_headers") }
}
/**
 * Provides models for the `starlette.middleware.Middleware` class.
 *
 * See https://www.starlette.io/.
 */
module Middleware {
  /**
   * Gets a reference to the `starlette.middleware.Middleware` class, or to a
   * subclass contributed through the `starlette.middleware.Middleware~Subclass`
   * type model.
   */
  API::Node classRef() {
    exists(API::Node middlewareModule |
      middlewareModule = API::moduleImport("starlette").getMember("middleware")
    |
      result = middlewareModule.getMember("Middleware")
    )
    or
    result = ModelOutput::getATypeNode("starlette.middleware.Middleware~Subclass").getASubclass*()
  }

  /** Gets a call that instantiates `starlette.middleware.Middleware` (see `classRef()`). */
  DataFlow::Node instance() { exists(API::Node cls | cls = classRef() | result = cls.getACall()) }
}
/**
* Provides models for the `starlette.websockets.WebSocket` class
*

View File

@@ -0,0 +1,189 @@
extensions:
- addsTo:
pack: codeql/python-all
extensible: sourceModel
data:
- ['os', 'Member[getenv].ReturnValue', 'environment']
- ['os', 'Member[getenvb].ReturnValue', 'environment']
- ['os', 'Member[environ]', 'environment']
- ['os', 'Member[environb]', 'environment']
- ['posix', 'Member[environ]', 'environment']
- ['sys', 'Member[argv]', 'commandargs']
- ['sys', 'Member[orig_argv]', 'commandargs']
- ['sys', 'Member[stdin]', 'stdin']
- ['builtins', 'Member[input].ReturnValue', 'stdin']
- ['builtins', 'Member[raw_input].ReturnValue', 'stdin'] # python 2 only
# if no argument is given, the default is to use sys.argv[1:]
- ['argparse.ArgumentParser', 'Member[parse_args,parse_known_args].WithArity[0].ReturnValue', 'commandargs']
- ['os', 'Member[read].ReturnValue', 'file']
- addsTo:
pack: codeql/python-all
extensible: sinkModel
data:
- ["zipfile.ZipFile","Member[extractall].Argument[0,path:]", "path-injection"]
- addsTo:
pack: codeql/python-all
extensible: summaryModel
data:
# See https://docs.python.org/3/library/argparse.html#argparse.ArgumentParser
# note: taint flow for attribute lookups on `argparse.ArgumentParser` is handled in QL
- ["argparse.ArgumentParser", "Member[_parse_known_args,_read_args_from_files]", "Argument[0,arg_strings:]", "ReturnValue", "taint"]
- ["argparse.ArgumentParser", "Member[parse_args,parse_known_args]", "Argument[0,args:]", "ReturnValue", "taint"]
# See https://docs.python.org/3/library/cgi.html#higher-level-interface
- ["cgi.FieldStorage", "Member[getfirst,getlist,getvalue]", "Argument[self]", "ReturnValue", "taint"]
# See
# - https://docs.python.org/3/glossary.html#term-mapping
# - https://docs.python.org/3/library/stdtypes.html#dict.get
- ["collections.abc.Mapping", "Member[get]", "Argument[1,default:]", "ReturnValue", "taint"]
# See https://docs.python.org/3/library/contextlib.html#contextlib.ExitStack
- ["contextlib.ExitStack", "Member[enter_context]", "Argument[0,cm:]", "ReturnValue", "taint"]
# See https://docs.python.org/3/library/copy.html#copy.deepcopy
- ["copy", "Member[copy,deepcopy,replace]", "Argument[0,x:]", "ReturnValue", "value"]
# See
# - https://docs.python.org/3/library/ctypes.html#ctypes.create_string_buffer
# - https://docs.python.org/3/library/ctypes.html#ctypes.create_unicode_buffer
- ["ctypes", "Member[create_string_buffer,create_unicode_buffer]", "Argument[0,init:,init_or_size:]", "ReturnValue", "taint"]
# See https://docs.python.org/3.11/distutils/apiref.html#distutils.util.change_root
- ["distutils", "Member[util].Member[change_root]", "Argument[0,new_root:,1,pathname:]", "ReturnValue", "taint"]
# See https://docs.python.org/3/library/email.header.html#email.header.Header
- ["email.header.Header!", "Subclass.Call", "Argument[0,s:]", "ReturnValue", "taint"]
# See https://docs.python.org/3/library/email.utils.html#email.utils.parseaddr
- ["email", "Member[utils].Member[parseaddr]", "Argument[0,addr:]", "ReturnValue", "taint"]
- ["email", "Member[utils].Member[parseaddr]", "Argument[0,addr:]", "ReturnValue.TupleElement[0,1]", "taint"]
# See https://docs.python.org/3/library/fnmatch.html#fnmatch.filter
- ["fnmatch", "Member[filter]", "Argument[0,names:].ListElement", "ReturnValue.ListElement", "value"]
- ["fnmatch", "Member[filter]", "Argument[0,names:]", "ReturnValue", "taint"]
# See https://docs.python.org/3/library/getopt.html#getopt.getopt
- ["getopt", "Member[getopt]", "Argument[0,args:]", "ReturnValue.TupleElement[1]", "taint"]
- ["getopt", "Member[getopt]", "Argument[1,shortopts:,2,longopts:]", "ReturnValue.TupleElement[0].ListElement.TupleElement[0]", "taint"]
# See https://docs.python.org/3/library/gettext.html#gettext.gettext
- ["gettext", "Member[gettext]", "Argument[0,message:]", "ReturnValue", "taint"]
# See
# - https://docs.python.org/3/library/glob.html#glob.glob
# - https://docs.python.org/3/library/glob.html#glob.iglob
- ["glob", "Member[glob,iglob]", "Argument[0,pathname:]", "ReturnValue", "taint"]
# See https://docs.python.org/3/library/gzip.html#gzip.GzipFile
- ["gzip.GzipFile!", "Subclass.Call", "Argument[0,filename:]", "ReturnValue", "taint"]
# See
# - https://docs.python.org/3/library/html.html#html.escape
# - https://docs.python.org/3/library/html.html#html.unescape
- ["html", "Member[escape,unescape]", "Argument[0,s:]", "ReturnValue", "taint"]
# See https://docs.python.org/3/library/html.parser.html#html.parser.HTMLParser.feed
- ["html.parser.HTMLParser", "Member[feed]", "Argument[0,data:]", "Argument[self]", "taint"]
# See https://docs.python.org/3.11/library/imp.html#imp.find_module
- ["imp", "Member[find_module]", "Argument[0,name:,1,path:]", "ReturnValue", "taint"]
# See https://docs.python.org/3/library/logging.html#logging.getLevelName
# specifically the no matching case
- ["logging", "Member[getLevelName]", "Argument[0,level:]", "ReturnValue", "taint"]
# See https://docs.python.org/3/library/logging.html#logging.LogRecord.getMessage
- ["logging.LogRecord", "Member[getMessage]", "Argument[self]", "ReturnValue", "taint"]
# See https://docs.python.org/3/library/mimetypes.html#mimetypes.guess_type
- ["mimetypes", "Member[guess_type]", "Argument[0,url:]", "ReturnValue", "taint"]
# See https://github.com/python/cpython/blob/main/Lib/nturl2path.py
# No user-facing documentation, unfortunately.
- ["nturl2path", "Member[pathname2url]", "Argument[0,p:]", "ReturnValue", "taint"]
- ["nturl2path", "Member[url2pathname]", "Argument[0,url:]", "ReturnValue", "taint"]
# See https://docs.python.org/3/library/optparse.html#optparse.OptionParser.parse_args
- ["optparse.OptionParser", "Member[parse_args]", "Argument[0,args:,1,values:]", "ReturnValue.TupleElement[0,1]", "taint"]
# See https://docs.python.org/3/library/os.html#os.walk
- ["os", "Member[walk]", "Argument[0,top:]", "ReturnValue", "taint"]
# See https://github.com/python/cpython/blob/3.10/Lib/pathlib.py#L972-L973
# Models `with path as p:`: taint on the path object flows to the value returned by `__enter__`.
- ["pathlib.Path", "Member[__enter__]", "Argument[self]", "ReturnValue", "taint"]
# See https://docs.python.org/3/library/os.html#os.PathLike.__fspath__
- ["pathlib.PurePath", "Member[__fspath__]", "Argument[self]", "ReturnValue", "taint"]
# See
# - https://docs.python.org/3/library/queue.html#queue.Queue.get
# - https://docs.python.org/3/library/queue.html#queue.Queue.get_nowait
- ["queue.Queue", "Member[get,get_nowait]", "Argument[self].ListElement", "ReturnValue", "value"]
- ["queue.Queue", "Member[get,get_nowait]", "Argument[self]", "ReturnValue", "taint"]
# See
# - https://docs.python.org/3/library/queue.html#queue.Queue.put
# - https://docs.python.org/3/library/queue.html#queue.Queue.put_nowait
- ["queue.Queue", "Member[put,put_nowait]", "Argument[0,item:]", "Argument[self].ListElement", "value"]
- ["queue.Queue", "Member[put,put_nowait]", "Argument[0,item:]", "Argument[self]", "taint"]
# See
# - https://docs.python.org/3/library/random.html#random.choice
# - https://docs.python.org/3/library/random.html#module-random
- ["random", "Member[choice]", "Argument[0,seq:].ListElement", "ReturnValue", "value"]
- ["random", "Member[choice]", "Argument[0,seq:].SetElement", "ReturnValue", "value"]
- ["random", "Member[choice]", "Argument[0,seq:]", "ReturnValue", "taint"]
- ["random.Random", "Member[choice]", "Argument[0,seq:].ListElement", "ReturnValue", "value"]
- ["random.Random", "Member[choice]", "Argument[0,seq:].SetElement", "ReturnValue", "value"]
- ["random.Random", "Member[choice]", "Argument[0,seq:]", "ReturnValue", "taint"]
# See https://docs.python.org/3/library/shlex.html#shlex.quote
- ["shlex", "Member[quote]", "Argument[0,s:]", "ReturnValue", "taint"]
# See https://docs.python.org/3/library/shutil.html#shutil.rmtree
- ["shutil", "Member[rmtree]", "Argument[0,path:]", "Argument[2,onerror:,onexc:].Parameter[1]", "taint"]
# See https://docs.python.org/3/library/shutil.html#shutil.which
- ["shutil", "Member[which]", "Argument[0,cmd:,2,path:]", "ReturnValue", "taint"]
# See https://docs.python.org/3/library/subprocess.html#subprocess.Popen
- ["subprocess.Popen!", "Subclass.Call", "Argument[0,args:]", "ReturnValue", "taint"]
# See
# - https://docs.python.org/3/library/tarfile.html#tarfile.open
# - https://docs.python.org/3/library/tarfile.html#tarfile.TarFile.open
- ["tarfile", "Member[open]", "Argument[0,name:,2,fileobj:]", "ReturnValue", "taint"]
- ["tarfile.TarFile", "Member[open]", "Argument[0,name:,2,fileobj:]", "ReturnValue", "taint"]
# See https://docs.python.org/3/library/tempfile.html#tempfile.mkdtemp
- ["tempfile", "Member[mkdtemp]", "Argument[0,suffix:,1,prefix:,2,dir:]", "ReturnValue", "taint"]
# See https://docs.python.org/3/library/tempfile.html#tempfile.mkstemp
- ["tempfile", "Member[mkstemp]", "Argument[0,suffix:,1,prefix:,2,dir:]", "ReturnValue.TupleElement[0,1]", "taint"]
# See https://docs.python.org/3/library/textwrap.html#textwrap.dedent
- ["textwrap", "Member[dedent]", "Argument[0,text:]", "ReturnValue", "taint"]
# See https://docs.python.org/3/library/traceback.html#traceback.StackSummary.from_list
- ["traceback.StackSummary", "Member[from_list]", "Argument[0,a_list:]", "ReturnValue", "taint"]
# See https://docs.python.org/3/library/typing.html#typing.cast
- ["typing", "Member[cast]", "Argument[1,val:]", "ReturnValue", "value"]
# See https://docs.python.org/3/library/urllib.parse.html#urllib.parse.parse_qs
- ["urllib", "Member[parse].Member[parse_qs]", "Argument[0,qs:]", "ReturnValue", "taint"]
# See https://docs.python.org/3/library/urllib.parse.html#urllib.parse.quote
- ["urllib", "Member[parse].Member[quote]", "Argument[0,string:]", "ReturnValue", "taint"]
# See https://docs.python.org/3/library/urllib.parse.html#urllib.parse.quote_plus
- ["urllib", "Member[parse].Member[quote_plus]", "Argument[0,string:]", "ReturnValue", "taint"]
# See https://epydoc.sourceforge.net/stdlib/urllib-module.html
- ["urllib", "Member[parse].Member[splitquery]", "Argument[0,url:]", "ReturnValue.TupleElement[0,1]", "taint"]
# See https://docs.python.org/3/library/urllib.parse.html#urllib.parse.unquote
- ["urllib", "Member[parse].Member[unquote]", "Argument[0,string:]", "ReturnValue", "taint"]
# See https://docs.python.org/3/library/urllib.parse.html#urllib.parse.unquote_plus
- ["urllib", "Member[parse].Member[unquote_plus]", "Argument[0,string:]", "ReturnValue", "taint"]
# We could consider a more precise source than the first argument, namely tuple or dict content.
# See https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urlencode
- ["urllib", "Member[parse].Member[urlencode]", "Argument[0,query:]", "ReturnValue", "taint"]
# See https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urljoin
- ["urllib", "Member[parse].Member[urljoin]", "Argument[0,base:,1,url:]", "ReturnValue", "taint"]
# See the internal documentation
# https://github.com/python/cpython/blob/3.12/Lib/zipfile/_path/__init__.py#L103-L105
- ["zipfile.CompleteDirs", "Member[namelist]", "Argument[self]", "ReturnValue", "taint"]
# See https://docs.python.org/3/library/zipfile.html#zipfile.ZipFile
# it may be necessary to read the code to understand the taint propagation
# Constructor: https://github.com/python/cpython/blob/3.12/Lib/zipfile/__init__.py#L1266
- ["zipfile.ZipFile!", "Subclass.Call", "Argument[0,file:]", "ReturnValue", "taint"]
- ["zipfile.ZipFile!", "Subclass.Call", "Argument[0,file:]", "ReturnValue.Attribute[filelist].ListElement.Attribute[filename]", "value"]
# _extract_member: https://github.com/python/cpython/blob/3.12/Lib/zipfile/__init__.py#L1761
- ["zipfile.ZipFile", "Member[_extract_member]", "Argument[1,targetpath:]", "ReturnValue", "taint"]
# infolist: https://github.com/python/cpython/blob/3.12/Lib/zipfile/__init__.py#L1498-L1501
- ["zipfile.ZipFile", "Member[infolist]", "Argument[self]", "ReturnValue", "taint"]
- ["zipfile.ZipFile", "Member[infolist]", "Argument[self].Attribute[filelist]", "ReturnValue", "value"]
# namelist: https://github.com/python/cpython/blob/3.12/Lib/zipfile/__init__.py#L1494-L1496
- ["zipfile.ZipFile", "Member[namelist]", "Argument[self]", "ReturnValue", "taint"]
- addsTo:
pack: codeql/python-all
extensible: neutralModel
data: []
- addsTo:
pack: codeql/python-all
extensible: typeModel
data: []
- addsTo:
pack: codeql/python-all
extensible: typeVariableModel
data: []

View File

@@ -254,10 +254,14 @@ module Stdlib {
* See https://docs.python.org/3.9/library/logging.html#logging.Logger.
*/
module Logger {
private import semmle.python.dataflow.new.internal.DataFlowDispatch as DD
/**
 * Gets a reference to the `logging.Logger` class or any subclass.
 *
 * The base class is found as `logging.Logger`, as the return value of
 * `logging.getLoggerClass()`, or through the `logging.Logger~Subclass` type model.
 */
API::Node subclassRef() {
  exists(API::Node loggerClass |
    exists(API::Node logging | logging = API::moduleImport("logging") |
      loggerClass = logging.getMember("Logger")
      or
      loggerClass = logging.getMember("getLoggerClass").getReturn()
    )
    or
    loggerClass = ModelOutput::getATypeNode("logging.Logger~Subclass")
  |
    result = loggerClass.getASubclass*()
  )
}
@@ -277,6 +281,13 @@ module Stdlib {
ClassInstantiation() {
this = subclassRef().getACall()
or
this =
DD::selfTracker(subclassRef()
.getAValueReachableFromSource()
.asExpr()
.(ClassExpr)
.getInnerScope())
or
this = API::moduleImport("logging").getMember("root").asSource()
or
this = API::moduleImport("logging").getMember("getLogger").getACall()
@@ -338,7 +349,7 @@ module StdlibPrivate {
* Modeling of path related functions in the `os` module.
* Wrapped in QL module to make it easy to fold/unfold.
*/
private module OsFileSystemAccessModeling {
module OsFileSystemAccessModeling {
/**
* A call to the `os.fsencode` function.
*
@@ -395,7 +406,7 @@ module StdlibPrivate {
*
* See https://docs.python.org/3/library/os.html#os.open
*/
private class OsOpenCall extends FileSystemAccess::Range, DataFlow::CallCfgNode {
class OsOpenCall extends FileSystemAccess::Range, DataFlow::CallCfgNode {
OsOpenCall() { this = os().getMember("open").getACall() }
override DataFlow::Node getAPathArgument() {
@@ -1492,6 +1503,9 @@ module StdlibPrivate {
or
// io.open is a special case, since it is an alias for the builtin `open`
result = API::moduleImport("io").getMember("open")
or
// similarly, codecs.open calls the builtin `open`: https://github.com/python/cpython/blob/3.12/Lib/codecs.py#L918
result = API::moduleImport("codecs").getMember("open")
}
/**
@@ -1499,13 +1513,22 @@ module StdlibPrivate {
* See https://docs.python.org/3/library/functions.html#open
*/
private class OpenCall extends FileSystemAccess::Range, Stdlib::FileLikeObject::InstanceSource,
DataFlow::CallCfgNode
ThreatModelSource::Range, DataFlow::CallCfgNode
{
OpenCall() { this = getOpenFunctionRef().getACall() }
OpenCall() {
this = getOpenFunctionRef().getACall() and
// when analyzing stdlib code for os.py we wrongly assume that `os.open` is an
// alias of the builtins `open` function
not this instanceof OsFileSystemAccessModeling::OsOpenCall
}
override DataFlow::Node getAPathArgument() {
result in [this.getArg(0), this.getArgByName("file")]
}
override string getThreatModel() { result = "file" }
override string getSourceType() { result = "open()" }
}
/**
@@ -3251,11 +3274,28 @@ module StdlibPrivate {
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
input in ["Argument[0]", "Argument[pattern:]"] and
output = "ReturnValue.Attribute[pattern]" and
preservesValue = true
(
output = "ReturnValue.Attribute[pattern]" and
preservesValue = true
or
output = "ReturnValue" and
preservesValue = false
)
}
}
/**
 * Gets a base API node for regular expression functions.
 *
 * With `compiled = false` this is the `re` module itself; with `compiled = true`
 * it is the value returned by a call covered by `RePatternSummary`, that is, a
 * compiled regular expression.
 */
private API::Node re(boolean compiled) {
  compiled = false and
  result = API::moduleImport("re")
  or
  compiled = true and
  exists(RePatternSummary pattern | result = pattern.getACall().(API::CallNode).getReturn())
}
/**
* A flow summary for methods returning a `re.Match` object
*
@@ -3265,17 +3305,18 @@ module StdlibPrivate {
ReMatchSummary() { this = ["re.Match", "compiled re.Match"] }
override DataFlow::CallCfgNode getACall() {
this = "re.Match" and
result = API::moduleImport("re").getMember(["match", "search", "fullmatch"]).getACall()
or
this = "compiled re.Match" and
result =
any(RePatternSummary c)
.getACall()
.(API::CallNode)
.getReturn()
.getMember(["match", "search", "fullmatch"])
.getACall()
exists(API::Node re, boolean compiled |
re = re(compiled) and
(
compiled = false and
this = "re.Match"
or
compiled = true and
this = "compiled re.Match"
)
|
result = re.getMember(["match", "search", "fullmatch"]).getACall()
)
}
override DataFlow::ArgumentNode getACallback() { none() }
@@ -3312,6 +3353,13 @@ module StdlibPrivate {
}
}
/**
 * Gets an API node for a `re.Match` object: either the return value of a call
 * covered by `ReMatchSummary`, or an element obtained by subscripting the result
 * of `finditer` on the `re` module or on a compiled regular expression.
 */
private API::Node match() {
  exists(ReMatchSummary matchSummary |
    result = matchSummary.getACall().(API::CallNode).getReturn()
  )
  or
  exists(API::Node regex | regex = re(_) |
    result = regex.getMember("finditer").getReturn().getASubscript()
  )
}
/**
* A flow summary for methods on a `re.Match` object
*
@@ -3325,15 +3373,7 @@ module StdlibPrivate {
methodName in ["expand", "group", "groups", "groupdict"]
}
override DataFlow::CallCfgNode getACall() {
result =
any(ReMatchSummary c)
.getACall()
.(API::CallNode)
.getReturn()
.getMember(methodName)
.getACall()
}
override DataFlow::CallCfgNode getACall() { result = match().getMember(methodName).getACall() }
override DataFlow::ArgumentNode getACallback() { none() }
@@ -3435,6 +3475,14 @@ module StdlibPrivate {
) and
preservesValue = false
)
or
// flow from input string to attribute on match object
exists(int arg | arg = methodName.(RegexExecutionMethod).getStringArgIndex() - offset |
input in ["Argument[" + arg + "]", "Argument[string:]"] and
methodName = "finditer" and
output = "ReturnValue.ListElement.Attribute[string]" and
preservesValue = true
)
)
}
}
@@ -4207,7 +4255,11 @@ module StdlibPrivate {
// ---------------------------------------------------------------------------
// Flow summaries for functions constructing containers
// ---------------------------------------------------------------------------
/** A flow summary for `dict`. */
/**
* A flow summary for `dict`.
*
* see https://docs.python.org/3/library/stdtypes.html#dict
*/
class DictSummary extends SummarizedCallable {
DictSummary() { this = "builtins.dict" }
@@ -4218,18 +4270,28 @@ module StdlibPrivate {
}
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
// The positional argument contains a mapping.
// TODO: these values can be overwritten by keyword arguments
// - dict mapping
exists(DataFlow::DictionaryElementContent dc, string key | key = dc.getKey() |
input = "Argument[0].DictionaryElement[" + key + "]" and
output = "ReturnValue.DictionaryElement[" + key + "]" and
preservesValue = true
)
or
// - list-of-pairs mapping
input = "Argument[0].ListElement.TupleElement[1]" and
output = "ReturnValue.DictionaryElementAny" and
preservesValue = true
or
// The keyword arguments are added to the dictionary.
exists(DataFlow::DictionaryElementContent dc, string key | key = dc.getKey() |
input = "Argument[" + key + ":]" and
output = "ReturnValue.DictionaryElement[" + key + "]" and
preservesValue = true
)
or
// Imprecise content in the first argument ends up on the container itself.
input = "Argument[0]" and
output = "ReturnValue" and
preservesValue = false
@@ -4475,21 +4537,9 @@ module StdlibPrivate {
override DataFlow::ArgumentNode getACallback() { none() }
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
exists(string content |
content = "ListElement"
or
content = "SetElement"
or
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
content = "TupleElement[" + i.toString() + "]"
)
or
exists(DataFlow::DictionaryElementContent dc, string key | key = dc.getKey() |
content = "DictionaryElement[" + key + "]"
)
|
input = "Argument[self]." + content and
output = "ReturnValue." + content and
exists(DataFlow::Content c |
input = "Argument[self]." + c.getMaDRepresentation() and
output = "ReturnValue." + c.getMaDRepresentation() and
preservesValue = true
)
or
@@ -4499,6 +4549,32 @@ module StdlibPrivate {
}
}
/**
 * A flow summary for `copy.replace`.
 *
 * Each keyword argument `name=value` is stored, with its value preserved, in the
 * attribute `name` of the returned object.
 */
class ReplaceSummary extends SummarizedCallable {
  ReplaceSummary() { this = "copy.replace" }

  override DataFlow::CallCfgNode getACall() {
    result = API::moduleImport("copy").getMember("replace").getACall()
  }

  override DataFlow::ArgumentNode getACallback() {
    result = API::moduleImport("copy").getMember("replace").getAValueReachableFromSource()
  }

  override predicate propagatesFlow(string input, string output, boolean preservesValue) {
    // Only keyword names that actually occur in some call to a function or method
    // named `replace` are considered, which keeps the set of summaries finite.
    exists(CallNode replaceCall, string name, DataFlow::AttributeContent attr |
      exists(ControlFlowNode func | func = replaceCall.getFunction() |
        func.(NameNode).getId() = "replace"
        or
        func.(AttrNode).getName() = "replace"
      )
    |
      exists(replaceCall.getArgByName(name)) and
      attr.getAttribute() = name and
      input = "Argument[" + name + ":]" and
      output = "ReturnValue." + attr.getMaDRepresentation() and
      preservesValue = true
    )
  }
}
/**
* A flow summary for `pop` either for list or set.
* This ignores the index if given, since content is
@@ -4989,6 +5065,39 @@ module StdlibPrivate {
override string getKind() { result = Escaping::getHtmlKind() }
}
// ---------------------------------------------------------------------------
// argparse
// ---------------------------------------------------------------------------
/**
 * An additional taint step from the result of `ArgumentParser.parse_args` to any
 * attribute read on it: if the result of `parse_args` is tainted (because it uses
 * command-line arguments), then the parsed values accessed through any attribute
 * lookup are tainted as well.
 */
private class ArgumentParserAnyAttributeStep extends TaintTracking::AdditionalTaintStep {
  override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
    exists(API::Node parsedArgs |
      parsedArgs =
        API::moduleImport("argparse")
            .getMember("ArgumentParser")
            .getReturn()
            .getMember("parse_args")
            .getReturn()
    |
      nodeFrom = parsedArgs.getAValueReachableFromSource() and
      nodeTo.(DataFlow::AttrRead).getObject() = nodeFrom
    )
  }
}
// ---------------------------------------------------------------------------
// sys
// ---------------------------------------------------------------------------
/**
 * An access of `sys.stdin`, `sys.stdout` or `sys.stderr`, registered as a
 * file-like object instance so that it gets the additional FileLike modeling.
 */
private class SysStandardStreams extends Stdlib::FileLikeObject::InstanceSource, DataFlow::Node {
  SysStandardStreams() {
    exists(string stream | stream = ["stdin", "stdout", "stderr"] |
      this = API::moduleImport("sys").getMember(stream).asSource()
    )
  }
}
}
// ---------------------------------------------------------------------------

View File

@@ -18,14 +18,19 @@ private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.ApiGraphs
private import semmle.python.dataflow.new.FlowSummary
private import semmle.python.Concepts
/**
* A remote flow source originating from a CSV source row.
* A threat-model flow source originating from a data extension.
*/
private class RemoteFlowSourceFromCsv extends RemoteFlowSource::Range {
RemoteFlowSourceFromCsv() { this = ModelOutput::getASourceNode("remote").asSource() }
private class ThreatModelSourceFromDataExtension extends ThreatModelSource::Range {
ThreatModelSourceFromDataExtension() { this = ModelOutput::getASourceNode(_).asSource() }
override string getSourceType() { result = "Remote flow (from model)" }
override string getThreatModel() { this = ModelOutput::getASourceNode(result).asSource() }
override string getSourceType() {
result = "Source node (" + this.getThreatModel() + ") [from data-extension]"
}
}
private class SummarizedCallableFromModel extends SummarizedCallable {