Merge branch 'github:main' into jorgectf/python/deserialization

This commit is contained in:
Jorge
2022-01-31 17:48:35 +01:00
committed by GitHub
3887 changed files with 317569 additions and 114448 deletions

View File

@@ -0,0 +1,27 @@
## 0.0.7
## 0.0.6
### New Queries
* Two new queries have been added for detecting Server-side request forgery (SSRF). _Full server-side request forgery_ (`py/full-ssrf`) will only alert when the URL is fully user-controlled, and _Partial server-side request forgery_ (`py/partial-ssrf`) will alert when any part of the URL is user-controlled. Only `py/full-ssrf` will be run by default.
### Minor Analysis Improvements
* To support the new SSRF queries, the PyPI package `requests` has been modeled, along with `http.client.HTTP[S]Connection` from the standard library.
## 0.0.5
### Minor Analysis Improvements
* Added modeling of many functions from the `os` module that use file system paths, such as `os.stat`, `os.chdir`, `os.mkdir`, and so on. All of these are new sinks for the _Uncontrolled data used in path expression_ (`py/path-injection`) query.
* Added modeling of the `tempfile` module for creating temporary files and directories, such as the functions `tempfile.NamedTemporaryFile` and `tempfile.TemporaryDirectory`. The `suffix`, `prefix`, and `dir` arguments are all vulnerable to path-injection, and these are new sinks for the _Uncontrolled data used in path expression_ (`py/path-injection`) query.
* Extended the modeling of FastAPI such that `fastapi.responses.FileResponse` are considered `FileSystemAccess`, making them sinks for the _Uncontrolled data used in path expression_ (`py/path-injection`) query.
* Added modeling of the `posixpath`, `ntpath`, and `genericpath` modules for path operations (although these are not supposed to be used), resulting in new sinks for the _Uncontrolled data used in path expression_ (`py/path-injection`) query.
* Added modeling of `wsgiref.simple_server` applications, leading to new remote flow sources.
## 0.0.4
### Query Metadata Changes
* Fixed the query ids of two queries that are meant for manual exploration: `python/count-untrusted-data-external-api` and `python/untrusted-data-to-external-api` have been changed to `py/count-untrusted-data-external-api` and `py/untrusted-data-to-external-api`.

View File

@@ -85,7 +85,7 @@ class CheckClass extends ClassObject {
predicate interestingUndefined(SelfAttributeRead a) {
exists(string name | name = a.getName() |
interestingContext(a, name) and
this.interestingContext(a, name) and
not this.definedInBlock(a.getAFlowNode().getBasicBlock(), name)
)
}
@@ -98,7 +98,7 @@ class CheckClass extends ClassObject {
not a.guardedByHasattr() and
a.getScope().isPublic() and
not this.monkeyPatched(name) and
not attribute_assigned_in_method(lookupAttribute("setUp"), name)
not attribute_assigned_in_method(this.lookupAttribute("setUp"), name)
}
private predicate probablyAbstract() {
@@ -127,7 +127,7 @@ class CheckClass extends ClassObject {
// so we can push the context in from there, which must apply to a
// SelfAttributeRead in the same scope
exists(SelfAttributeRead a | a.getScope() = b.getScope() and name = a.getName() |
interestingContext(a, name)
this.interestingContext(a, name)
) and
this.definitionInBlock(b, name)
or

View File

@@ -99,14 +99,14 @@ private ControlFlowNode get_a_call(Value callable) {
/** Gets the function object corresponding to the given class or function. */
FunctionObject get_function_or_initializer_objectapi(Object func_or_cls) {
result = func_or_cls.(FunctionObject)
result = func_or_cls
or
result = func_or_cls.(ClassObject).declaredAttribute("__init__")
}
/** Gets the function object corresponding to the given class or function. */
FunctionValue get_function_or_initializer(Value func_or_cls) {
result = func_or_cls.(FunctionValue)
result = func_or_cls
or
result = func_or_cls.(ClassValue).declaredAttribute("__init__")
}

View File

@@ -40,9 +40,7 @@ library class PossibleAdvancedFormatString extends StrConst {
private predicate implicitlyNumberedField(int start, int end) {
this.field(start, end) and
exists(string c | start + 1 = this.getText().indexOf(c) |
c = "}" or c = ":" or c = "!" or c = "."
)
exists(string c | start + 1 = this.getText().indexOf(c) | c in ["}", ":", "!", "."])
}
/** Whether this format string has implicitly numbered fields */

View File

@@ -32,7 +32,7 @@ predicate duplicate_char_in_class(Regex r, string char) {
//Ignore whitespace in verbose mode
not (
r.getAMode() = "VERBOSE" and
(char = " " or char = "\t" or char = "\r" or char = "\n")
char in [" ", "\t", "\r", "\n"]
)
}

View File

@@ -210,9 +210,9 @@ class CommentedOutCodeBlock extends @py_comment {
/** Whether this commented-out code block is likely to be example code embedded in a larger comment. */
predicate maybeExampleCode() {
exists(CommentBlock block | block.contains(this.(Comment)) |
exists(CommentBlock block | block.contains(this) |
exists(int all_code |
all_code = sum(CommentedOutCodeBlock code | block.contains(code.(Comment)) | code.length()) and
all_code = sum(CommentedOutCodeBlock code | block.contains(code) | code.length()) and
/* This ratio may need fine tuning */
block.length() > all_code * 2
)

View File

@@ -3,7 +3,7 @@
* @description This reports the external APIs that are used with untrusted data, along with how
* frequently the API is called, and how many unique sources of untrusted data flow
* to it.
* @id python/count-untrusted-data-external-api
* @id py/count-untrusted-data-external-api
* @kind table
* @tags security external/cwe/cwe-20
*/

View File

@@ -1,7 +1,7 @@
/**
* @name Untrusted data passed to external API
* @description Data provided remotely is used in this external API without sanitization, which could be a security risk.
* @id python/untrusted-data-to-external-api
* @id py/untrusted-data-to-external-api
* @kind path-problem
* @precision low
* @problem.severity error

View File

@@ -9,7 +9,6 @@
* @id py/path-injection
* @tags correctness
* security
* external/owasp/owasp-a1
* external/cwe/cwe-022
* external/cwe/cwe-023
* external/cwe/cwe-036

View File

@@ -10,7 +10,6 @@
* @id py/command-line-injection
* @tags correctness
* security
* external/owasp/owasp-a1
* external/cwe/cwe-078
* external/cwe/cwe-088
*/

View File

@@ -9,7 +9,6 @@
* @id py/sql-injection
* @tags security
* external/cwe/cwe-089
* external/owasp/owasp-a1
*/
import python

View File

@@ -9,7 +9,6 @@
* @precision high
* @id py/code-injection
* @tags security
* external/owasp/owasp-a1
* external/cwe/cwe-094
* external/cwe/cwe-095
* external/cwe/cwe-116

View File

@@ -0,0 +1,54 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>
It is possible to match some single HTML tags using regular expressions (parsing general HTML using
regular expressions is impossible). However, if the regular expression is not written well it might
be possible to circumvent it, which can lead to cross-site scripting or other security issues.
</p>
<p>
Some of these mistakes are caused by browsers having very forgiving HTML parsers, and
will often render invalid HTML containing syntax errors.
Regular expressions that attempt to match HTML should also recognize tags containing such syntax errors.
</p>
</overview>
<recommendation>
<p>
Use a well-tested sanitization or parser library if at all possible. These libraries are much more
likely to handle corner cases correctly than a custom implementation.
</p>
</recommendation>
<example>
<p>
The following example attempts to filter out all <code>&lt;script&gt;</code> tags.
</p>
<sample src="examples/BadTagFilter.py" />
<p>
The above sanitizer does not filter out all <code>&lt;script&gt;</code> tags.
Browsers will not only accept <code>&lt;/script&gt;</code> as script end tags, but also tags such as <code>&lt;/script foo="bar"&gt;</code> even though it is a parser error.
This means that an attack string such as <code>&lt;script&gt;alert(1)&lt;/script foo="bar"&gt;</code> will not be filtered by
the function, and <code>alert(1)</code> will be executed by a browser if the string is rendered as HTML.
</p>
<p>
Other corner cases include that HTML comments can end with <code>--!&gt;</code>,
and that HTML tag names can contain upper case characters.
</p>
</example>
<references>
<li>Securitum: <a href="https://research.securitum.com/the-curious-case-of-copy-paste/">The Curious Case of Copy &amp; Paste</a>.</li>
<li>stackoverflow.com: <a href="https://stackoverflow.com/questions/1732348/regex-match-open-tags-except-xhtml-self-contained-tags#answer-1732454">You can't parse [X]HTML with regex</a>.</li>
<li>HTML Standard: <a href="https://html.spec.whatwg.org/multipage/parsing.html#comment-end-bang-state">Comment end bang state</a>.</li>
<li>stackoverflow.com: <a href="https://stackoverflow.com/questions/25559999/why-arent-browsers-strict-about-html">Why aren't browsers strict about HTML?</a>.</li>
</references>
</qhelp>

View File

@@ -0,0 +1,21 @@
/**
* @name Bad HTML filtering regexp
* @description Matching HTML tags using regular expressions is hard to do right, and can easily lead to security issues.
* @kind problem
* @problem.severity warning
* @security-severity 7.8
* @precision high
* @id py/bad-tag-filter
* @tags correctness
* security
* external/cwe/cwe-116
* external/cwe/cwe-020
* external/cwe/cwe-185
* external/cwe/cwe-186
*/
import semmle.python.security.BadTagFilterQuery
from HTMLMatchingRegExp regexp, string msg
where msg = min(string m | isBadRegexpFilter(regexp, m) | m order by m.length(), m) // there might be multiple, we arbitrarily pick the shortest one
select regexp, msg

View File

@@ -0,0 +1,8 @@
import re
def filterScriptTags(content):
    """Repeatedly strip `<script ...>...</script>` elements from `content`.

    Removal is repeated until a pass leaves the text unchanged, so a script
    element that only appears after an inner one has been removed is stripped
    as well. Matching is case-insensitive and `.` also matches newlines.

    NOTE: the pattern only recognizes a closing tag spelled exactly
    `</script>`; a closing tag with anything between `script` and `>` is not
    matched and is left in place.
    """
    script_element = re.compile(r'<script.*?>.*?</script>', re.DOTALL | re.IGNORECASE)
    while True:
        stripped = script_element.sub('', content)
        if stripped == content:
            # Fixed point reached: nothing more to remove.
            return content
        content = stripped

View File

@@ -11,17 +11,46 @@
*/
import python
import semmle.python.web.Http
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
FunctionValue requestFunction() { result = Module::named("requests").attr(httpVerbLower()) }
/**
* Gets a call to a method that makes an outgoing request using the `requests` module,
* such as `requests.get` or `requests.put`, with the specified HTTP verb `verb`
*/
DataFlow::CallCfgNode outgoingRequestCall(string verb) {
verb = HTTP::httpVerbLower() and
result = API::moduleImport("requests").getMember(verb).getACall()
}
/** requests treats None as the default and all other "falsey" values as False */
predicate falseNotNone(Value v) { v.getDefiniteBooleanValue() = false and not v = Value::none_() }
/** Gets the "verify" argument to an outgoingRequestCall. */
DataFlow::Node verifyArg(DataFlow::CallCfgNode call) {
call = outgoingRequestCall(_) and
result = call.getArgByName("verify")
}
from CallNode call, FunctionValue func, Value falsey, ControlFlowNode origin
/** Gets a back-reference to the verify argument `arg`. */
private DataFlow::TypeTrackingNode verifyArgBacktracker(
DataFlow::TypeBackTracker t, DataFlow::Node arg
) {
t.start() and
arg = verifyArg(_) and
result = arg.getALocalSource()
or
exists(DataFlow::TypeBackTracker t2 | result = verifyArgBacktracker(t2, arg).backtrack(t2, t))
}
/** Gets a back-reference to the verify argument `arg`. */
DataFlow::LocalSourceNode verifyArgBacktracker(DataFlow::Node arg) {
result = verifyArgBacktracker(DataFlow::TypeBackTracker::end(), arg)
}
from DataFlow::CallCfgNode call, DataFlow::Node falseyOrigin, string verb
where
func = requestFunction() and
func.getACall() = call and
falseNotNone(falsey) and
call.getArgByName("verify").pointsTo(falsey, origin)
select call, "Call to $@ with verify=$@", func, "requests." + func.getName(), origin, "False"
call = outgoingRequestCall(verb) and
falseyOrigin = verifyArgBacktracker(verifyArg(call)) and
// requests treats `None` as the default and all other "falsey" values as `False`.
falseyOrigin.asExpr().(ImmutableLiteral).booleanValue() = false and
not falseyOrigin.asExpr() instanceof None
select call, "Call to requests." + verb + " with verify=$@", falseyOrigin, "False"

View File

@@ -9,8 +9,8 @@
* @id py/clear-text-logging-sensitive-data
* @tags security
* external/cwe/cwe-312
* external/cwe/cwe-315
* external/cwe/cwe-359
* external/cwe/cwe-532
*/
import python

View File

@@ -88,7 +88,7 @@ abstract class TlsLibrary extends string {
/** The name of a specific protocol version. */
abstract string specific_version_name(ProtocolVersion version);
/** Gets a name, which is a member of `version_constants`, that can be used to specify the protocol family `family`. */
/** Gets a name, which is a member of `version_constants`, that can be used to specify the protocol family `family`. */
abstract string unspecific_version_name(ProtocolFamily family);
/** Gets an API node representing the module or class holding the version constants. */
@@ -96,12 +96,12 @@ abstract class TlsLibrary extends string {
/** Gets an API node representing a specific protocol version. */
API::Node specific_version(ProtocolVersion version) {
result = version_constants().getMember(specific_version_name(version))
result = this.version_constants().getMember(this.specific_version_name(version))
}
/** Gets an API node representing the protocol family `family`. */
API::Node unspecific_version(ProtocolFamily family) {
result = version_constants().getMember(unspecific_version_name(family))
result = this.version_constants().getMember(this.unspecific_version_name(family))
}
/** Gets a creation of a context with a default protocol. */
@@ -112,14 +112,14 @@ abstract class TlsLibrary extends string {
/** Gets a creation of a context with a specific protocol version, known to be insecure. */
ContextCreation insecure_context_creation(ProtocolVersion version) {
result in [specific_context_creation(), default_context_creation()] and
result in [this.specific_context_creation(), this.default_context_creation()] and
result.getProtocol() = version and
version.isInsecure()
}
/** Gets a context that was created using `family`, known to have insecure instances. */
ContextCreation unspecific_context_creation(ProtocolFamily family) {
result in [specific_context_creation(), default_context_creation()] and
result in [this.specific_context_creation(), this.default_context_creation()] and
result.getProtocol() = family
}

View File

@@ -7,6 +7,7 @@
* @precision high
* @id py/polynomial-redos
* @tags security
* external/cwe/cwe-1333
* external/cwe/cwe-730
* external/cwe/cwe-400
*/

View File

@@ -8,6 +8,7 @@
* @precision high
* @id py/redos
* @tags security
* external/cwe/cwe-1333
* external/cwe/cwe-730
* external/cwe/cwe-400
*/

View File

@@ -14,7 +14,7 @@
import python
private import semmle.python.Concepts
import semmle.python.security.injection.RegexInjection
import semmle.python.security.dataflow.RegexInjection
import DataFlow::PathGraph
from

View File

@@ -0,0 +1,11 @@
<!DOCTYPE qhelp PUBLIC "-//Semmle//qhelp//EN" "qhelp.dtd">
<qhelp>
<overview>
<include src="ServerSideRequestForgery-start.inc.qhelp" />
<!-- query specific -->
<p>This query covers full SSRF, to find partial SSRF use the <code>py/partial-ssrf</code> query.</p>
</overview>
<include src="ServerSideRequestForgery-end.inc.qhelp" />
</qhelp>

View File

@@ -0,0 +1,25 @@
/**
* @name Full server-side request forgery
* @description Making a network request to a URL that is fully user-controlled allows for request forgery attacks.
* @kind path-problem
* @problem.severity error
* @security-severity 9.1
* @precision high
* @id py/full-ssrf
* @tags security
* external/cwe/cwe-918
*/
import python
import semmle.python.security.dataflow.ServerSideRequestForgery
import DataFlow::PathGraph
from
FullServerSideRequestForgery::Configuration fullConfig, DataFlow::PathNode source,
DataFlow::PathNode sink, HTTP::Client::Request request
where
request = sink.getNode().(FullServerSideRequestForgery::Sink).getRequest() and
fullConfig.hasFlowPath(source, sink) and
fullyControlledRequest(request)
select request, source, sink, "The full URL of this request depends on $@.", source.getNode(),
"a user-provided value"

View File

@@ -0,0 +1,11 @@
<!DOCTYPE qhelp PUBLIC "-//Semmle//qhelp//EN" "qhelp.dtd">
<qhelp>
<overview>
<include src="ServerSideRequestForgery-start.inc.qhelp" />
<!-- query specific -->
<p>This query covers partial SSRF, to find full SSRF use the <code>py/full-ssrf</code> query.</p>
</overview>
<include src="ServerSideRequestForgery-end.inc.qhelp" />
</qhelp>

View File

@@ -0,0 +1,25 @@
/**
* @name Partial server-side request forgery
* @description Making a network request to a URL that is partially user-controlled allows for request forgery attacks.
* @kind path-problem
* @problem.severity error
* @security-severity 9.1
* @precision medium
* @id py/partial-ssrf
* @tags security
* external/cwe/cwe-918
*/
import python
import semmle.python.security.dataflow.ServerSideRequestForgery
import DataFlow::PathGraph
from
PartialServerSideRequestForgery::Configuration partialConfig, DataFlow::PathNode source,
DataFlow::PathNode sink, HTTP::Client::Request request
where
request = sink.getNode().(PartialServerSideRequestForgery::Sink).getRequest() and
partialConfig.hasFlowPath(source, sink) and
not fullyControlledRequest(request)
select request, source, sink, "Part of the URL of this request depends on $@.", source.getNode(),
"a user-provided value"

View File

@@ -0,0 +1,47 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<recommendation>
<p>To guard against SSRF attacks you should avoid putting user-provided input directly
into a request URL. Instead, either maintain a list of authorized URLs on the server and choose
from that list based on the input provided, or perform proper validation of the input.
</p>
</recommendation>
<example>
<p>The following example shows code vulnerable to a full SSRF attack, because it
uses untrusted input (HTTP request parameter) directly to construct a URL. By using
<code>evil.com#</code> as the <code>target</code> value, the requested URL will be
<code>https://evil.com#.example.com/data/</code>. It also shows how to remedy the
problem by using the user input to select a known fixed string.
</p>
<sample src="examples/ServerSideRequestForgery_full.py" />
</example>
<example>
<p>
The following example shows code vulnerable to a partial SSRF attack, because it
uses untrusted input (HTTP request parameter) directly to construct a URL. By
using <code>../transfer-funds-to/123?amount=456</code> as the
<code>user_id</code> value, the requested URL will be
<code>https://api.example.com/transfer-funds-to/123?amount=456</code>. It also
shows how to remedy the problem by validating the input.
</p>
<sample src="examples/ServerSideRequestForgery_partial.py" />
</example>
<references>
<li>
<a href="https://owasp.org/www-community/attacks/Server_Side_Request_Forgery">OWASP SSRF article</a>
</li>
<li>
<a href="https://portswigger.net/web-security/ssrf">PortSwigger SSRF article</a>
</li>
</references>
</qhelp>

View File

@@ -0,0 +1,31 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<fragment>
<p>Directly incorporating user input into an HTTP request without validating the input
can facilitate server-side request forgery (SSRF) attacks. In these attacks, the
request may be changed, directed at a different server, or via a different
protocol. This can allow the attacker to obtain sensitive information or perform
actions with escalated privilege.
</p>
<p>
We make a distinction between how much of the URL an attacker can control:
</p>
<ul>
<li><b>Full SSRF</b>: where the full URL can be controlled.</li>
<li><b>Partial SSRF</b>: where only part of the URL can be controlled, such as the
path component of a URL to a hardcoded domain.</li>
</ul>
<p></p>
<p>
Partial control of a URL is often much harder to exploit. Therefore we have created a
separate query for each of these.
</p>
</fragment>
</qhelp>

View File

@@ -0,0 +1,15 @@
import requests
from flask import Flask, request
app = Flask(__name__)
@app.route("/full_ssrf")
def full_ssrf():
    """Contrast an unsafe and a safe way of building an outgoing request URL.

    The `target` query parameter is read from the incoming Flask request.
    Outgoing requests are made with the `requests` library (note the plural),
    not with Flask's `request` object, which has no `get` method.
    """
    target = request.args["target"]

    # BAD: user has full control of URL
    resp = requests.get("https://" + target + ".example.com/data/")

    # GOOD: `subdomain` is controlled by the server.
    subdomain = "europe" if target == "EU" else "world"
    resp = requests.get("https://" + subdomain + ".example.com/data/")

View File

@@ -0,0 +1,15 @@
import requests
from flask import Flask, request
app = Flask(__name__)
@app.route("/partial_ssrf")
def partial_ssrf():
    """Contrast an unvalidated and a validated use of user input in a URL path.

    The `user_id` query parameter is appended to a fixed
    `https://api.example.com/user_info/` prefix, so the host is hardcoded and
    only the trailing part of the URL comes from the user.
    """
    user_id = request.args["user_id"]

    # BAD: user can fully control the path component of the URL
    resp = requests.get("https://api.example.com/user_info/" + user_id)

    if user_id.isalnum():
        # GOOD: user_id is restricted to be alpha-numeric, and cannot alter path component of URL
        resp = requests.get("https://api.example.com/user_info/" + user_id)

View File

@@ -46,7 +46,7 @@ predicate mismatched_tuple_rhs(Assign a, int lcount, int rcount, Location loc) {
lcount = len(l) and
rcount = r.length() and
lcount != rcount and
not exists(Starred s | l.getAnItem() = s)
not l.getAnItem() instanceof Starred
)
}

View File

@@ -22,10 +22,7 @@ predicate modification_of_locals(ControlFlowNode f) {
attr = f.(CallNode).getFunction() and
originIsLocals(attr.getObject(mname))
|
mname = "pop" or
mname = "popitem" or
mname = "update" or
mname = "clear"
mname in ["pop", "popitem", "update", "clear"]
)
}

View File

@@ -70,10 +70,11 @@ predicate same_attribute(Attribute a1, Attribute a2) {
not is_property_access(a1)
}
pragma[nomagic]
Comment pyflakes_comment() { result.getText().toLowerCase().matches("%pyflakes%") }
int pyflakes_commented_line(File file) {
exists(Comment c | c.getText().toLowerCase().matches("%pyflakes%") |
c.getLocation().hasLocationInfo(file.getAbsolutePath(), result, _, _, _)
)
pyflakes_comment().getLocation().hasLocationInfo(file.getAbsolutePath(), result, _, _, _)
}
predicate pyflakes_commented(AssignStmt assignment) {

View File

@@ -49,7 +49,7 @@ predicate mutates_globals(ModuleValue m) {
or
// In Python 3.8, Enum._convert_ is implemented using a metaclass, and our points-to
// analysis doesn't handle that well enough. So we need a special case for this
not exists(Value enum_convert | enum_convert = enum_class.attr("_convert")) and
not exists(enum_class.attr("_convert")) and
exists(CallNode call | call.getScope() = m.getScope() |
call.getFunction().(AttrNode).getObject(["_convert", "_convert_"]).pointsTo() = enum_class
)

View File

@@ -68,7 +68,7 @@ predicate undefined_use_in_function(Name u) {
predicate undefined_use_in_class_or_module(Name u) {
exists(GlobalVariable v | u.uses(v)) and
not exists(Function f | u.getScope().getScope*() = f) and
not u.getScope().getScope*() instanceof Function and
exists(SsaVariable var | var.getAUse().getNode() = u | var.maybeUndefined()) and
not guarded_against_name_error(u) and
not exists(ModuleValue m | m.getScope() = u.getEnclosingModule() | m.hasAttribute(u.getId())) and

View File

@@ -8,7 +8,7 @@ import semmle.python.pointsto.PointsTo
import semmle.python.pointsto.PointsToContext
predicate trivial(ControlFlowNode f) {
exists(Parameter p | p = f.getNode())
f.getNode() instanceof Parameter
or
f instanceof NameConstantNode
or

View File

@@ -3,7 +3,7 @@
* @description Generates use-definition pairs that provide the data
* for jump-to-definition in the code viewer.
* @kind definitions
* @id python/ide-jump-to-definition
* @id py/ide-jump-to-definition
* @tags ide-contextual-queries/local-definitions
*/

View File

@@ -3,7 +3,7 @@
* @description Generates use-definition pairs that provide the data
* for find-references in the code viewer.
* @kind definitions
* @id python/ide-find-references
* @id py/ide-find-references
* @tags ide-contextual-queries/local-references
*/

View File

@@ -0,0 +1,4 @@
---
category: majorAnalysis
---
* User names and other account information is no longer considered to be sensitive data for the queries `py/clear-text-logging-sensitive-data` and `py/clear-text-storage-sensitive-data`, since this led to many false positives.

View File

@@ -0,0 +1,5 @@
## 0.0.4
### Query Metadata Changes
* Fixed the query ids of two queries that are meant for manual exploration: `python/count-untrusted-data-external-api` and `python/untrusted-data-to-external-api` have been changed to `py/count-untrusted-data-external-api` and `py/untrusted-data-to-external-api`.

View File

@@ -0,0 +1,9 @@
## 0.0.5
### Minor Analysis Improvements
* Added modeling of many functions from the `os` module that use file system paths, such as `os.stat`, `os.chdir`, `os.mkdir`, and so on. All of these are new sinks for the _Uncontrolled data used in path expression_ (`py/path-injection`) query.
* Added modeling of the `tempfile` module for creating temporary files and directories, such as the functions `tempfile.NamedTemporaryFile` and `tempfile.TemporaryDirectory`. The `suffix`, `prefix`, and `dir` arguments are all vulnerable to path-injection, and these are new sinks for the _Uncontrolled data used in path expression_ (`py/path-injection`) query.
* Extended the modeling of FastAPI such that `fastapi.responses.FileResponse` are considered `FileSystemAccess`, making them sinks for the _Uncontrolled data used in path expression_ (`py/path-injection`) query.
* Added modeling of the `posixpath`, `ntpath`, and `genericpath` modules for path operations (although these are not supposed to be used), resulting in new sinks for the _Uncontrolled data used in path expression_ (`py/path-injection`) query.
* Added modeling of `wsgiref.simple_server` applications, leading to new remote flow sources.

View File

@@ -0,0 +1,9 @@
## 0.0.6
### New Queries
* Two new queries have been added for detecting Server-side request forgery (SSRF). _Full server-side request forgery_ (`py/full-ssrf`) will only alert when the URL is fully user-controlled, and _Partial server-side request forgery_ (`py/partial-ssrf`) will alert when any part of the URL is user-controlled. Only `py/full-ssrf` will be run by default.
### Minor Analysis Improvements
* To support the new SSRF queries, the PyPI package `requests` has been modeled, along with `http.client.HTTP[S]Connection` from the standard library.

View File

@@ -0,0 +1 @@
## 0.0.7

View File

@@ -0,0 +1,2 @@
---
lastReleaseVersion: 0.0.7

View File

@@ -1,13 +0,0 @@
#!/bin/bash
set -Eeuo pipefail # see https://vaneyckt.io/posts/safer_bash_scripts_with_set_euxo_pipefail/
# Promotes new dataflow queries to be the real ones
SCRIPTDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
cd $SCRIPTDIR
for file in $(find . -mindepth 2); do
echo "Promoting $file"
mkdir -p "../../Security/$(dirname $file)"
mv "$file" "../../Security/${file}"
done

View File

@@ -1,35 +0,0 @@
/**
* @name Binding a socket to all network interfaces
* @description Binding a socket to all interfaces opens it up to traffic from any IPv4 address
* and is therefore associated with security risks.
* @kind problem
* @id py/old/bind-socket-all-network-interfaces
* @problem.severity error
*/
import python
Value aSocket() { result.getClass() = Value::named("socket.socket") }
CallNode socketBindCall() {
result = aSocket().attr("bind").(CallableValue).getACall() and major_version() = 3
or
result.getFunction().(AttrNode).getObject("bind").pointsTo(aSocket()) and
major_version() = 2
}
string allInterfaces() { result = "0.0.0.0" or result = "" }
Value getTextValue(string address) {
result = Value::forUnicode(address) and major_version() = 3
or
result = Value::forString(address) and major_version() = 2
}
from CallNode call, TupleValue args, string address
where
call = socketBindCall() and
call.getArg(0).pointsTo(args) and
args.getItem(0) = getTextValue(address) and
address = allInterfaces()
select call.getNode(), "'" + address + "' binds a socket to all interfaces."

View File

@@ -1,38 +0,0 @@
/**
* @name OLD QUERY: Uncontrolled data used in path expression
* @description Accessing paths influenced by users can allow an attacker to access unexpected resources.
* @kind path-problem
* @problem.severity error
* @id py/old/path-injection
*/
import python
import semmle.python.security.Paths
/* Sources */
import semmle.python.web.HttpRequest
/* Sinks */
import semmle.python.security.injection.Path
class PathInjectionConfiguration extends TaintTracking::Configuration {
PathInjectionConfiguration() { this = "Path injection configuration" }
override predicate isSource(TaintTracking::Source source) {
source instanceof HttpRequestTaintSource
}
override predicate isSink(TaintTracking::Sink sink) { sink instanceof OpenNode }
override predicate isSanitizer(Sanitizer sanitizer) {
sanitizer instanceof PathSanitizer or
sanitizer instanceof NormalizedPathSanitizer
}
override predicate isExtension(TaintTracking::Extension extension) {
extension instanceof AbsPath
}
}
from PathInjectionConfiguration config, TaintedPathSource src, TaintedPathSink sink
where config.hasFlowPath(src, sink)
select sink.getSink(), src, sink, "This path depends on $@.", src.getSource(),
"a user-provided value"

View File

@@ -1,36 +0,0 @@
/**
* @name OLD QUERY: Uncontrolled command line
* @description Using externally controlled strings in a command line may allow a malicious
* user to change the meaning of the command.
* @kind path-problem
* @problem.severity error
* @id py/old/command-line-injection
*/
import python
import semmle.python.security.Paths
/* Sources */
import semmle.python.web.HttpRequest
/* Sinks */
import semmle.python.security.injection.Command
class CommandInjectionConfiguration extends TaintTracking::Configuration {
CommandInjectionConfiguration() { this = "Command injection configuration" }
override predicate isSource(TaintTracking::Source source) {
source instanceof HttpRequestTaintSource
}
override predicate isSink(TaintTracking::Sink sink) { sink instanceof CommandSink }
override predicate isExtension(TaintTracking::Extension extension) {
extension instanceof FirstElementFlow
or
extension instanceof FabricExecuteExtension
}
}
from CommandInjectionConfiguration config, TaintedPathSource src, TaintedPathSink sink
where config.hasFlowPath(src, sink)
select sink.getSink(), src, sink, "This command depends on $@.", src.getSource(),
"a user-provided value"

View File

@@ -1,37 +0,0 @@
/**
* @name OLD QUERY: Reflected server-side cross-site scripting
* @description Writing user input directly to a web page
* allows for a cross-site scripting vulnerability.
* @kind path-problem
* @problem.severity error
* @id py/old/reflective-xss
*/
import python
import semmle.python.security.Paths
/* Sources */
import semmle.python.web.HttpRequest
/* Sinks */
import semmle.python.web.HttpResponse
/* Flow */
import semmle.python.security.strings.Untrusted
class ReflectedXssConfiguration extends TaintTracking::Configuration {
ReflectedXssConfiguration() { this = "Reflected XSS configuration" }
override predicate isSource(TaintTracking::Source source) {
source instanceof HttpRequestTaintSource
}
override predicate isSink(TaintTracking::Sink sink) {
sink instanceof HttpResponseTaintSink and
not sink instanceof DjangoResponseContent
or
sink instanceof DjangoResponseContentXSSVulnerable
}
}
from ReflectedXssConfiguration config, TaintedPathSource src, TaintedPathSink sink
where config.hasFlowPath(src, sink)
select sink.getSink(), src, sink, "Cross-site scripting vulnerability due to $@.", src.getSource(),
"a user-provided value"

View File

@@ -1,47 +0,0 @@
/**
* @name OLD QUERY: SQL query built from user-controlled sources
* @description Building a SQL query from user-controlled sources is vulnerable to insertion of
* malicious SQL code by the user.
* @kind path-problem
* @problem.severity error
* @id py/old/sql-injection
*/
import python
import semmle.python.security.Paths
/* Sources */
import semmle.python.web.HttpRequest
/* Sinks */
import semmle.python.security.injection.Sql
import semmle.python.web.django.Db
import semmle.python.web.django.Model
/**
 * Taint-tracking configuration of the legacy SQL-injection query: flow from
 * HTTP request taint sources into SQL injection sinks.
 */
class SQLInjectionConfiguration extends TaintTracking::Configuration {
  SQLInjectionConfiguration() { this = "SQL injection configuration" }

  /** Holds if `source` is an HTTP request taint source. */
  override predicate isSource(TaintTracking::Source source) {
    source instanceof HttpRequestTaintSource
  }

  /** Holds if `sink` is a SQL injection sink. */
  override predicate isSink(TaintTracking::Sink sink) {
    sink instanceof SqlInjectionSink
  }
}
/*
 * Additional configuration to support tracking of DB objects (connections, cursors, etc.).
 * Without this configuration (or the LegacyConfiguration), the pattern of
 * `any(MyTaintKind k).taints(control_flow_node)` used in DbConnectionExecuteArgument would not work.
 */

class DbConfiguration extends TaintTracking::Configuration {
  DbConfiguration() { this = "DB configuration" }

  /** Holds if `source` is a DB connection source or a Django model object. */
  override predicate isSource(TaintTracking::Source source) {
    source instanceof DbConnectionSource
    or
    source instanceof DjangoModelObjects
  }
}
// Report each taint path accepted by the configuration: the alert is placed on the sink,
// and the `$@` placeholder in the message links back to the source.
from SQLInjectionConfiguration config, TaintedPathSource src, TaintedPathSink sink
where config.hasFlowPath(src, sink)
select sink.getSink(), src, sink, "This SQL query depends on $@.", src.getSource(),
"a user-provided value"

View File

@@ -1,30 +0,0 @@
/**
* @name Code injection
* @description OLD QUERY: Interpreting unsanitized user input as code allows a malicious user arbitrary
* code execution.
* @kind path-problem
* @problem.severity error
* @id py/old/code-injection
*/
import python
import semmle.python.security.Paths
/* Sources */
import semmle.python.web.HttpRequest
/* Sinks */
import semmle.python.security.injection.Exec
/**
 * Taint-tracking configuration of the legacy code-injection query: flow from
 * HTTP request taint sources into string-evaluation nodes.
 */
class CodeInjectionConfiguration extends TaintTracking::Configuration {
  CodeInjectionConfiguration() { this = "Code injection configuration" }

  /** Holds if `source` is an HTTP request taint source. */
  override predicate isSource(TaintTracking::Source source) {
    source instanceof HttpRequestTaintSource
  }

  /** Holds if `sink` is a `StringEvaluationNode`. */
  override predicate isSink(TaintTracking::Sink sink) {
    sink instanceof StringEvaluationNode
  }
}
// Report each taint path accepted by the configuration: the alert is placed on the sink,
// and the `$@` placeholder in the message links back to the source.
from CodeInjectionConfiguration config, TaintedPathSource src, TaintedPathSink sink
where config.hasFlowPath(src, sink)
select sink.getSink(), src, sink, "$@ flows to here and is interpreted as code.", src.getSource(),
"A user-provided value"

View File

@@ -1,33 +0,0 @@
/**
* @name Clear-text logging of sensitive information
* @description OLD QUERY: Logging sensitive information without encryption or hashing can
* expose it to an attacker.
* @kind path-problem
* @problem.severity error
* @id py/old/clear-text-logging-sensitive-data
* @deprecated
*/
import python
import semmle.python.security.Paths
import semmle.python.dataflow.TaintTracking
import semmle.python.security.SensitiveData
import semmle.python.security.ClearText
/**
 * Taint-tracking configuration of the legacy clear-text logging query:
 * flow from sensitive-data sources into clear-text logging sinks.
 */
class CleartextLoggingConfiguration extends TaintTracking::Configuration {
  CleartextLoggingConfiguration() { this = "ClearTextLogging" }

  /** Holds if the CFG node of `src` is a sensitive-data source of taint kind `kind`. */
  override predicate isSource(DataFlow::Node src, TaintKind kind) {
    exists(SensitiveData::Source sensitive |
      sensitive = src.asCfgNode() and
      sensitive.isSourceOf(kind)
    )
  }

  /** Holds if the CFG node of `sink` is a clear-text logging sink and `kind` is sensitive data. */
  override predicate isSink(DataFlow::Node sink, TaintKind kind) {
    kind instanceof SensitiveData and
    sink.asCfgNode() instanceof ClearTextLogging::Sink
  }
}
// Report each taint path accepted by the configuration. The alert is placed on the sink and
// the `$@` placeholder links to the source, labelled with the source's `repr()`.
from CleartextLoggingConfiguration config, TaintedPathSource source, TaintedPathSink sink
where config.hasFlowPath(source, sink)
select sink.getSink(), source, sink, "Sensitive data returned by $@ is logged here.",
source.getSource(), source.getCfgNode().(SensitiveData::Source).repr()

View File

@@ -1,33 +0,0 @@
/**
* @name Clear-text storage of sensitive information
* @description OLD QUERY: Sensitive information stored without encryption or hashing can expose it to an
* attacker.
* @kind path-problem
* @problem.severity error
* @id py/old/clear-text-storage-sensitive-data
* @deprecated
*/
import python
import semmle.python.security.Paths
import semmle.python.dataflow.TaintTracking
import semmle.python.security.SensitiveData
import semmle.python.security.ClearText
/**
 * Taint-tracking configuration of the legacy clear-text storage query:
 * flow from sensitive-data sources into clear-text storage sinks.
 */
class CleartextStorageConfiguration extends TaintTracking::Configuration {
  CleartextStorageConfiguration() { this = "ClearTextStorage" }

  /** Holds if the CFG node of `src` is a sensitive-data source of taint kind `kind`. */
  override predicate isSource(DataFlow::Node src, TaintKind kind) {
    exists(SensitiveData::Source sensitive |
      sensitive = src.asCfgNode() and
      sensitive.isSourceOf(kind)
    )
  }

  /** Holds if the CFG node of `sink` is a clear-text storage sink and `kind` is sensitive data. */
  override predicate isSink(DataFlow::Node sink, TaintKind kind) {
    kind instanceof SensitiveData and
    sink.asCfgNode() instanceof ClearTextStorage::Sink
  }
}
// Report each taint path accepted by the configuration. The alert is placed on the sink and
// the `$@` placeholder links to the source, labelled with the source's `repr()`.
from CleartextStorageConfiguration config, TaintedPathSource source, TaintedPathSink sink
where config.hasFlowPath(source, sink)
select sink.getSink(), source, sink, "Sensitive data from $@ is stored here.", source.getSource(),
source.getCfgNode().(SensitiveData::Source).repr()

View File

@@ -1,78 +0,0 @@
/**
* @name OLD QUERY: Use of weak cryptographic key
* @description Use of a cryptographic key that is too small may allow the encryption to be broken.
* @kind problem
* @problem.severity error
* @id py/old/weak-crypto-key
*/
import python
/**
 * Gets the smallest key size (in bits) this query accepts for algorithm `algo`,
 * which is one of "DSA", "RSA" or "ECC".
 */
int minimumSecureKeySize(string algo) {
  algo = ["DSA", "RSA"] and result = 2048
  or
  algo = "ECC" and result = 224
}
/**
 * Holds if `func` is an attribute of a DSA or RSA module of `cryptography`,
 * `Crypto` or `Cryptodome`, `algorithm` is "DSA" or "RSA" accordingly, and `arg`
 * is the name of the key-size argument used by that library.
 */
predicate dsaRsaKeySizeArg(FunctionValue func, string algorithm, string arg) {
  exists(ModuleValue mod | func = mod.attr(_) |
    algorithm = ["DSA", "RSA"] and
    (
      // `cryptography` names its modules `dsa` / `rsa` and takes `key_size`.
      arg = "key_size" and
      mod =
        Module::named("cryptography.hazmat.primitives.asymmetric." + algorithm.toLowerCase())
      or
      // PyCrypto / PyCryptodome name their modules `DSA` / `RSA` and take `bits`.
      arg = "bits" and
      mod = Module::named(["Crypto", "Cryptodome"] + ".PublicKey." + algorithm)
    )
  )
}
/**
 * Holds if `func` is an attribute of `cryptography.hazmat.primitives.asymmetric.ec`
 * and `arg` is "curve", the argument name this query inspects for the key size.
 */
predicate ecKeySizeArg(FunctionValue func, string arg) {
  func = Module::named("cryptography.hazmat.primitives.asymmetric.ec").attr(_) and
  arg = "curve"
}
/** Gets the integer value of the `key_size` attribute declared on `curveClass`. */
int keySizeFromCurve(ClassValue curveClass) {
  exists(NumericValue size |
    size = curveClass.declaredAttribute("key_size") and
    result = size.getIntValue()
  )
}
/**
 * Holds if `call` creates a key for `algorithm` ("DSA", "RSA" or "ECC") of size
 * `keySize`, where the size-determining value originates at `keyOrigin`.
 */
predicate algorithmAndKeysizeForCall(
  CallNode call, string algorithm, int keySize, ControlFlowNode keyOrigin
) {
  exists(FunctionValue callee, string paramName, ControlFlowNode argNode |
    argNode = callee.getNamedArgumentForCall(call, paramName)
  |
    // Elliptic-curve case: the size is read from the class of the curve argument.
    algorithm = "ECC" and
    ecKeySizeArg(callee, paramName) and
    exists(Value curve |
      argNode.pointsTo(_, curve, keyOrigin) and
      keySize = keySizeFromCurve(curve.getClass())
    )
    or
    // DSA/RSA case: the argument itself points to a numeric key size.
    dsaRsaKeySizeArg(callee, algorithm, paramName) and
    exists(NumericValue size |
      argNode.pointsTo(size, keyOrigin) and
      keySize = size.getIntValue()
    )
  )
}
// Flag key-creating calls whose key size is below the minimum for the algorithm.
from CallNode call, string algo, int keySize, ControlFlowNode origin, int minimum
where
  algorithmAndKeysizeForCall(call, algo, keySize, origin) and
  minimum = minimumSecureKeySize(algo) and
  keySize < minimum
select call,
  "Creation of an " + algo + " key uses $@ bits, which is below " + minimum +
    " and considered breakable.", origin, keySize.toString()

View File

@@ -1,28 +0,0 @@
/**
* @name OLD QUERY: Use of a broken or weak cryptographic algorithm
* @description Using broken or weak cryptographic algorithms can compromise security.
* @kind path-problem
* @problem.severity warning
* @id py/old/weak-cryptographic-algorithm
* @deprecated
*/
import python
import semmle.python.security.Paths
import semmle.python.security.SensitiveData
import semmle.python.security.Crypto
/**
 * Taint-tracking configuration of the legacy weak-cryptography query:
 * flow from sensitive-data sources into weak-crypto sinks.
 */
class BrokenCryptoConfiguration extends TaintTracking::Configuration {
  BrokenCryptoConfiguration() { this = "Broken crypto configuration" }

  /** Holds if `source` is a sensitive-data source. */
  override predicate isSource(TaintTracking::Source source) {
    source instanceof SensitiveDataSource
  }

  /** Holds if `sink` is a weak-crypto sink. */
  override predicate isSink(TaintTracking::Sink sink) {
    sink instanceof WeakCryptoSink
  }
}
// Report each taint path accepted by the configuration: the alert is placed on the sink,
// and the `$@` placeholder in the message links back to the source.
from BrokenCryptoConfiguration config, TaintedPathSource src, TaintedPathSink sink
where config.hasFlowPath(src, sink)
select sink.getSink(), src, sink, "$@ is used in a broken or weak cryptographic algorithm.",
src.getSource(), "Sensitive data"

View File

@@ -1,32 +0,0 @@
/**
* @name OLD QUERY: Deserializing untrusted input
* @description Deserializing user-controlled data may allow attackers to execute arbitrary code.
* @kind path-problem
* @id py/old/unsafe-deserialization
* @problem.severity error
*/
import python
import semmle.python.security.Paths
// Sources -- Any untrusted input
import semmle.python.web.HttpRequest
// Flow -- untrusted string
import semmle.python.security.strings.Untrusted
// Sink -- Unpickling and other deserialization formats.
import semmle.python.security.injection.Pickle
import semmle.python.security.injection.Marshal
import semmle.python.security.injection.Yaml
/**
 * Taint-tracking configuration of the legacy unsafe-deserialization query:
 * flow from HTTP request taint sources into deserialization sinks.
 */
class UnsafeDeserializationConfiguration extends TaintTracking::Configuration {
  UnsafeDeserializationConfiguration() { this = "Unsafe deserialization configuration" }

  /** Holds if `source` is an HTTP request taint source. */
  override predicate isSource(TaintTracking::Source source) {
    source instanceof HttpRequestTaintSource
  }

  /** Holds if `sink` is a deserialization sink. */
  override predicate isSink(TaintTracking::Sink sink) {
    sink instanceof DeserializationSink
  }
}
// Report each taint path accepted by the configuration: the alert is placed on the sink,
// and the `$@` placeholder in the message links back to the source.
from UnsafeDeserializationConfiguration config, TaintedPathSource src, TaintedPathSink sink
where config.hasFlowPath(src, sink)
select sink.getSink(), src, sink, "Deserializing of $@.", src.getSource(), "untrusted input"

View File

@@ -1,37 +0,0 @@
/**
* @name OLD QUERY: URL redirection from remote source
* @description URL redirection based on unvalidated user input
* may cause redirection to malicious web sites.
* @kind path-problem
* @problem.severity error
* @id py/old/url-redirection
*/
import python
import semmle.python.security.Paths
import semmle.python.web.HttpRedirect
import semmle.python.web.HttpRequest
import semmle.python.security.strings.Untrusted
/**
 * An untrusted-string taint kind that does not propagate through the right-hand
 * operand of a binary expression, because URL redirection is a problem only if
 * the user controls the prefix of the URL.
 */
class UntrustedPrefixStringKind extends UntrustedStringKind {
  override TaintKind getTaintForFlowStep(ControlFlowNode fromnode, ControlFlowNode tonode) {
    // Drop steps where the tainted value is the right operand of `tonode`.
    not fromnode = tonode.(BinaryExprNode).getRight() and
    result = UntrustedStringKind.super.getTaintForFlowStep(fromnode, tonode)
  }
}
/**
 * Taint-tracking configuration of the legacy URL-redirection query:
 * flow from HTTP request taint sources into HTTP redirect sinks.
 */
class UrlRedirectConfiguration extends TaintTracking::Configuration {
  UrlRedirectConfiguration() { this = "URL redirect configuration" }

  /** Holds if `source` is an HTTP request taint source. */
  override predicate isSource(TaintTracking::Source source) {
    source instanceof HttpRequestTaintSource
  }

  /** Holds if `sink` is an HTTP redirect taint sink. */
  override predicate isSink(TaintTracking::Sink sink) {
    sink instanceof HttpRedirectTaintSink
  }
}
// Report each taint path accepted by the configuration: the alert is placed on the sink,
// and the `$@` placeholder in the message links back to the source.
from UrlRedirectConfiguration config, TaintedPathSource src, TaintedPathSink sink
where config.hasFlowPath(src, sink)
select sink.getSink(), src, sink, "Untrusted URL redirection due to $@.", src.getSource(),
"a user-provided value"

View File

@@ -5,8 +5,7 @@
* @kind path-problem
* @problem.severity error
* @id py/ldap-injection
* @tags experimental
* security
* @tags security
* external/cwe/cwe-090
*/

View File

@@ -4,8 +4,7 @@
* @kind problem
* @problem.severity warning
* @id py/improper-ldap-auth
* @tags experimental
* security
* @tags security
* external/cwe/cwe-287
*/

View File

@@ -0,0 +1,7 @@
import jwt
# Insecure: the algorithm (third positional argument) is None
jwt.encode(payload, "somekey", None)
# Insecure: the key is an empty string
jwt.encode(payload, key="", algorithm="HS256")

View File

@@ -0,0 +1,30 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>Applications encoding a JSON Web Token (JWT) may be vulnerable when the applied key or algorithm
is empty or <code>None</code>.</p>
</overview>
<recommendation>
<p>Use a non-empty key and an algorithm other than <code>None</code> when encoding JWT payloads.</p>
</recommendation>
<example>
<p>This example shows two PyJWT encoding calls.
The first call uses <code>None</code> as the algorithm, whereas the second call uses an
empty key. Both calls leave the payload insecurely encoded.
</p>
<sample src="JWTEmptyKeyOrAlgorithm.py" />
</example>
<references>
<li>PyJWT: <a href="https://pyjwt.readthedocs.io/en/stable/">Documentation</a>.</li>
<li>Authlib JWT: <a href="https://docs.authlib.org/en/latest/specs/rfc7519.html">Documentation</a>.</li>
<li>Python-Jose: <a href="https://github.com/mpdavis/python-jose">Documentation</a>.</li>
<li>Auth0 Blog: <a href="https://auth0.com/blog/critical-vulnerabilities-in-json-web-token-libraries/#Meet-the--None--Algorithm">Meet the "None" Algorithm</a>.</li>
</references>
</qhelp>

View File

@@ -0,0 +1,22 @@
/**
* @name JWT encoding using empty key or algorithm
* @description The application uses an empty secret or algorithm while encoding a JWT Token.
* @kind problem
* @problem.severity warning
* @id py/jwt-empty-secret-or-algorithm
* @tags security
*/
// determine precision above
import python
import experimental.semmle.python.Concepts
import experimental.semmle.python.frameworks.JWT
// Flag JWT encoding calls whose key or algorithm argument is an empty string or `None`;
// `affectedComponent` names which of the two is affected, for use in the alert message.
from JWTEncoding jwtEncoding, string affectedComponent
where
  (
    isEmptyOrNone(jwtEncoding.getKey()) and
    affectedComponent = "key"
  )
  or
  (
    isEmptyOrNone(jwtEncoding.getAlgorithm()) and
    affectedComponent = "algorithm"
  )
select jwtEncoding, "This JWT encoding has an empty " + affectedComponent + "."

View File

@@ -0,0 +1,4 @@
import jwt
# Insecure: decoding with the `verify` argument set to False
jwt.decode(payload, key="somekey", verify=False)

View File

@@ -0,0 +1,30 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>Applications decoding a JSON Web Token (JWT) may be vulnerable when the
token's signature isn't verified in the process.
</p>
</overview>
<recommendation>
<p>Set the <code>verify</code> argument to <code>True</code> or use
a framework that does it by default.
</p>
</recommendation>
<example>
<p>This example shows a PyJWT decoding call with the <code>verify</code>
argument set to <code>False</code>.
</p>
<sample src="JWTMissingSecretOrPublicKeyVerification.py" />
</example>
<references>
<li>PyJWT: <a href="https://pyjwt.readthedocs.io/en/stable/">Documentation</a>.</li>
<li>Authlib JWT: <a href="https://docs.authlib.org/en/latest/specs/rfc7519.html">Documentation</a>.</li>
<li>Python-Jose: <a href="https://github.com/mpdavis/python-jose">Documentation</a>.</li>
</references>
</qhelp>

View File

@@ -0,0 +1,17 @@
/**
* @name JWT missing secret or public key verification
* @description The application does not verify the JWT payload with a cryptographic secret or public key.
* @kind problem
* @problem.severity warning
* @id py/jwt-missing-verification
* @tags security
* external/cwe/cwe-347
*/
// determine precision above
import python
import experimental.semmle.python.Concepts
// Flag the payload argument of every modeled JWT decoding call for which
// `verifiesSignature()` does not hold.
// NOTE(review): the alert message is a sentence fragment without a subject; consider
// rewording it (together with any expected test output) - confirm against the query style guide.
from JWTDecoding jwtDecoding
where not jwtDecoding.verifiesSignature()
select jwtDecoding.getPayload(), "is not verified with a cryptographic secret or public key."

View File

@@ -4,8 +4,7 @@
* @kind path-problem
* @problem.severity error
* @id py/insecure-ldap-auth
* @tags experimental
* security
* @tags security
* external/cwe/cwe-522
* external/cwe/cwe-523
*/

View File

@@ -5,8 +5,7 @@
* @kind path-problem
* @problem.severity error
* @id py/nosql-injection
* @tags experimental
* security
* @tags security
* external/cwe/cwe-943
*/

View File

@@ -386,3 +386,141 @@ class HeaderDeclaration extends DataFlow::Node {
*/
DataFlow::Node getValueArg() { result = range.getValueArg() }
}
/** Provides classes for modeling JWT encoding-related APIs. */
module JWTEncoding {
/**
* A data-flow node that encodes a JWT token.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `JWTEncoding` instead.
*/
abstract class Range extends DataFlow::Node {
/**
* Gets the argument containing the payload to be encoded.
*/
abstract DataFlow::Node getPayload();
/**
* Gets the argument containing the encoding key.
*/
abstract DataFlow::Node getKey();
/**
* Gets the argument for the algorithm used in the encoding.
*/
abstract DataFlow::Node getAlgorithm();
/**
* Gets a string representation of the algorithm used in the encoding.
*/
abstract string getAlgorithmString();
}
}
/**
* A data-flow node that encodes a JWT token.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `JWTEncoding::Range` instead.
*/
class JWTEncoding extends DataFlow::Node instanceof JWTEncoding::Range {
/**
* Gets the argument containing the payload to be encoded.
*/
DataFlow::Node getPayload() { result = super.getPayload() }
/**
* Gets the argument containing the encoding key.
*/
DataFlow::Node getKey() { result = super.getKey() }
/**
* Gets the argument for the algorithm used in the encoding.
*/
DataFlow::Node getAlgorithm() { result = super.getAlgorithm() }
/**
* Gets a string representation of the algorithm used in the encoding.
*/
string getAlgorithmString() { result = super.getAlgorithmString() }
}
/** Provides classes for modeling JWT decoding-related APIs. */
module JWTDecoding {
/**
* A data-flow node that decodes a JWT token.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `JWTDecoding` instead.
*/
abstract class Range extends DataFlow::Node {
/**
* Gets the argument containing the JWT to be decoded.
*/
abstract DataFlow::Node getPayload();
/**
* Gets the argument containing the decoding key.
*/
abstract DataFlow::Node getKey();
/**
* Gets the argument for the algorithm used in the decoding.
*/
abstract DataFlow::Node getAlgorithm();
/**
* Gets a string representation of the algorithm used in the decoding.
*/
abstract string getAlgorithmString();
/**
* Gets the options node used in the decoding.
*/
abstract DataFlow::Node getOptions();
/**
* Holds if the signature gets verified while decoding.
*/
abstract predicate verifiesSignature();
}
}
/**
* A data-flow node that decodes a JWT token.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `JWTDecoding::Range` instead.
*/
class JWTDecoding extends DataFlow::Node instanceof JWTDecoding::Range {
/**
* Gets the argument containing the JWT to be decoded.
*/
DataFlow::Node getPayload() { result = super.getPayload() }
/**
* Gets the argument containing the decoding key.
*/
DataFlow::Node getKey() { result = super.getKey() }
/**
* Gets the argument for the algorithm used in the decoding.
*/
DataFlow::Node getAlgorithm() { result = super.getAlgorithm() }
/**
* Gets a string representation of the algorithm used in the decoding.
*/
string getAlgorithmString() { result = super.getAlgorithmString() }
/**
* Gets the options node used in the decoding.
*/
DataFlow::Node getOptions() { result = super.getOptions() }
/**
* Holds if the signature gets verified while decoding.
*/
predicate verifiesSignature() { super.verifiesSignature() }
}

View File

@@ -10,3 +10,7 @@ private import experimental.semmle.python.frameworks.Werkzeug
private import experimental.semmle.python.frameworks.LDAP
private import experimental.semmle.python.frameworks.NoSQL
private import experimental.semmle.python.frameworks.Log
private import experimental.semmle.python.frameworks.JWT
private import experimental.semmle.python.libraries.PyJWT
private import experimental.semmle.python.libraries.Authlib
private import experimental.semmle.python.libraries.PythonJose

View File

@@ -10,7 +10,7 @@ private import experimental.semmle.python.Concepts
private import semmle.python.ApiGraphs
import semmle.python.dataflow.new.RemoteFlowSources
private module PrivateDjango {
private module ExperimentalPrivateDjango {
private module django {
API::Node http() { result = API::moduleImport("django").getMember("http") }

View File

@@ -0,0 +1,23 @@
private import python
private import semmle.python.ApiGraphs
/** Holds if `None` or an empty string `""` flows to `arg`. */
predicate isEmptyOrNone(DataFlow::Node arg) {
  isNone(arg)
  or
  isEmpty(arg)
}
/** Holds if an empty string `""` flows to `arg`. */
predicate isEmpty(DataFlow::Node arg) {
  DataFlow::exprNode(any(StrConst literal | literal.getText() = ""))
      .(DataFlow::LocalSourceNode)
      .flowsTo(arg)
}
/** Holds if `None` flows to `arg`. */
predicate isNone(DataFlow::Node arg) {
  exists(None noneExpr |
    DataFlow::exprNode(noneExpr).(DataFlow::LocalSourceNode).flowsTo(arg)
  )
}
/** Holds if `False` flows to `arg`. */
predicate isFalse(DataFlow::Node arg) {
  exists(False falseLiteral |
    DataFlow::exprNode(falseLiteral).(DataFlow::LocalSourceNode).flowsTo(arg)
  )
}

View File

@@ -0,0 +1,87 @@
private import python
private import experimental.semmle.python.Concepts
private import semmle.python.ApiGraphs
private import experimental.semmle.python.frameworks.JWT
private module Authlib {
  /** Gets a reference to `authlib.jose.(jwt|JsonWebToken)` */
  private API::Node authlibJWT() {
    exists(API::Node jose | jose = API::moduleImport("authlib").getMember("jose") |
      result = jose.getMember("jwt")
      or
      result = jose.getMember("JsonWebToken").getReturn()
    )
  }

  /** Gets a reference to `jwt.encode` */
  private API::Node authlibJWTEncode() { result = authlibJWT().getMember("encode") }

  /** Gets a reference to `jwt.decode` */
  private API::Node authlibJWTDecode() { result = authlibJWT().getMember("decode") }

  /**
   * A call to `authlib.jose.(jwt|JsonWebToken).encode`.
   *
   * Given the following example:
   *
   * ```py
   * jwt.encode({"alg": "HS256"}, token, "key")
   * ```
   *
   * * `this` would be `jwt.encode({"alg": "HS256"}, token, "key")`.
   * * `getPayload()`'s result would be `token`.
   * * `getKey()`'s result would be `"key"`.
   * * `getAlgorithm()`'s result would be `"HS256"`.
   * * `getAlgorithmString()`'s result would be `HS256`.
   */
  private class AuthlibJWTEncodeCall extends DataFlow::CallCfgNode, JWTEncoding::Range {
    AuthlibJWTEncodeCall() { this = authlibJWTEncode().getACall() }

    override DataFlow::Node getPayload() { result = this.getArg(1) }

    override DataFlow::Node getKey() { result = this.getArg(2) }

    override DataFlow::Node getAlgorithm() {
      // The algorithm is the value stored under "alg" in the header dict literal (argument 0).
      exists(KeyValuePair headerEntry |
        headerEntry = this.getArg(0).asExpr().(Dict).getItem(_) and
        headerEntry.getKey().(Str_).getS() = "alg" and
        result.asExpr() = headerEntry.getValue()
      )
    }

    override string getAlgorithmString() {
      result =
        any(StrConst algLiteral |
          DataFlow::exprNode(algLiteral)
              .(DataFlow::LocalSourceNode)
              .flowsTo(this.getAlgorithm())
        ).getText()
    }
  }

  /**
   * A call to `authlib.jose.(jwt|JsonWebToken).decode`
   *
   * Given the following example:
   *
   * ```py
   * jwt.decode(token, key)
   * ```
   *
   * * `this` would be `jwt.decode(token, key)`.
   * * `getPayload()`'s result would be `token`.
   * * `getKey()`'s result would be `key`.
   *
   * No algorithm or options argument is modeled, and `verifiesSignature()`
   * holds for every such call.
   */
  private class AuthlibJWTDecodeCall extends DataFlow::CallCfgNode, JWTDecoding::Range {
    AuthlibJWTDecodeCall() { this = authlibJWTDecode().getACall() }

    override DataFlow::Node getPayload() { result = this.getArg(0) }

    override DataFlow::Node getKey() { result = this.getArg(1) }

    override DataFlow::Node getAlgorithm() { none() }

    override string getAlgorithmString() { none() }

    override DataFlow::Node getOptions() { none() }

    override predicate verifiesSignature() { any() }
  }
}

View File

@@ -0,0 +1,108 @@
private import python
private import experimental.semmle.python.Concepts
private import semmle.python.ApiGraphs
private import experimental.semmle.python.frameworks.JWT
private module PyJWT {
  /** Gets a reference to `jwt.encode` */
  private API::Node pyjwtEncode() { result = API::moduleImport("jwt").getMember("encode") }

  /** Gets a reference to `jwt.decode` */
  private API::Node pyjwtDecode() { result = API::moduleImport("jwt").getMember("decode") }

  /**
   * A call to `jwt.encode`.
   *
   * Given the following example:
   *
   * ```py
   * jwt.encode(token, "key", "HS256")
   * ```
   *
   * * `this` would be `jwt.encode(token, "key", "HS256")`.
   * * `getPayload()`'s result would be `token`.
   * * `getKey()`'s result would be `"key"`.
   * * `getAlgorithm()`'s result would be `"HS256"`.
   * * `getAlgorithmString()`'s result would be `HS256`.
   */
  private class PyJWTEncodeCall extends DataFlow::CallCfgNode, JWTEncoding::Range {
    PyJWTEncodeCall() { this = pyjwtEncode().getACall() }

    override DataFlow::Node getPayload() {
      result in [this.getArg(0), this.getArgByName("payload")]
    }

    override DataFlow::Node getKey() { result in [this.getArg(1), this.getArgByName("key")] }

    override DataFlow::Node getAlgorithm() {
      result in [this.getArg(2), this.getArgByName("algorithm")]
    }

    override string getAlgorithmString() {
      exists(StrConst str |
        DataFlow::exprNode(str).(DataFlow::LocalSourceNode).flowsTo(this.getAlgorithm()) and
        result = str.getText()
      )
    }
  }

  /**
   * A call to `jwt.decode`.
   *
   * Given the following example:
   *
   * ```py
   * jwt.decode(token, key, "HS256", options={"verify_signature": True})
   * ```
   *
   * * `this` would be `jwt.decode(token, key, "HS256", options={"verify_signature": True})`.
   * * `getPayload()`'s result would be `token`.
   * * `getKey()`'s result would be `key`.
   * * `getAlgorithm()`'s result would be `"HS256"`.
   * * `getAlgorithmString()`'s result would be `HS256`.
   * * `getOptions()`'s result would be `{"verify_signature": True}`.
   * * `verifiesSignature()` predicate would succeed.
   */
  private class PyJWTDecodeCall extends DataFlow::CallCfgNode, JWTDecoding::Range {
    PyJWTDecodeCall() { this = pyjwtDecode().getACall() }

    override DataFlow::Node getPayload() { result in [this.getArg(0), this.getArgByName("jwt")] }

    override DataFlow::Node getKey() { result in [this.getArg(1), this.getArgByName("key")] }

    override DataFlow::Node getAlgorithm() {
      result in [this.getArg(2), this.getArgByName("algorithms")]
    }

    override string getAlgorithmString() {
      exists(StrConst str |
        DataFlow::exprNode(str).(DataFlow::LocalSourceNode).flowsTo(this.getAlgorithm()) and
        result = str.getText()
      )
    }

    override DataFlow::Node getOptions() {
      result in [this.getArg(3), this.getArgByName("options")]
    }

    /**
     * Holds unless verification is explicitly disabled, either through
     * `verify=False` or through `options={"verify_signature": False}`.
     */
    override predicate verifiesSignature() {
      not this.hasVerifySetToFalse() and
      not this.hasVerifySignatureSetToFalse()
    }

    /** Holds if the call has neither a `verify` keyword argument nor an options argument. */
    predicate hasNoVerifyArgumentOrOptions() {
      not exists(this.getArgByName("verify")) and not exists(this.getOptions())
    }

    /** Holds if `False` flows to the `verify` keyword argument. */
    predicate hasVerifySetToFalse() { isFalse(this.getArgByName("verify")) }

    /**
     * Holds if the options argument is a dict literal that maps
     * `"verify_signature"` to the literal `False`.
     *
     * Only the `verify_signature` key is considered: the other `verify_*`
     * options (such as `verify_exp` or `verify_aud`) switch off claim checks,
     * not the signature check, so matching them would report calls that still
     * verify the signature.
     */
    predicate hasVerifySignatureSetToFalse() {
      exists(KeyValuePair optionsDict, NameConstant falseName |
        falseName.getId() = "False" and
        optionsDict = this.getOptions().asExpr().(Dict).getItem(_) and
        optionsDict.getKey().(Str_).getS() = "verify_signature" and
        falseName = optionsDict.getValue()
      )
    }
  }
}

View File

@@ -0,0 +1,105 @@
private import python
private import experimental.semmle.python.Concepts
private import semmle.python.ApiGraphs
private import experimental.semmle.python.frameworks.JWT
private module PythonJose {
  /** Gets a reference to `jwt` */
  private API::Node joseJWT() { result = API::moduleImport("jose").getMember("jwt") }

  /** Gets a reference to `jwt.encode` */
  private API::Node joseJWTEncode() { result = joseJWT().getMember("encode") }

  /** Gets a reference to `jwt.decode` */
  private API::Node joseJWTDecode() { result = joseJWT().getMember("decode") }

  /**
   * A call to `jwt.encode`.
   *
   * Given the following example:
   *
   * ```py
   * jwt.encode(token, key="key", algorithm="HS256")
   * ```
   *
   * * `this` would be `jwt.encode(token, key="key", algorithm="HS256")`.
   * * `getPayload()`'s result would be `token`.
   * * `getKey()`'s result would be `"key"`.
   * * `getAlgorithm()`'s result would be `"HS256"`.
   * * `getAlgorithmString()`'s result would be `HS256`.
   */
  private class JoseJWTEncodeCall extends DataFlow::CallCfgNode, JWTEncoding::Range {
    JoseJWTEncodeCall() { this = joseJWTEncode().getACall() }

    override DataFlow::Node getPayload() { result = this.getArg(0) }

    override DataFlow::Node getKey() { result in [this.getArg(1), this.getArgByName("key")] }

    override DataFlow::Node getAlgorithm() {
      result in [this.getArg(2), this.getArgByName("algorithm")]
    }

    override string getAlgorithmString() {
      exists(StrConst str |
        DataFlow::exprNode(str).(DataFlow::LocalSourceNode).flowsTo(this.getAlgorithm()) and
        result = str.getText()
      )
    }
  }

  /**
   * A call to `jwt.decode`.
   *
   * Given the following example:
   *
   * ```py
   * jwt.decode(token, "key", "HS256")
   * ```
   *
   * * `this` would be `jwt.decode(token, "key", "HS256")`.
   * * `getPayload()`'s result would be `token`.
   * * `getKey()`'s result would be `"key"`.
   * * `getAlgorithm()`'s result would be `"HS256"`.
   * * `getAlgorithmString()`'s result would be `HS256`.
   * * `getOptions()`'s result would be none.
   * * `verifiesSignature()` predicate would succeed.
   */
  private class JoseJWTDecodeCall extends DataFlow::CallCfgNode, JWTDecoding::Range {
    JoseJWTDecodeCall() { this = joseJWTDecode().getACall() }

    override DataFlow::Node getPayload() { result = this.getArg(0) }

    override DataFlow::Node getKey() { result in [this.getArg(1), this.getArgByName("key")] }

    override DataFlow::Node getAlgorithm() {
      result in [this.getArg(2), this.getArgByName("algorithms")]
    }

    override string getAlgorithmString() {
      exists(StrConst str |
        DataFlow::exprNode(str).(DataFlow::LocalSourceNode).flowsTo(this.getAlgorithm()) and
        result = str.getText()
      )
    }

    override DataFlow::Node getOptions() {
      result in [this.getArg(3), this.getArgByName("options")]
    }

    /** Holds unless the call is of the form `jwt.decode(token, key, options={"verify_signature": False})`. */
    override predicate verifiesSignature() { not this.hasVerifySignatureSetToFalse() }

    /** Holds if the call has no options argument. */
    predicate hasNoOptions() { not exists(this.getOptions()) }

    /**
     * Holds if the options argument is a dict literal that maps
     * `"verify_signature"` to the literal `False`.
     *
     * Only the `verify_signature` key is considered: the other `verify_*`
     * options (such as `verify_exp` or `verify_aud`) switch off claim checks,
     * not the signature check, so matching them would report calls that still
     * verify the signature.
     */
    predicate hasVerifySignatureSetToFalse() {
      exists(KeyValuePair optionsDict, NameConstant falseName |
        falseName.getId() = "False" and
        optionsDict = this.getOptions().asExpr().(Dict).getItem(_) and
        optionsDict.getKey().(Str_).getS() = "verify_signature" and
        falseName = optionsDict.getValue()
      )
    }
  }
}

View File

@@ -19,7 +19,9 @@ class ExternalDefect extends @externalDefect {
Location getLocation() { externalDefects(this, _, result, _, _) }
/** Gets a textual representation of this element. */
string toString() { result = getQueryPath() + ": " + getLocation() + " - " + getMessage() }
string toString() {
result = this.getQueryPath() + ": " + this.getLocation() + " - " + this.getMessage()
}
}
class ExternalMetric extends @externalMetric {
@@ -30,7 +32,9 @@ class ExternalMetric extends @externalMetric {
Location getLocation() { externalMetrics(this, _, result, _) }
/** Gets a textual representation of this element. */
string toString() { result = getQueryPath() + ": " + getLocation() + " - " + getValue() }
string toString() {
result = this.getQueryPath() + ": " + this.getLocation() + " - " + this.getValue()
}
}
/**
@@ -44,7 +48,7 @@ class ExternalData extends @externalDataElement {
* Gets the path of the file this data was loaded from, with its
* extension replaced by `.ql`.
*/
string getQueryPath() { result = getDataPath().regexpReplaceAll("\\.[^.]*$", ".ql") }
string getQueryPath() { result = this.getDataPath().regexpReplaceAll("\\.[^.]*$", ".ql") }
/** Gets the number of fields in this data item. */
int getNumFields() { result = 1 + max(int i | externalData(this, _, i, _) | i) }
@@ -53,22 +57,23 @@ class ExternalData extends @externalDataElement {
string getField(int index) { externalData(this, _, index, result) }
/** Gets the integer value of the field at position `index` of this data item. */
int getFieldAsInt(int index) { result = getField(index).toInt() }
int getFieldAsInt(int index) { result = this.getField(index).toInt() }
/** Gets the floating-point value of the field at position `index` of this data item. */
float getFieldAsFloat(int index) { result = getField(index).toFloat() }
float getFieldAsFloat(int index) { result = this.getField(index).toFloat() }
/** Gets the value of the field at position `index` of this data item, interpreted as a date. */
date getFieldAsDate(int index) { result = getField(index).toDate() }
date getFieldAsDate(int index) { result = this.getField(index).toDate() }
/** Gets a textual representation of this data item. */
string toString() { result = getQueryPath() + ": " + buildTupleString(0) }
string toString() { result = this.getQueryPath() + ": " + this.buildTupleString(0) }
/** Gets a textual representation of this data item, starting with the field at position `start`. */
private string buildTupleString(int start) {
start = getNumFields() - 1 and result = getField(start)
start = this.getNumFields() - 1 and result = this.getField(start)
or
start < getNumFields() - 1 and result = getField(start) + "," + buildTupleString(start + 1)
start < this.getNumFields() - 1 and
result = this.getField(start) + "," + this.buildTupleString(start + 1)
}
}
@@ -81,7 +86,7 @@ class DefectExternalData extends ExternalData {
this.getNumFields() = 2
}
string getURL() { result = getField(0) }
string getURL() { result = this.getField(0) }
string getMessage() { result = getField(1) }
string getMessage() { result = this.getField(1) }
}

View File

@@ -13,9 +13,9 @@ class ThriftElement extends ExternalData {
string getKind() { result = kind }
string getId() { result = getField(0) }
string getId() { result = this.getField(0) }
int getIndex() { result = getFieldAsInt(1) }
int getIndex() { result = this.getFieldAsInt(1) }
ThriftElement getParent() { result.getId() = this.getField(2) }

View File

@@ -29,7 +29,7 @@ class Commit extends @svnentry {
)
}
string getAnAffectedFilePath() { result = getAnAffectedFilePath(_) }
string getAnAffectedFilePath() { result = this.getAnAffectedFilePath(_) }
File getAnAffectedFile(string action) { svnaffectedfiles(this, result, action) }
@@ -38,7 +38,7 @@ class Commit extends @svnentry {
predicate isRecent() { recentCommit(this) }
int daysToNow() {
exists(date now | snapshotDate(now) | result = getDate().daysTo(now) and result >= 0)
exists(date now | snapshotDate(now) | result = this.getDate().daysTo(now) and result >= 0)
}
int getRecentAdditionsForFile(File f) { svnchurn(this, f, result, _) }
@@ -46,7 +46,7 @@ class Commit extends @svnentry {
int getRecentDeletionsForFile(File f) { svnchurn(this, f, _, result) }
int getRecentChurnForFile(File f) {
result = getRecentAdditionsForFile(f) + getRecentDeletionsForFile(f)
result = this.getRecentAdditionsForFile(f) + this.getRecentDeletionsForFile(f)
}
}

View File

@@ -0,0 +1,23 @@
/**
* @name Request Handlers
* @description HTTP Server Request Handlers
* @kind problem
* @problem.severity recommendation
* @id py/meta/alerts/request-handlers
* @tags meta
* @precision very-low
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import meta.MetaMetrics
// Reports every modeled HTTP server request handler outside of ignored files,
// labelling methods as `Method <Class>.<name>` and everything else by its
// default textual representation.
from HTTP::Server::RequestHandler requestHandler, string title
where
  // Skip handlers located in files that the meta-metrics library ignores.
  not requestHandler.getLocation().getFile() instanceof IgnoredFile and
  (
    // Handler is a method: qualify its name with the enclosing class.
    requestHandler.isMethod() and
    title = "Method " + requestHandler.getScope().(Class).getName() + "." + requestHandler.getName()
    or
    // Any other handler: fall back to its `toString()` value.
    not requestHandler.isMethod() and
    title = requestHandler.toString()
  )
select requestHandler, "RequestHandler: " + title

View File

@@ -1,8 +1,11 @@
name: codeql/python-queries
version: 0.0.2
version: 0.0.8-dev
groups:
- python
- queries
dependencies:
codeql/python-all: "*"
codeql/suite-helpers: "*"
codeql/python-all: "*"
codeql/suite-helpers: "*"
suites: codeql-suites
extractor: python
defaultSuiteFile: codeql-suites/python-code-scanning.qls