Merge remote-tracking branch 'origin/main' into jty/python/emailInjection

This commit is contained in:
jorgectf
2022-02-26 01:22:55 +01:00
4485 changed files with 536628 additions and 132797 deletions

View File

@@ -0,0 +1,39 @@
## 0.0.9
### Bug Fixes
* The [View AST functionality](https://codeql.github.com/docs/codeql-for-visual-studio-code/exploring-the-structure-of-your-source-code/) no longer prints detailed information about regular expressions, greatly improving performance.
## 0.0.8
### Major Analysis Improvements
* User names and other account information is no longer considered to be sensitive data for the queries `py/clear-text-logging-sensitive-data` and `py/clear-text-storage-sensitive-data`, since this lead to many false positives.
## 0.0.7
## 0.0.6
### New Queries
* Two new queries have been added for detecting Server-side request forgery (SSRF). _Full server-side request forgery_ (`py/full-ssrf`) will only alert when the URL is fully user-controlled, and _Partial server-side request forgery_ (`py/partial-ssrf`) will alert when any part of the URL is user-controlled. Only `py/full-ssrf` will be run by default.
### Minor Analysis Improvements
* To support the new SSRF queries, the PyPI package `requests` has been modeled, along with `http.client.HTTP[S]Connection` from the standard library.
## 0.0.5
### Minor Analysis Improvements
* Added modeling of many functions from the `os` module that uses file system paths, such as `os.stat`, `os.chdir`, `os.mkdir`, and so on. All of these are new sinks for the _Uncontrolled data used in path expression_ (`py/path-injection`) query.
* Added modeling of the `tempfile` module for creating temporary files and directories, such as the functions `tempfile.NamedTemporaryFile` and `tempfile.TemporaryDirectory`. The `suffix`, `prefix`, and `dir` arguments are all vulnerable to path-injection, and these are new sinks for the _Uncontrolled data used in path expression_ (`py/path-injection`) query.
* Extended the modeling of FastAPI such that `fastapi.responses.FileResponse` are considered `FileSystemAccess`, making them sinks for the _Uncontrolled data used in path expression_ (`py/path-injection`) query.
* Added modeling of the `posixpath`, `ntpath`, and `genericpath` modules for path operations (although these are not supposed to be used), resulting in new sinks for the _Uncontrolled data used in path expression_ (`py/path-injection`) query.
* Added modeling of `wsgiref.simple_server` applications, leading to new remote flow sources.
## 0.0.4
### Query Metadata Changes
* Fixed the query ids of two queries that are meant for manual exploration: `python/count-untrusted-data-external-api` and `python/untrusted-data-to-external-api` have been changed to `py/count-untrusted-data-external-api` and `py/untrusted-data-to-external-api`.

View File

@@ -85,7 +85,7 @@ class CheckClass extends ClassObject {
predicate interestingUndefined(SelfAttributeRead a) {
exists(string name | name = a.getName() |
interestingContext(a, name) and
this.interestingContext(a, name) and
not this.definedInBlock(a.getAFlowNode().getBasicBlock(), name)
)
}
@@ -98,7 +98,7 @@ class CheckClass extends ClassObject {
not a.guardedByHasattr() and
a.getScope().isPublic() and
not this.monkeyPatched(name) and
not attribute_assigned_in_method(lookupAttribute("setUp"), name)
not attribute_assigned_in_method(this.lookupAttribute("setUp"), name)
}
private predicate probablyAbstract() {
@@ -127,7 +127,7 @@ class CheckClass extends ClassObject {
// so we can push the context in from there, which must apply to a
// SelfAttributeRead in the same scope
exists(SelfAttributeRead a | a.getScope() = b.getScope() and name = a.getName() |
interestingContext(a, name)
this.interestingContext(a, name)
) and
this.definitionInBlock(b, name)
or

View File

@@ -40,9 +40,7 @@ library class PossibleAdvancedFormatString extends StrConst {
private predicate implicitlyNumberedField(int start, int end) {
this.field(start, end) and
exists(string c | start + 1 = this.getText().indexOf(c) |
c = "}" or c = ":" or c = "!" or c = "."
)
exists(string c | start + 1 = this.getText().indexOf(c) | c in ["}", ":", "!", "."])
}
/** Whether this format string has implicitly numbered fields */

View File

@@ -32,7 +32,7 @@ predicate duplicate_char_in_class(Regex r, string char) {
//Ignore whitespace in verbose mode
not (
r.getAMode() = "VERBOSE" and
(char = " " or char = "\t" or char = "\r" or char = "\n")
char in [" ", "\t", "\r", "\n"]
)
}

View File

@@ -3,7 +3,7 @@
* @description This reports the external APIs that are used with untrusted data, along with how
* frequently the API is called, and how many unique sources of untrusted data flow
* to it.
* @id python/count-untrusted-data-external-api
* @id py/count-untrusted-data-external-api
* @kind table
* @tags security external/cwe/cwe-20
*/

View File

@@ -1,7 +1,7 @@
/**
* @name Untrusted data passed to external API
* @description Data provided remotely is used in this external API without sanitization, which could be a security risk.
* @id python/untrusted-data-to-external-api
* @id py/untrusted-data-to-external-api
* @kind path-problem
* @precision low
* @problem.severity error

View File

@@ -9,7 +9,6 @@
* @id py/path-injection
* @tags correctness
* security
* external/owasp/owasp-a1
* external/cwe/cwe-022
* external/cwe/cwe-023
* external/cwe/cwe-036
@@ -19,7 +18,9 @@
import python
import semmle.python.security.dataflow.PathInjection
import DataFlow::PathGraph
from CustomPathNode source, CustomPathNode sink
where pathInjection(source, sink)
select sink, source, sink, "This path depends on $@.", source, "a user-provided value"
from PathInjection::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "This path depends on $@.", source.getNode(),
"a user-provided value"

View File

@@ -10,7 +10,6 @@
* @id py/command-line-injection
* @tags correctness
* security
* external/owasp/owasp-a1
* external/cwe/cwe-078
* external/cwe/cwe-088
*/

View File

@@ -9,7 +9,6 @@
* @id py/sql-injection
* @tags security
* external/cwe/cwe-089
* external/owasp/owasp-a1
*/
import python

View File

@@ -4,22 +4,22 @@
<qhelp>
<overview>
<p>If an LDAP query or DN is built using string concatenation or string formatting, and the
components of the concatenation include user input without any proper sanitization, a user
components of the concatenation include user input without any proper sanitization, a user
is likely to be able to run malicious LDAP queries.</p>
</overview>
<recommendation>
<p>If user input must be included in an LDAP query or DN, it should be escaped to
avoid a malicious user providing special characters that change the meaning
of the query. In Python2, user input should be escaped with <code>ldap.dn.escape_dn_chars</code>
or <code>ldap.filter.escape_filter_chars</code>, while in Python3, user input should be escaped with
of the query. In Python2, user input should be escaped with <code>ldap.dn.escape_dn_chars</code>
or <code>ldap.filter.escape_filter_chars</code>, while in Python3, user input should be escaped with
<code>ldap3.utils.dn.escape_rdn</code> or <code>ldap3.utils.conv.escape_filter_chars</code>
depending on the component tainted by the user. A good practice is to escape filter characters
depending on the component tainted by the user. A good practice is to escape filter characters
that could change the meaning of the query (https://tools.ietf.org/search/rfc4515#section-3).</p>
</recommendation>
<example>
<p>In the following examples, the code accepts both <code>username</code> and <code>dc</code> from the user,
<p>In the following examples, the code accepts both <code>username</code> and <code>dc</code> from the user,
which it then uses to build a LDAP query and DN.</p>
<p>The first and the second example uses the unsanitized user input directly
@@ -30,7 +30,7 @@ components, and search for a completely different set of values.</p>
<sample src="examples/example_bad1.py" />
<sample src="examples/example_bad2.py" />
<p>In the third and four example, the input provided by the user is sanitized before it is included in the search filter or DN.
<p>In the third and fourth example, the input provided by the user is sanitized before it is included in the search filter or DN.
This ensures the meaning of the query cannot be changed by a malicious user.</p>
<sample src="examples/example_good1.py" />

View File

@@ -0,0 +1,28 @@
/**
* @name LDAP query built from user-controlled sources
* @description Building an LDAP query from user-controlled sources is vulnerable to insertion of
* malicious LDAP code by the user.
* @kind path-problem
* @problem.severity error
* @security-severity 9.8
* @precision high
* @id py/ldap-injection
* @tags security
* external/cwe/cwe-090
*/
// Determine precision above
import python
import semmle.python.security.dataflow.LdapInjection
import DataFlow::PathGraph
from DataFlow::PathNode source, DataFlow::PathNode sink, string parameterName
where
any(LdapInjection::DnConfiguration dnConfig).hasFlowPath(source, sink) and
parameterName = "DN"
or
any(LdapInjection::FilterConfiguration filterConfig).hasFlowPath(source, sink) and
parameterName = "filter"
select sink.getNode(), source, sink,
"$@ LDAP query parameter (" + parameterName + ") comes from $@.", sink.getNode(), "This",
source.getNode(), "a user-provided value"

View File

@@ -9,7 +9,6 @@
* @precision high
* @id py/code-injection
* @tags security
* external/owasp/owasp-a1
* external/cwe/cwe-094
* external/cwe/cwe-095
* external/cwe/cwe-116

View File

@@ -0,0 +1,54 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>
It is possible to match some single HTML tags using regular expressions (parsing general HTML using
regular expressions is impossible). However, if the regular expression is not written well it might
be possible to circumvent it, which can lead to cross-site scripting or other security issues.
</p>
<p>
Some of these mistakes are caused by browsers having very forgiving HTML parsers, and
will often render invalid HTML containing syntax errors.
Regular expressions that attempt to match HTML should also recognize tags containing such syntax errors.
</p>
</overview>
<recommendation>
<p>
Use a well-tested sanitization or parser library if at all possible. These libraries are much more
likely to handle corner cases correctly than a custom implementation.
</p>
</recommendation>
<example>
<p>
The following example attempts to filters out all <code>&lt;script&gt;</code> tags.
</p>
<sample src="examples/BadTagFilter.py" />
<p>
The above sanitizer does not filter out all <code>&lt;script&gt;</code> tags.
Browsers will not only accept <code>&lt;/script&gt;</code> as script end tags, but also tags such as <code>&lt;/script foo="bar"&gt;</code> even though it is a parser error.
This means that an attack string such as <code>&lt;script&gt;alert(1)&lt;/script foo="bar"&gt;</code> will not be filtered by
the function, and <code>alert(1)</code> will be executed by a browser if the string is rendered as HTML.
</p>
<p>
Other corner cases include that HTML comments can end with <code>--!&gt;</code>,
and that HTML tag names can contain upper case characters.
</p>
</example>
<references>
<li>Securitum: <a href="https://research.securitum.com/the-curious-case-of-copy-paste/">The Curious Case of Copy &amp; Paste</a>.</li>
<li>stackoverflow.com: <a href="https://stackoverflow.com/questions/1732348/regex-match-open-tags-except-xhtml-self-contained-tags#answer-1732454">You can't parse [X]HTML with regex</a>.</li>
<li>HTML Standard: <a href="https://html.spec.whatwg.org/multipage/parsing.html#comment-end-bang-state">Comment end bang state</a>.</li>
<li>stackoverflow.com: <a href="https://stackoverflow.com/questions/25559999/why-arent-browsers-strict-about-html">Why aren't browsers strict about HTML?</a>.</li>
</references>
</qhelp>

View File

@@ -0,0 +1,21 @@
/**
* @name Bad HTML filtering regexp
* @description Matching HTML tags using regular expressions is hard to do right, and can easily lead to security issues.
* @kind problem
* @problem.severity warning
* @security-severity 7.8
* @precision high
* @id py/bad-tag-filter
* @tags correctness
* security
* external/cwe/cwe-116
* external/cwe/cwe-020
* external/cwe/cwe-185
* external/cwe/cwe-186
*/
import semmle.python.security.BadTagFilterQuery
from HTMLMatchingRegExp regexp, string msg
where msg = min(string m | isBadRegexpFilter(regexp, m) | m order by m.length(), m) // there might be multiple, we arbitrarily pick the shortest one
select regexp, msg

View File

@@ -0,0 +1,8 @@
import re
def filterScriptTags(content):
oldContent = ""
while oldContent != content:
oldContent = content
content = re.sub(r'<script.*?>.*?</script>', '', content, flags= re.DOTALL | re.IGNORECASE)
return content

View File

@@ -8,8 +8,11 @@
<p>If unsanitized user input is written to a log entry, a malicious user may be able to forge new log entries.</p>
<p>Forgery can occur if a user provides some input creating the appearance of multiple
log entries. This can include unescaped new-line characters, or HTML or other markup.</p>
<p>Forgery can occur if a user provides some input with characters that are interpreted
when the log output is displayed. If the log is displayed as a plain text file, then new
line characters can be used by a malicious user to create the appearance of multiple log
entries. If the log is displayed as HTML, then arbitrary HTML may be included to spoof
log entries.</p>
</overview>
<recommendation>
@@ -29,14 +32,14 @@ other forms of HTML injection.
<example>
<p>
In the example, the name provided by the user is recorded using the log output function (<code>logging.info</code> or <code>app.logger.info</code>, etc.).
In these four cases, the name provided by the user is not provided The processing is recorded. If a malicious user provides <code>Guest%0D%0AUser name: Admin</code>
In the example, the name provided by the user is recorded using the log output function (<code>logging.info</code> or <code>app.logger.info</code>, etc.).
In these four cases, the name provided by the user is not provided The processing is recorded. If a malicious user provides <code>Guest%0D%0AUser name: Admin</code>
as a parameter, the log entry will be divided into two lines, the first line is <code>User name: Guest</code> code>, the second line is <code>User name: Admin</code>.
</p>
<sample src="LogInjectionBad.py" />
<p>
In a good example, the program uses the <code>replace</code> function to provide parameter processing to the user, and replace <code>\r\n</code> and <code>\n</code>
In a good example, the program uses the <code>replace</code> function to provide parameter processing to the user, and replace <code>\r\n</code> and <code>\n</code>
with empty characters. To a certain extent, the occurrence of log injection vulnerabilities is reduced.
</p>

View File

@@ -4,17 +4,18 @@
* insertion of forged log entries by a malicious user.
* @kind path-problem
* @problem.severity error
* @precision high
* @security-severity 7.8
* @precision medium
* @id py/log-injection
* @tags security
* external/cwe/cwe-117
*/
import python
import experimental.semmle.python.security.injection.LogInjection
import semmle.python.security.dataflow.LogInjection
import DataFlow::PathGraph
from LogInjectionFlowConfig config, DataFlow::PathNode source, DataFlow::PathNode sink
from LogInjection::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "$@ flows to log entry.", source.getNode(),
"User-provided value"

View File

@@ -9,8 +9,8 @@
* @id py/clear-text-logging-sensitive-data
* @tags security
* external/cwe/cwe-312
* external/cwe/cwe-315
* external/cwe/cwe-359
* external/cwe/cwe-532
*/
import python

View File

@@ -88,7 +88,7 @@ abstract class TlsLibrary extends string {
/** The name of a specific protocol version. */
abstract string specific_version_name(ProtocolVersion version);
/** Gets a name, which is a member of `version_constants`, that can be used to specify the protocol family `family`. */
/** Gets a name, which is a member of `version_constants`, that can be used to specify the protocol family `family`. */
abstract string unspecific_version_name(ProtocolFamily family);
/** Gets an API node representing the module or class holding the version constants. */
@@ -96,12 +96,12 @@ abstract class TlsLibrary extends string {
/** Gets an API node representing a specific protocol version. */
API::Node specific_version(ProtocolVersion version) {
result = version_constants().getMember(specific_version_name(version))
result = this.version_constants().getMember(this.specific_version_name(version))
}
/** Gets an API node representing the protocol family `family`. */
API::Node unspecific_version(ProtocolFamily family) {
result = version_constants().getMember(unspecific_version_name(family))
result = this.version_constants().getMember(this.unspecific_version_name(family))
}
/** Gets a creation of a context with a default protocol. */
@@ -112,14 +112,14 @@ abstract class TlsLibrary extends string {
/** Gets a creation of a context with a specific protocol version, known to be insecure. */
ContextCreation insecure_context_creation(ProtocolVersion version) {
result in [specific_context_creation(), default_context_creation()] and
result in [this.specific_context_creation(), this.default_context_creation()] and
result.getProtocol() = version and
version.isInsecure()
}
/** Gets a context that was created using `family`, known to have insecure instances. */
ContextCreation unspecific_context_creation(ProtocolFamily family) {
result in [specific_context_creation(), default_context_creation()] and
result in [this.specific_context_creation(), this.default_context_creation()] and
result.getProtocol() = family
}

View File

@@ -7,6 +7,7 @@
* @precision high
* @id py/polynomial-redos
* @tags security
* external/cwe/cwe-1333
* external/cwe/cwe-730
* external/cwe/cwe-400
*/

View File

@@ -8,6 +8,7 @@
* @precision high
* @id py/redos
* @tags security
* external/cwe/cwe-1333
* external/cwe/cwe-730
* external/cwe/cwe-400
*/

View File

@@ -14,7 +14,7 @@
import python
private import semmle.python.Concepts
import semmle.python.security.injection.RegexInjection
import semmle.python.security.dataflow.RegexInjection
import DataFlow::PathGraph
from

View File

@@ -0,0 +1,11 @@
<!DOCTYPE qhelp PUBLIC "-//Semmle//qhelp//EN" "qhelp.dtd">
<qhelp>
<overview>
<include src="ServerSideRequestForgery-start.inc.qhelp" />
<!-- query specific -->
<p>This query covers full SSRF, to find partial SSRF use the <code>py/partial-ssrf</code> query.</p>
</overview>
<include src="ServerSideRequestForgery-end.inc.qhelp" />
</qhelp>

View File

@@ -0,0 +1,25 @@
/**
* @name Full server-side request forgery
* @description Making a network request to a URL that is fully user-controlled allows for request forgery attacks.
* @kind path-problem
* @problem.severity error
* @security-severity 9.1
* @precision high
* @id py/full-ssrf
* @tags security
* external/cwe/cwe-918
*/
import python
import semmle.python.security.dataflow.ServerSideRequestForgery
import DataFlow::PathGraph
from
FullServerSideRequestForgery::Configuration fullConfig, DataFlow::PathNode source,
DataFlow::PathNode sink, HTTP::Client::Request request
where
request = sink.getNode().(FullServerSideRequestForgery::Sink).getRequest() and
fullConfig.hasFlowPath(source, sink) and
fullyControlledRequest(request)
select request, source, sink, "The full URL of this request depends on $@.", source.getNode(),
"a user-provided value"

View File

@@ -0,0 +1,11 @@
<!DOCTYPE qhelp PUBLIC "-//Semmle//qhelp//EN" "qhelp.dtd">
<qhelp>
<overview>
<include src="ServerSideRequestForgery-start.inc.qhelp" />
<!-- query specific -->
<p>This query covers partial SSRF, to find full SSRF use the <code>py/full-ssrf</code> query.</p>
</overview>
<include src="ServerSideRequestForgery-end.inc.qhelp" />
</qhelp>

View File

@@ -0,0 +1,25 @@
/**
* @name Partial server-side request forgery
* @description Making a network request to a URL that is partially user-controlled allows for request forgery attacks.
* @kind path-problem
* @problem.severity error
* @security-severity 9.1
* @precision medium
* @id py/partial-ssrf
* @tags security
* external/cwe/cwe-918
*/
import python
import semmle.python.security.dataflow.ServerSideRequestForgery
import DataFlow::PathGraph
from
PartialServerSideRequestForgery::Configuration partialConfig, DataFlow::PathNode source,
DataFlow::PathNode sink, HTTP::Client::Request request
where
request = sink.getNode().(PartialServerSideRequestForgery::Sink).getRequest() and
partialConfig.hasFlowPath(source, sink) and
not fullyControlledRequest(request)
select request, source, sink, "Part of the URL of this request depends on $@.", source.getNode(),
"a user-provided value"

View File

@@ -0,0 +1,47 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<recommendation>
<p>To guard against SSRF attacks you should avoid putting user-provided input directly
into a request URL. Instead, either maintain a list of authorized URLs on the server and choose
from that list based on the input provided, or perform proper validation of the input.
</p>
</recommendation>
<example>
<p>The following example shows code vulnerable to a full SSRF attack, because it
uses untrusted input (HTTP request parameter) directly to construct a URL. By using
<code>evil.com#</code> as the <code>target</code> value, the requested URL will be
<code>https://evil.com#.example.com/data/</code>. It also shows how to remedy the
problem by using the user input select a known fixed string.
</p>
<sample src="examples/ServerSideRequestForgery_full.py" />
</example>
<example>
<p>
The following example shows code vulnerable to a partial SSRF attack, because it
uses untrusted input (HTTP request parameter) directly to construct a URL. By
using <code>../transfer-funds-to/123?amount=456</code> as the
<code>user_id</code> value, the requested URL will be
<code>https://api.example.com/transfer-funds-to/123?amount=456</code>. It also
shows how to remedy the problem by validating the input.
</p>
<sample src="examples/ServerSideRequestForgery_partial.py" />
</example>
<references>
<li>
<a href="https://owasp.org/www-community/attacks/Server_Side_Request_Forgery">OWASP SSRF article</a>
</li>
<li>
<a href="https://portswigger.net/web-security/ssrf">PortSwigger SSRF article</a>
</li>
</references>
</qhelp>

View File

@@ -0,0 +1,31 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<fragment>
<p>Directly incorporating user input into an HTTP request without validating the input
can facilitate server-side request forgery (SSRF) attacks. In these attacks, the
request may be changed, directed at a different server, or via a different
protocol. This can allow the attacker to obtain sensitive information or perform
actions with escalated privilege.
</p>
<p>
We make a distinctions between how much of the URL an attacker can control:
</p>
<ul>
<li><b>Full SSRF</b>: where the full URL can be controlled.</li>
<li><b>Partial SSRF</b>: where only part of the URL can be controlled, such as the
path component of a URL to a hardcoded domain.</li>
</ul>
<p></p>
<p>
Partial control of a URL is often much harder to exploit. Therefore we have created a
separate query for each of these.
</p>
</fragment>
</qhelp>

View File

@@ -0,0 +1,15 @@
import requests
from flask import Flask, request
app = Flask(__name__)
@app.route("/full_ssrf")
def full_ssrf():
target = request.args["target"]
# BAD: user has full control of URL
resp = request.get("https://" + target + ".example.com/data/")
# GOOD: `subdomain` is controlled by the server.
subdomain = "europe" if target == "EU" else "world"
resp = request.get("https://" + subdomain + ".example.com/data/")

View File

@@ -0,0 +1,15 @@
import requests
from flask import Flask, request
app = Flask(__name__)
@app.route("/partial_ssrf")
def partial_ssrf():
user_id = request.args["user_id"]
# BAD: user can fully control the path component of the URL
resp = requests.get("https://api.example.com/user_info/" + user_id)
if user_id.isalnum():
# GOOD: user_id is restricted to be alpha-numeric, and cannot alter path component of URL
resp = requests.get("https://api.example.com/user_info/" + user_id)

View File

@@ -46,7 +46,7 @@ predicate mismatched_tuple_rhs(Assign a, int lcount, int rcount, Location loc) {
lcount = len(l) and
rcount = r.length() and
lcount != rcount and
not exists(Starred s | l.getAnItem() = s)
not l.getAnItem() instanceof Starred
)
}

View File

@@ -22,10 +22,7 @@ predicate modification_of_locals(ControlFlowNode f) {
attr = f.(CallNode).getFunction() and
originIsLocals(attr.getObject(mname))
|
mname = "pop" or
mname = "popitem" or
mname = "update" or
mname = "clear"
mname in ["pop", "popitem", "update", "clear"]
)
}

View File

@@ -49,7 +49,7 @@ predicate mutates_globals(ModuleValue m) {
or
// In Python 3.8, Enum._convert_ is implemented using a metaclass, and our points-to
// analysis doesn't handle that well enough. So we need a special case for this
not exists(Value enum_convert | enum_convert = enum_class.attr("_convert")) and
not exists(enum_class.attr("_convert")) and
exists(CallNode call | call.getScope() = m.getScope() |
call.getFunction().(AttrNode).getObject(["_convert", "_convert_"]).pointsTo() = enum_class
)

View File

@@ -68,7 +68,7 @@ predicate undefined_use_in_function(Name u) {
predicate undefined_use_in_class_or_module(Name u) {
exists(GlobalVariable v | u.uses(v)) and
not exists(Function f | u.getScope().getScope*() = f) and
not u.getScope().getScope*() instanceof Function and
exists(SsaVariable var | var.getAUse().getNode() = u | var.maybeUndefined()) and
not guarded_against_name_error(u) and
not exists(ModuleValue m | m.getScope() = u.getEnclosingModule() | m.hasAttribute(u.getId())) and

View File

@@ -8,7 +8,7 @@ import semmle.python.pointsto.PointsTo
import semmle.python.pointsto.PointsToContext
predicate trivial(ControlFlowNode f) {
exists(Parameter p | p = f.getNode())
f.getNode() instanceof Parameter
or
f instanceof NameConstantNode
or

View File

@@ -3,7 +3,7 @@
* @description Generates use-definition pairs that provide the data
* for jump-to-definition in the code viewer.
* @kind definitions
* @id python/ide-jump-to-definition
* @id py/ide-jump-to-definition
* @tags ide-contextual-queries/local-definitions
*/

View File

@@ -3,7 +3,7 @@
* @description Generates use-definition pairs that provide the data
* for find-references in the code viewer.
* @kind definitions
* @id python/ide-find-references
* @id py/ide-find-references
* @tags ide-contextual-queries/local-references
*/

View File

@@ -0,0 +1,4 @@
---
category: newQuery
---
* The query "Log Injection" (`py/log-injection`) has been promoted from experimental to the main query pack. Its results will now appear when `security-extended` is used. This query was originally [submitted as an experimental query by @haby0](https://github.com/github/codeql/pull/6182).

View File

@@ -0,0 +1,4 @@
---
category: newQuery
---
* The query "LDAP query built from user-controlled sources" (`py/ldap-injection`) has been promoted from experimental to the main query pack. Its results will now appear by default. This query was originally [submitted as an experimental query by @jorgectf](https://github.com/github/codeql/pull/5443).

View File

@@ -0,0 +1,5 @@
## 0.0.4
### Query Metadata Changes
* Fixed the query ids of two queries that are meant for manual exploration: `python/count-untrusted-data-external-api` and `python/untrusted-data-to-external-api` have been changed to `py/count-untrusted-data-external-api` and `py/untrusted-data-to-external-api`.

View File

@@ -0,0 +1,9 @@
## 0.0.5
### Minor Analysis Improvements
* Added modeling of many functions from the `os` module that uses file system paths, such as `os.stat`, `os.chdir`, `os.mkdir`, and so on. All of these are new sinks for the _Uncontrolled data used in path expression_ (`py/path-injection`) query.
* Added modeling of the `tempfile` module for creating temporary files and directories, such as the functions `tempfile.NamedTemporaryFile` and `tempfile.TemporaryDirectory`. The `suffix`, `prefix`, and `dir` arguments are all vulnerable to path-injection, and these are new sinks for the _Uncontrolled data used in path expression_ (`py/path-injection`) query.
* Extended the modeling of FastAPI such that `fastapi.responses.FileResponse` are considered `FileSystemAccess`, making them sinks for the _Uncontrolled data used in path expression_ (`py/path-injection`) query.
* Added modeling of the `posixpath`, `ntpath`, and `genericpath` modules for path operations (although these are not supposed to be used), resulting in new sinks for the _Uncontrolled data used in path expression_ (`py/path-injection`) query.
* Added modeling of `wsgiref.simple_server` applications, leading to new remote flow sources.

View File

@@ -0,0 +1,9 @@
## 0.0.6
### New Queries
* Two new queries have been added for detecting Server-side request forgery (SSRF). _Full server-side request forgery_ (`py/full-ssrf`) will only alert when the URL is fully user-controlled, and _Partial server-side request forgery_ (`py/partial-ssrf`) will alert when any part of the URL is user-controlled. Only `py/full-ssrf` will be run by default.
### Minor Analysis Improvements
* To support the new SSRF queries, the PyPI package `requests` has been modeled, along with `http.client.HTTP[S]Connection` from the standard library.

View File

@@ -0,0 +1 @@
## 0.0.7

View File

@@ -0,0 +1,5 @@
## 0.0.8
### Major Analysis Improvements
* User names and other account information is no longer considered to be sensitive data for the queries `py/clear-text-logging-sensitive-data` and `py/clear-text-storage-sensitive-data`, since this lead to many false positives.

View File

@@ -0,0 +1,5 @@
## 0.0.9
### Bug Fixes
* The [View AST functionality](https://codeql.github.com/docs/codeql-for-visual-studio-code/exploring-the-structure-of-your-source-code/) no longer prints detailed information about regular expressions, greatly improving performance.

View File

@@ -0,0 +1,2 @@
---
lastReleaseVersion: 0.0.9

View File

@@ -1,13 +0,0 @@
#!/bin/bash
set -Eeuo pipefail # see https://vaneyckt.io/posts/safer_bash_scripts_with_set_euxo_pipefail/
# Promotes new dataflow queries to be the real ones
SCRIPTDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
cd $SCRIPTDIR
for file in $(find . -mindepth 2); do
echo "Promoting $file"
mkdir -p "../../Security/$(dirname $file)"
mv "$file" "../../Security/${file}"
done

View File

@@ -1,35 +0,0 @@
/**
* @name Binding a socket to all network interfaces
* @description Binding a socket to all interfaces opens it up to traffic from any IPv4 address
* and is therefore associated with security risks.
* @kind problem
* @id py/old/bind-socket-all-network-interfaces
* @problem.severity error
*/
import python
Value aSocket() { result.getClass() = Value::named("socket.socket") }
CallNode socketBindCall() {
result = aSocket().attr("bind").(CallableValue).getACall() and major_version() = 3
or
result.getFunction().(AttrNode).getObject("bind").pointsTo(aSocket()) and
major_version() = 2
}
string allInterfaces() { result = "0.0.0.0" or result = "" }
Value getTextValue(string address) {
result = Value::forUnicode(address) and major_version() = 3
or
result = Value::forString(address) and major_version() = 2
}
from CallNode call, TupleValue args, string address
where
call = socketBindCall() and
call.getArg(0).pointsTo(args) and
args.getItem(0) = getTextValue(address) and
address = allInterfaces()
select call.getNode(), "'" + address + "' binds a socket to all interfaces."

View File

@@ -1,38 +0,0 @@
/**
* @name OLD QUERY: Uncontrolled data used in path expression
* @description Accessing paths influenced by users can allow an attacker to access unexpected resources.
* @kind path-problem
* @problem.severity error
* @id py/old/path-injection
*/
import python
import semmle.python.security.Paths
/* Sources */
import semmle.python.web.HttpRequest
/* Sinks */
import semmle.python.security.injection.Path
class PathInjectionConfiguration extends TaintTracking::Configuration {
PathInjectionConfiguration() { this = "Path injection configuration" }
override predicate isSource(TaintTracking::Source source) {
source instanceof HttpRequestTaintSource
}
override predicate isSink(TaintTracking::Sink sink) { sink instanceof OpenNode }
override predicate isSanitizer(Sanitizer sanitizer) {
sanitizer instanceof PathSanitizer or
sanitizer instanceof NormalizedPathSanitizer
}
override predicate isExtension(TaintTracking::Extension extension) {
extension instanceof AbsPath
}
}
from PathInjectionConfiguration config, TaintedPathSource src, TaintedPathSink sink
where config.hasFlowPath(src, sink)
select sink.getSink(), src, sink, "This path depends on $@.", src.getSource(),
"a user-provided value"

View File

@@ -1,36 +0,0 @@
/**
* @name OLD QUERY: Uncontrolled command line
* @description Using externally controlled strings in a command line may allow a malicious
* user to change the meaning of the command.
* @kind path-problem
* @problem.severity error
* @id py/old/command-line-injection
*/
import python
import semmle.python.security.Paths
/* Sources */
import semmle.python.web.HttpRequest
/* Sinks */
import semmle.python.security.injection.Command
class CommandInjectionConfiguration extends TaintTracking::Configuration {
CommandInjectionConfiguration() { this = "Command injection configuration" }
override predicate isSource(TaintTracking::Source source) {
source instanceof HttpRequestTaintSource
}
override predicate isSink(TaintTracking::Sink sink) { sink instanceof CommandSink }
override predicate isExtension(TaintTracking::Extension extension) {
extension instanceof FirstElementFlow
or
extension instanceof FabricExecuteExtension
}
}
from CommandInjectionConfiguration config, TaintedPathSource src, TaintedPathSink sink
where config.hasFlowPath(src, sink)
select sink.getSink(), src, sink, "This command depends on $@.", src.getSource(),
"a user-provided value"

View File

@@ -1,37 +0,0 @@
/**
* @name OLD QUERY: Reflected server-side cross-site scripting
* @description Writing user input directly to a web page
* allows for a cross-site scripting vulnerability.
* @kind path-problem
* @problem.severity error
* @id py/old/reflective-xss
*/
import python
import semmle.python.security.Paths
/* Sources */
import semmle.python.web.HttpRequest
/* Sinks */
import semmle.python.web.HttpResponse
/* Flow */
import semmle.python.security.strings.Untrusted
class ReflectedXssConfiguration extends TaintTracking::Configuration {
ReflectedXssConfiguration() { this = "Reflected XSS configuration" }
override predicate isSource(TaintTracking::Source source) {
source instanceof HttpRequestTaintSource
}
override predicate isSink(TaintTracking::Sink sink) {
sink instanceof HttpResponseTaintSink and
not sink instanceof DjangoResponseContent
or
sink instanceof DjangoResponseContentXSSVulnerable
}
}
from ReflectedXssConfiguration config, TaintedPathSource src, TaintedPathSink sink
where config.hasFlowPath(src, sink)
select sink.getSink(), src, sink, "Cross-site scripting vulnerability due to $@.", src.getSource(),
"a user-provided value"

View File

@@ -1,47 +0,0 @@
/**
* @name OLD QUERY: SQL query built from user-controlled sources
* @description Building a SQL query from user-controlled sources is vulnerable to insertion of
* malicious SQL code by the user.
* @kind path-problem
* @problem.severity error
* @id py/old/sql-injection
*/
import python
import semmle.python.security.Paths
/* Sources */
import semmle.python.web.HttpRequest
/* Sinks */
import semmle.python.security.injection.Sql
import semmle.python.web.django.Db
import semmle.python.web.django.Model
class SQLInjectionConfiguration extends TaintTracking::Configuration {
SQLInjectionConfiguration() { this = "SQL injection configuration" }
override predicate isSource(TaintTracking::Source source) {
source instanceof HttpRequestTaintSource
}
override predicate isSink(TaintTracking::Sink sink) { sink instanceof SqlInjectionSink }
}
/*
* Additional configuration to support tracking of DB objects. Connections, cursors, etc.
* Without this configuration (or the LegacyConfiguration), the pattern of
* `any(MyTaintKind k).taints(control_flow_node)` used in DbConnectionExecuteArgument would not work.
*/
class DbConfiguration extends TaintTracking::Configuration {
DbConfiguration() { this = "DB configuration" }
override predicate isSource(TaintTracking::Source source) {
source instanceof DjangoModelObjects or
source instanceof DbConnectionSource
}
}
from SQLInjectionConfiguration config, TaintedPathSource src, TaintedPathSink sink
where config.hasFlowPath(src, sink)
select sink.getSink(), src, sink, "This SQL query depends on $@.", src.getSource(),
"a user-provided value"

View File

@@ -1,30 +0,0 @@
/**
* @name Code injection
* @description OLD QUERY: Interpreting unsanitized user input as code allows a malicious user arbitrary
* code execution.
* @kind path-problem
* @problem.severity error
* @id py/old/code-injection
*/
import python
import semmle.python.security.Paths
/* Sources */
import semmle.python.web.HttpRequest
/* Sinks */
import semmle.python.security.injection.Exec
class CodeInjectionConfiguration extends TaintTracking::Configuration {
CodeInjectionConfiguration() { this = "Code injection configuration" }
override predicate isSource(TaintTracking::Source source) {
source instanceof HttpRequestTaintSource
}
override predicate isSink(TaintTracking::Sink sink) { sink instanceof StringEvaluationNode }
}
from CodeInjectionConfiguration config, TaintedPathSource src, TaintedPathSink sink
where config.hasFlowPath(src, sink)
select sink.getSink(), src, sink, "$@ flows to here and is interpreted as code.", src.getSource(),
"A user-provided value"

View File

@@ -1,33 +0,0 @@
/**
* @name Clear-text logging of sensitive information
* @description OLD QUERY: Logging sensitive information without encryption or hashing can
* expose it to an attacker.
* @kind path-problem
* @problem.severity error
* @id py/old/clear-text-logging-sensitive-data
* @deprecated
*/
import python
import semmle.python.security.Paths
import semmle.python.dataflow.TaintTracking
import semmle.python.security.SensitiveData
import semmle.python.security.ClearText
class CleartextLoggingConfiguration extends TaintTracking::Configuration {
CleartextLoggingConfiguration() { this = "ClearTextLogging" }
override predicate isSource(DataFlow::Node src, TaintKind kind) {
src.asCfgNode().(SensitiveData::Source).isSourceOf(kind)
}
override predicate isSink(DataFlow::Node sink, TaintKind kind) {
sink.asCfgNode() instanceof ClearTextLogging::Sink and
kind instanceof SensitiveData
}
}
from CleartextLoggingConfiguration config, TaintedPathSource source, TaintedPathSink sink
where config.hasFlowPath(source, sink)
select sink.getSink(), source, sink, "Sensitive data returned by $@ is logged here.",
source.getSource(), source.getCfgNode().(SensitiveData::Source).repr()

View File

@@ -1,33 +0,0 @@
/**
* @name Clear-text storage of sensitive information
* @description OLD QUERY: Sensitive information stored without encryption or hashing can expose it to an
* attacker.
* @kind path-problem
* @problem.severity error
* @id py/old/clear-text-storage-sensitive-data
* @deprecated
*/
import python
import semmle.python.security.Paths
import semmle.python.dataflow.TaintTracking
import semmle.python.security.SensitiveData
import semmle.python.security.ClearText
class CleartextStorageConfiguration extends TaintTracking::Configuration {
CleartextStorageConfiguration() { this = "ClearTextStorage" }
override predicate isSource(DataFlow::Node src, TaintKind kind) {
src.asCfgNode().(SensitiveData::Source).isSourceOf(kind)
}
override predicate isSink(DataFlow::Node sink, TaintKind kind) {
sink.asCfgNode() instanceof ClearTextStorage::Sink and
kind instanceof SensitiveData
}
}
from CleartextStorageConfiguration config, TaintedPathSource source, TaintedPathSink sink
where config.hasFlowPath(source, sink)
select sink.getSink(), source, sink, "Sensitive data from $@ is stored here.", source.getSource(),
source.getCfgNode().(SensitiveData::Source).repr()

View File

@@ -1,78 +0,0 @@
/**
* @name OLD QUERY: Use of weak cryptographic key
* @description Use of a cryptographic key that is too small may allow the encryption to be broken.
* @kind problem
* @problem.severity error
* @id py/old/weak-crypto-key
*/
import python
int minimumSecureKeySize(string algo) {
algo = "DSA" and result = 2048
or
algo = "RSA" and result = 2048
or
algo = "ECC" and result = 224
}
predicate dsaRsaKeySizeArg(FunctionValue func, string algorithm, string arg) {
exists(ModuleValue mod | func = mod.attr(_) |
algorithm = "DSA" and
(
mod = Module::named("cryptography.hazmat.primitives.asymmetric.dsa") and arg = "key_size"
or
mod = Module::named("Crypto.PublicKey.DSA") and arg = "bits"
or
mod = Module::named("Cryptodome.PublicKey.DSA") and arg = "bits"
)
or
algorithm = "RSA" and
(
mod = Module::named("cryptography.hazmat.primitives.asymmetric.rsa") and arg = "key_size"
or
mod = Module::named("Crypto.PublicKey.RSA") and arg = "bits"
or
mod = Module::named("Cryptodome.PublicKey.RSA") and arg = "bits"
)
)
}
predicate ecKeySizeArg(FunctionValue func, string arg) {
exists(ModuleValue mod | func = mod.attr(_) |
mod = Module::named("cryptography.hazmat.primitives.asymmetric.ec") and arg = "curve"
)
}
int keySizeFromCurve(ClassValue curveClass) {
result = curveClass.declaredAttribute("key_size").(NumericValue).getIntValue()
}
predicate algorithmAndKeysizeForCall(
CallNode call, string algorithm, int keySize, ControlFlowNode keyOrigin
) {
exists(FunctionValue func, string argname, ControlFlowNode arg |
arg = func.getNamedArgumentForCall(call, argname)
|
exists(NumericValue key |
arg.pointsTo(key, keyOrigin) and
dsaRsaKeySizeArg(func, algorithm, argname) and
keySize = key.getIntValue()
)
or
exists(Value curveClassInstance |
algorithm = "ECC" and
ecKeySizeArg(func, argname) and
arg.pointsTo(_, curveClassInstance, keyOrigin) and
keySize = keySizeFromCurve(curveClassInstance.getClass())
)
)
}
from CallNode call, string algo, int keySize, ControlFlowNode origin
where
algorithmAndKeysizeForCall(call, algo, keySize, origin) and
keySize < minimumSecureKeySize(algo)
select call,
"Creation of an " + algo + " key uses $@ bits, which is below " + minimumSecureKeySize(algo) +
" and considered breakable.", origin, keySize.toString()

View File

@@ -1,28 +0,0 @@
/**
* @name OLD QUERY: Use of a broken or weak cryptographic algorithm
* @description Using broken or weak cryptographic algorithms can compromise security.
* @kind path-problem
* @problem.severity warning
* @id py/old/weak-cryptographic-algorithm
* @deprecated
*/
import python
import semmle.python.security.Paths
import semmle.python.security.SensitiveData
import semmle.python.security.Crypto
class BrokenCryptoConfiguration extends TaintTracking::Configuration {
BrokenCryptoConfiguration() { this = "Broken crypto configuration" }
override predicate isSource(TaintTracking::Source source) {
source instanceof SensitiveDataSource
}
override predicate isSink(TaintTracking::Sink sink) { sink instanceof WeakCryptoSink }
}
from BrokenCryptoConfiguration config, TaintedPathSource src, TaintedPathSink sink
where config.hasFlowPath(src, sink)
select sink.getSink(), src, sink, "$@ is used in a broken or weak cryptographic algorithm.",
src.getSource(), "Sensitive data"

View File

@@ -1,32 +0,0 @@
/**
* @name OLD QUERY: Deserializing untrusted input
* @description Deserializing user-controlled data may allow attackers to execute arbitrary code.
* @kind path-problem
* @id py/old/unsafe-deserialization
* @problem.severity error
*/
import python
import semmle.python.security.Paths
// Sources -- Any untrusted input
import semmle.python.web.HttpRequest
// Flow -- untrusted string
import semmle.python.security.strings.Untrusted
// Sink -- Unpickling and other deserialization formats.
import semmle.python.security.injection.Pickle
import semmle.python.security.injection.Marshal
import semmle.python.security.injection.Yaml
class UnsafeDeserializationConfiguration extends TaintTracking::Configuration {
UnsafeDeserializationConfiguration() { this = "Unsafe deserialization configuration" }
override predicate isSource(TaintTracking::Source source) {
source instanceof HttpRequestTaintSource
}
override predicate isSink(TaintTracking::Sink sink) { sink instanceof DeserializationSink }
}
from UnsafeDeserializationConfiguration config, TaintedPathSource src, TaintedPathSink sink
where config.hasFlowPath(src, sink)
select sink.getSink(), src, sink, "Deserializing of $@.", src.getSource(), "untrusted input"

View File

@@ -1,37 +0,0 @@
/**
* @name OLD QUERY: URL redirection from remote source
* @description URL redirection based on unvalidated user input
* may cause redirection to malicious web sites.
* @kind path-problem
* @problem.severity error
* @id py/old/url-redirection
*/
import python
import semmle.python.security.Paths
import semmle.python.web.HttpRedirect
import semmle.python.web.HttpRequest
import semmle.python.security.strings.Untrusted
/** Url redirection is a problem only if the user controls the prefix of the URL */
class UntrustedPrefixStringKind extends UntrustedStringKind {
override TaintKind getTaintForFlowStep(ControlFlowNode fromnode, ControlFlowNode tonode) {
result = UntrustedStringKind.super.getTaintForFlowStep(fromnode, tonode) and
not tonode.(BinaryExprNode).getRight() = fromnode
}
}
class UrlRedirectConfiguration extends TaintTracking::Configuration {
UrlRedirectConfiguration() { this = "URL redirect configuration" }
override predicate isSource(TaintTracking::Source source) {
source instanceof HttpRequestTaintSource
}
override predicate isSink(TaintTracking::Sink sink) { sink instanceof HttpRedirectTaintSink }
}
from UrlRedirectConfiguration config, TaintedPathSource src, TaintedPathSink sink
where config.hasFlowPath(src, sink)
select sink.getSink(), src, sink, "Untrusted URL redirection due to $@.", src.getSource(),
"a user-provided value"

View File

@@ -21,11 +21,11 @@ import semmle.python.security.strings.Untrusted
class TemplateInjectionConfiguration extends TaintTracking::Configuration {
TemplateInjectionConfiguration() { this = "Template injection configuration" }
override predicate isSource(TaintTracking::Source source) {
deprecated override predicate isSource(TaintTracking::Source source) {
source instanceof HttpRequestTaintSource
}
override predicate isSink(TaintTracking::Sink sink) { sink instanceof SSTISink }
deprecated override predicate isSink(TaintTracking::Sink sink) { sink instanceof SSTISink }
}
from TemplateInjectionConfiguration config, TaintedPathSource src, TaintedPathSink sink

View File

@@ -1,21 +0,0 @@
/**
* @name LDAP query built from user-controlled sources
* @description Building an LDAP query from user-controlled sources is vulnerable to insertion of
* malicious LDAP code by the user.
* @kind path-problem
* @problem.severity error
* @id py/ldap-injection
* @tags experimental
* security
* external/cwe/cwe-090
*/
// Determine precision above
import python
import experimental.semmle.python.security.injection.LDAP
import DataFlow::PathGraph
from LDAPInjectionFlowConfig config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "$@ LDAP query parameter comes from $@.", sink.getNode(),
"This", source.getNode(), "a user-provided value"

View File

@@ -20,11 +20,11 @@ import experimental.semmle.python.security.injection.XSLT
class XSLTInjectionConfiguration extends TaintTracking::Configuration {
XSLTInjectionConfiguration() { this = "XSLT injection configuration" }
override predicate isSource(TaintTracking::Source source) {
deprecated override predicate isSource(TaintTracking::Source source) {
source instanceof HttpRequestTaintSource
}
override predicate isSink(TaintTracking::Sink sink) {
deprecated override predicate isSink(TaintTracking::Sink sink) {
sink instanceof XSLTInjection::XSLTInjectionSink
}
}

View File

@@ -4,8 +4,7 @@
* @kind problem
* @problem.severity warning
* @id py/improper-ldap-auth
* @tags experimental
* security
* @tags security
* external/cwe/cwe-287
*/

View File

@@ -0,0 +1,48 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>
Using a cryptographically weak pseudo-random number generator to generate a security-sensitive value,
such as a password, makes it easier for an attacker to predict the value.
</p>
<p>
Pseudo-random number generators generate a sequence of numbers that only approximates the properties
of random numbers. The sequence is not truly random because it is completely determined by a
relatively small set of initial values, the seed. If the random number generator is
cryptographically weak, then this sequence may be easily predictable through outside observations.
</p>
</overview>
<recommendation>
<p>
Use a cryptographically secure pseudo-random number generator if the output is to be used in a
security sensitive context. As a rule of thumb, a value should be considered "security sensitive"
if predicting it would allow the attacker to perform an action that they would otherwise be unable
to perform. For example, if an attacker could predict the random password generated for a new user,
they would be able to log in as that new user.
</p>
<p>
For Python, <code>secrets</code> provides a cryptographically secure pseudo-random
number generator. <code>random</code> is not cryptographically secure, and should be avoided in
security contexts.
</p>
</recommendation>
<example>
<p>
The example below uses the <code>random</code> package instead of <code>secrets</code> to generate a password:
</p>
<sample src="examples/InsecureRandomness.py" />
<p>
Instead, use <code>secrets</code>:
</p>
<sample src="examples/InsecureRandomnessGood.py" />
</example>
<references>
<li>Wikipedia. <a href="http://en.wikipedia.org/wiki/Pseudorandom_number_generator">Pseudo-random number generator</a>.</li>
<li>OWASP: <a href="https://owasp.org/www-community/vulnerabilities/Insecure_Randomness">Insecure Randomness</a>.</li>
<li>OWASP: <a href="https://cheatsheetseries.owasp.org/cheatsheets/Cryptographic_Storage_Cheat_Sheet.html#secure-random-number-generation">Secure Random Number Generation</a>.</li>
</references>
</qhelp>

View File

@@ -0,0 +1,23 @@
/**
* @name Insecure randomness
* @description Using a cryptographically weak pseudo-random number generator to generate a
* security-sensitive value may allow an attacker to predict what value will
* be generated.
* @kind path-problem
* @problem.severity warning
* @security-severity 7.8
* @precision high
* @id py/insecure-randomness
* @tags security
* external/cwe/cwe-338
*/
import python
import experimental.semmle.python.security.InsecureRandomness::InsecureRandomness
import semmle.python.dataflow.new.DataFlow
import DataFlow::PathGraph
from Configuration cfg, DataFlow::PathNode source, DataFlow::PathNode sink
where cfg.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "Cryptographically insecure $@ in a security context.",
source.getNode(), "random value"

View File

@@ -0,0 +1,6 @@
import random
def generatePassword():
# BAD: the random is not cryptographically secure
return random.random()

View File

@@ -0,0 +1,7 @@
import secrets
def generatePassword():
# GOOD: the random is cryptographically secure
secret_generator = secrets.SystemRandom()
return secret_generator.random()

View File

@@ -0,0 +1,7 @@
import jwt
# algorithm set to None
jwt.encode(payload, "somekey", None)
# empty key
jwt.encode(payload, key="", algorithm="HS256")

View File

@@ -0,0 +1,30 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>Applications encoding a JSON Web Token (JWT) may be vulnerable when the applied key or algorithm
is empty or <code>None</code>.</p>
</overview>
<recommendation>
<p>Use non-empty nor <code>None</code> values while encoding JWT payloads.</p>
</recommendation>
<example>
<p>This example shows two PyJWT encoding calls.
In the first place, the encoding process use a None algorithm whereas the second example uses an
empty key. Both examples leave the payload insecurely encoded.
</p>
<sample src="JWTEmptyKeyOrAlgorithm.py" />
</example>
<references>
<li>PyJWT: <a href="https://pyjwt.readthedocs.io/en/stable/">Documentation</a>.</li>
<li>Authlib JWT: <a href="https://docs.authlib.org/en/latest/specs/rfc7519.html">Documentation</a>.</li>
<li>Python-Jose: <a href="https://github.com/mpdavis/python-jose">Documentation</a>.</li>
<li>Auth0 Blog: <a href="https://auth0.com/blog/critical-vulnerabilities-in-json-web-token-libraries/#Meet-the--None--Algorithm">Meet the "None" Algorithm</a>.</li>
</references>
</qhelp>

View File

@@ -0,0 +1,22 @@
/**
* @name JWT encoding using empty key or algorithm
* @description The application uses an empty secret or algorithm while encoding a JWT Token.
* @kind problem
* @problem.severity warning
* @id py/jwt-empty-secret-or-algorithm
* @tags security
*/
// determine precision above
import python
import experimental.semmle.python.Concepts
import experimental.semmle.python.frameworks.JWT
from JWTEncoding jwtEncoding, string affectedComponent
where
affectedComponent = "algorithm" and
isEmptyOrNone(jwtEncoding.getAlgorithm())
or
affectedComponent = "key" and
isEmptyOrNone(jwtEncoding.getKey())
select jwtEncoding, "This JWT encoding has an empty " + affectedComponent + "."

View File

@@ -0,0 +1,4 @@
import jwt
# unverified decoding
jwt.decode(payload, key="somekey", verify=False)

View File

@@ -0,0 +1,30 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>Applications decoding a JSON Web Token (JWT) may be vulnerable when the
key isn't verified in the process.
</p>
</overview>
<recommendation>
<p>Set the <code>verify</code> argument to <code>True</code> or use
a framework that does it by default.
</p>
</recommendation>
<example>
<p>This example shows a PyJWT encoding call with the <code>verify</code>
argument set to <code>False</code>.
</p>
<sample src="JWTMissingSecretOrPublicKeyVerification.py" />
</example>
<references>
<li>PyJWT: <a href="https://pyjwt.readthedocs.io/en/stable/">Documentation</a>.</li>
<li>Authlib JWT: <a href="https://docs.authlib.org/en/latest/specs/rfc7519.html">Documentation</a>.</li>
<li>Python-Jose: <a href="https://github.com/mpdavis/python-jose">Documentation</a>.</li>
</references>
</qhelp>

View File

@@ -0,0 +1,17 @@
/**
* @name JWT missing secret or public key verification
* @description The application does not verify the JWT payload with a cryptographic secret or public key.
* @kind problem
* @problem.severity warning
* @id py/jwt-missing-verification
* @tags security
* external/cwe/cwe-347
*/
// determine precision above
import python
import experimental.semmle.python.Concepts
from JWTDecoding jwtDecoding
where not jwtDecoding.verifiesSignature()
select jwtDecoding.getPayload(), "is not verified with a cryptographic secret or public key."

View File

@@ -4,8 +4,7 @@
* @kind path-problem
* @problem.severity error
* @id py/insecure-ldap-auth
* @tags experimental
* security
* @tags security
* external/cwe/cwe-522
* external/cwe/cwe-523
*/

View File

@@ -5,15 +5,15 @@
* @kind path-problem
* @problem.severity error
* @id py/nosql-injection
* @tags experimental
* security
* @tags security
* external/cwe/cwe-943
*/
import python
import experimental.semmle.python.security.injection.NoSQLInjection
import DataFlow::PathGraph
from CustomPathNode source, CustomPathNode sink
where noSQLInjectionFlow(source, sink)
from NoSQLInjection::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink, source, sink, "$@ NoSQL query contains an unsanitized $@", sink, "This", source,
"user-provided value"

View File

@@ -14,36 +14,6 @@ private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.TaintTracking
private import experimental.semmle.python.Frameworks
/** Provides classes for modeling log related APIs. */
module LogOutput {
/**
* A data flow node for log output.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `LogOutput` instead.
*/
abstract class Range extends DataFlow::Node {
/**
* Get the parameter value of the log output function.
*/
abstract DataFlow::Node getAnInput();
}
}
/**
* A data flow node for log output.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `LogOutput::Range` instead.
*/
class LogOutput extends DataFlow::Node {
LogOutput::Range range;
LogOutput() { this = range }
DataFlow::Node getAnInput() { result = range.getAnInput() }
}
/** Provides classes for modeling LDAP query execution-related APIs. */
module LDAPQuery {
/**
@@ -297,6 +267,144 @@ class HeaderDeclaration extends DataFlow::Node {
DataFlow::Node getValueArg() { result = range.getValueArg() }
}
/** Provides classes for modeling JWT encoding-related APIs. */
module JWTEncoding {
/**
* A data-flow node that collects methods encoding a JWT token.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `JWTEncoding` instead.
*/
abstract class Range extends DataFlow::Node {
/**
* Gets the argument containing the encoding payload.
*/
abstract DataFlow::Node getPayload();
/**
* Gets the argument containing the encoding key.
*/
abstract DataFlow::Node getKey();
/**
* Gets the argument for the algorithm used in the encoding.
*/
abstract DataFlow::Node getAlgorithm();
/**
* Gets a string representation of the algorithm used in the encoding.
*/
abstract string getAlgorithmString();
}
}
/**
* A data-flow node that collects methods encoding a JWT token.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `JWTEncoding::Range` instead.
*/
class JWTEncoding extends DataFlow::Node instanceof JWTEncoding::Range {
/**
* Gets the argument containing the payload.
*/
DataFlow::Node getPayload() { result = super.getPayload() }
/**
* Gets the argument containing the encoding key.
*/
DataFlow::Node getKey() { result = super.getKey() }
/**
* Gets the argument for the algorithm used in the encoding.
*/
DataFlow::Node getAlgorithm() { result = super.getAlgorithm() }
/**
* Gets a string representation of the algorithm used in the encoding.
*/
string getAlgorithmString() { result = super.getAlgorithmString() }
}
/** Provides classes for modeling JWT decoding-related APIs. */
module JWTDecoding {
/**
* A data-flow node that collects methods decoding a JWT token.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `JWTDecoding` instead.
*/
abstract class Range extends DataFlow::Node {
/**
* Gets the argument containing the encoding payload.
*/
abstract DataFlow::Node getPayload();
/**
* Gets the argument containing the encoding key.
*/
abstract DataFlow::Node getKey();
/**
* Gets the argument for the algorithm used in the encoding.
*/
abstract DataFlow::Node getAlgorithm();
/**
* Gets a string representation of the algorithm used in the encoding.
*/
abstract string getAlgorithmString();
/**
* Gets the options Node used in the encoding.
*/
abstract DataFlow::Node getOptions();
/**
* Checks if the signature gets verified while decoding.
*/
abstract predicate verifiesSignature();
}
}
/**
* A data-flow node that collects methods encoding a JWT token.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `JWTDecoding::Range` instead.
*/
class JWTDecoding extends DataFlow::Node instanceof JWTDecoding::Range {
/**
* Gets the argument containing the payload.
*/
DataFlow::Node getPayload() { result = super.getPayload() }
/**
* Gets the argument containing the encoding key.
*/
DataFlow::Node getKey() { result = super.getKey() }
/**
* Gets the argument for the algorithm used in the encoding.
*/
DataFlow::Node getAlgorithm() { result = super.getAlgorithm() }
/**
* Gets a string representation of the algorithm used in the encoding.
*/
string getAlgorithmString() { result = super.getAlgorithmString() }
/**
* Gets the options Node used in the encoding.
*/
DataFlow::Node getOptions() { result = super.getOptions() }
/**
* Checks if the signature gets verified while decoding.
*/
predicate verifiesSignature() { super.verifiesSignature() }
}
/** Provides classes for modeling Email APIs. */
module EmailSender {
/**

View File

@@ -8,8 +8,11 @@ private import experimental.semmle.python.frameworks.Django
private import experimental.semmle.python.frameworks.Werkzeug
private import experimental.semmle.python.frameworks.LDAP
private import experimental.semmle.python.frameworks.NoSQL
private import experimental.semmle.python.frameworks.Log
private import experimental.semmle.python.frameworks.Django
private import experimental.semmle.python.frameworks.JWT
private import experimental.semmle.python.libraries.PyJWT
private import experimental.semmle.python.libraries.Python_JWT
private import experimental.semmle.python.libraries.Authlib
private import experimental.semmle.python.libraries.PythonJose
private import experimental.semmle.python.frameworks.Sendgrid
private import experimental.semmle.python.libraries.FlaskMail
private import experimental.semmle.python.libraries.SmtpLib

View File

@@ -0,0 +1,23 @@
private import python
private import semmle.python.ApiGraphs
/** Checks if the argument is empty or none. */
predicate isEmptyOrNone(DataFlow::Node arg) { isEmpty(arg) or isNone(arg) }
/** Checks if an empty string `""` flows to `arg` */
predicate isEmpty(DataFlow::Node arg) {
exists(StrConst emptyString |
emptyString.getText() = "" and
DataFlow::exprNode(emptyString).(DataFlow::LocalSourceNode).flowsTo(arg)
)
}
/** Checks if `None` flows to `arg` */
predicate isNone(DataFlow::Node arg) {
DataFlow::exprNode(any(None no)).(DataFlow::LocalSourceNode).flowsTo(arg)
}
/** Checks if `False` flows to `arg` */
predicate isFalse(DataFlow::Node arg) {
DataFlow::exprNode(any(False falseExpr)).(DataFlow::LocalSourceNode).flowsTo(arg)
}

View File

@@ -1,118 +0,0 @@
/**
* Provides classes modeling security-relevant aspects of the log libraries.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.dataflow.new.RemoteFlowSources
private import experimental.semmle.python.Concepts
private import semmle.python.frameworks.Flask
private import semmle.python.ApiGraphs
/**
* Provides models for Python's log-related libraries.
*/
private module log {
/**
* Log output method list.
*
* See https://docs.python.org/3/library/logging.html#logger-objects
*/
private class LogOutputMethods extends string {
LogOutputMethods() {
this in ["info", "error", "warn", "warning", "debug", "critical", "exception", "log"]
}
}
/**
* The class used to find the log output method of the `logging` module.
*
* See `LogOutputMethods`
*/
private class LoggingCall extends DataFlow::CallCfgNode, LogOutput::Range {
LoggingCall() {
this = API::moduleImport("logging").getMember(any(LogOutputMethods m)).getACall()
}
override DataFlow::Node getAnInput() {
this.getFunction().(DataFlow::AttrRead).getAttributeName() != "log" and
result in [this.getArg(_), this.getArgByName(_)] // this includes the arg named "msg"
or
this.getFunction().(DataFlow::AttrRead).getAttributeName() = "log" and
result in [this.getArg(any(int i | i > 0)), this.getArgByName(any(string s | s != "level"))]
}
}
/**
* The class used to find log output methods related to the `logging.getLogger` instance.
*
* See `LogOutputMethods`
*/
private class LoggerCall extends DataFlow::CallCfgNode, LogOutput::Range {
LoggerCall() {
this =
API::moduleImport("logging")
.getMember("getLogger")
.getReturn()
.getMember(any(LogOutputMethods m))
.getACall()
}
override DataFlow::Node getAnInput() {
this.getFunction().(DataFlow::AttrRead).getAttributeName() != "log" and
result in [this.getArg(_), this.getArgByName(_)] // this includes the arg named "msg"
or
this.getFunction().(DataFlow::AttrRead).getAttributeName() = "log" and
result in [this.getArg(any(int i | i > 0)), this.getArgByName(any(string s | s != "level"))]
}
}
/**
* The class used to find the relevant log output method of the `flask.Flask.logger` instance (flask application).
*
* See `LogOutputMethods`
*/
private class FlaskLoggingCall extends DataFlow::CallCfgNode, LogOutput::Range {
FlaskLoggingCall() {
this =
Flask::FlaskApp::instance()
.getMember("logger")
.getMember(any(LogOutputMethods m))
.getACall()
}
override DataFlow::Node getAnInput() {
this.getFunction().(DataFlow::AttrRead).getAttributeName() != "log" and
result in [this.getArg(_), this.getArgByName(_)] // this includes the arg named "msg"
or
this.getFunction().(DataFlow::AttrRead).getAttributeName() = "log" and
result in [this.getArg(any(int i | i > 0)), this.getArgByName(any(string s | s != "level"))]
}
}
/**
* The class used to find the relevant log output method of the `django.utils.log.request_logger` instance (django application).
*
* See `LogOutputMethods`
*/
private class DjangoLoggingCall extends DataFlow::CallCfgNode, LogOutput::Range {
DjangoLoggingCall() {
this =
API::moduleImport("django")
.getMember("utils")
.getMember("log")
.getMember("request_logger")
.getMember(any(LogOutputMethods m))
.getACall()
}
override DataFlow::Node getAnInput() {
this.getFunction().(DataFlow::AttrRead).getAttributeName() != "log" and
result in [this.getArg(_), this.getArgByName(_)] // this includes the arg named "msg"
or
this.getFunction().(DataFlow::AttrRead).getAttributeName() = "log" and
result in [this.getArg(any(int i | i > 0)), this.getArgByName(any(string s | s != "level"))]
}
}
}

View File

@@ -0,0 +1,87 @@
private import python
private import experimental.semmle.python.Concepts
private import semmle.python.ApiGraphs
private import experimental.semmle.python.frameworks.JWT
private module Authlib {
/** Gets a reference to `authlib.jose.(jwt|JsonWebToken)` */
private API::Node authlibJWT() {
result in [
API::moduleImport("authlib").getMember("jose").getMember("jwt"),
API::moduleImport("authlib").getMember("jose").getMember("JsonWebToken").getReturn()
]
}
/** Gets a reference to `jwt.encode` */
private API::Node authlibJWTEncode() { result = authlibJWT().getMember("encode") }
/** Gets a reference to `jwt.decode` */
private API::Node authlibJWTDecode() { result = authlibJWT().getMember("decode") }
/**
* Gets a call to `authlib.jose.(jwt|JsonWebToken).encode`.
*
* Given the following example:
*
* ```py
* jwt.encode({"alg": "HS256"}, token, "key")
* ```
*
* * `this` would be `jwt.encode({"alg": "HS256"}, token, "key")`.
* * `getPayload()`'s result would be `token`.
* * `getKey()`'s result would be `"key"`.
* * `getAlgorithm()`'s result would be `"HS256"`.
* * `getAlgorithmstring()`'s result would be `HS256`.
*/
private class AuthlibJWTEncodeCall extends DataFlow::CallCfgNode, JWTEncoding::Range {
AuthlibJWTEncodeCall() { this = authlibJWTEncode().getACall() }
override DataFlow::Node getPayload() { result = this.getArg(1) }
override DataFlow::Node getKey() { result = this.getArg(2) }
override DataFlow::Node getAlgorithm() {
exists(KeyValuePair headerDict |
headerDict = this.getArg(0).asExpr().(Dict).getItem(_) and
headerDict.getKey().(Str_).getS().matches("alg") and
result.asExpr() = headerDict.getValue()
)
}
override string getAlgorithmString() {
exists(StrConst str |
DataFlow::exprNode(str).(DataFlow::LocalSourceNode).flowsTo(this.getAlgorithm()) and
result = str.getText()
)
}
}
/**
* Gets a call to `authlib.jose.(jwt|JsonWebToken).decode`
*
* Given the following example:
*
* ```py
* jwt.decode(token, key)
* ```
*
* * `this` would be `jwt.decode(token, key)`.
* * `getPayload()`'s result would be `token`.
* * `getKey()`'s result would be `key`.
*/
private class AuthlibJWTDecodeCall extends DataFlow::CallCfgNode, JWTDecoding::Range {
AuthlibJWTDecodeCall() { this = authlibJWTDecode().getACall() }
override DataFlow::Node getPayload() { result = this.getArg(0) }
override DataFlow::Node getKey() { result = this.getArg(1) }
override DataFlow::Node getAlgorithm() { none() }
override string getAlgorithmString() { none() }
override DataFlow::Node getOptions() { none() }
override predicate verifiesSignature() { any() }
}
}

View File

@@ -0,0 +1,108 @@
private import python
private import experimental.semmle.python.Concepts
private import semmle.python.ApiGraphs
private import experimental.semmle.python.frameworks.JWT
private module PyJWT {
/** Gets a reference to `jwt.encode` */
private API::Node pyjwtEncode() { result = API::moduleImport("jwt").getMember("encode") }
/** Gets a reference to `jwt.decode` */
private API::Node pyjwtDecode() { result = API::moduleImport("jwt").getMember("decode") }
/**
* Gets a call to `jwt.encode`.
*
* Given the following example:
*
* ```py
* jwt.encode(token, "key", "HS256")
* ```
*
* * `this` would be `jwt.encode(token, "key", "HS256")`.
* * `getPayload()`'s result would be `token`.
* * `getKey()`'s result would be `"key"`.
* * `getAlgorithm()`'s result would be `"HS256"`.
* * `getAlgorithmstring()`'s result would be `HS256`.
*/
private class PyJWTEncodeCall extends DataFlow::CallCfgNode, JWTEncoding::Range {
PyJWTEncodeCall() { this = pyjwtEncode().getACall() }
override DataFlow::Node getPayload() {
result in [this.getArg(0), this.getArgByName("payload")]
}
override DataFlow::Node getKey() { result in [this.getArg(1), this.getArgByName("key")] }
override DataFlow::Node getAlgorithm() {
result in [this.getArg(2), this.getArgByName("algorithm")]
}
override string getAlgorithmString() {
exists(StrConst str |
DataFlow::exprNode(str).(DataFlow::LocalSourceNode).flowsTo(this.getAlgorithm()) and
result = str.getText()
)
}
}
/**
* Gets a call to `jwt.decode`.
*
* Given the following example:
*
* ```py
* jwt.decode(token, key, "HS256", options={"verify_signature": True})
* ```
*
* * `this` would be `jwt.decode(token, key, options={"verify_signature": True})`.
* * `getPayload()`'s result would be `token`.
* * `getKey()`'s result would be `key`.
* * `getAlgorithm()`'s result would be `"HS256"`.
* * `getAlgorithmstring()`'s result would be `HS256`.
* * `getOptions()`'s result would be `{"verify_signature": True}`.
* * `verifiesSignature()` predicate would succeed.
*/
private class PyJWTDecodeCall extends DataFlow::CallCfgNode, JWTDecoding::Range {
PyJWTDecodeCall() { this = pyjwtDecode().getACall() }
override DataFlow::Node getPayload() { result in [this.getArg(0), this.getArgByName("jwt")] }
override DataFlow::Node getKey() { result in [this.getArg(1), this.getArgByName("key")] }
override DataFlow::Node getAlgorithm() {
result in [this.getArg(2), this.getArgByName("algorithms")]
}
override string getAlgorithmString() {
exists(StrConst str |
DataFlow::exprNode(str).(DataFlow::LocalSourceNode).flowsTo(this.getAlgorithm()) and
result = str.getText()
)
}
override DataFlow::Node getOptions() {
result in [this.getArg(3), this.getArgByName("options")]
}
override predicate verifiesSignature() {
not this.hasVerifySetToFalse() and
not this.hasVerifySignatureSetToFalse()
}
predicate hasNoVerifyArgumentOrOptions() {
not exists(this.getArgByName("verify")) and not exists(this.getOptions())
}
predicate hasVerifySetToFalse() { isFalse(this.getArgByName("verify")) }
predicate hasVerifySignatureSetToFalse() {
exists(KeyValuePair optionsDict, NameConstant falseName |
falseName.getId() = "False" and
optionsDict = this.getOptions().asExpr().(Dict).getItem(_) and
optionsDict.getKey().(Str_).getS().matches("%verify%") and
falseName = optionsDict.getValue()
)
}
}
}

View File

@@ -0,0 +1,105 @@
private import python
private import experimental.semmle.python.Concepts
private import semmle.python.ApiGraphs
private import experimental.semmle.python.frameworks.JWT
private module PythonJose {
/** Gets a reference to `jwt` */
private API::Node joseJWT() { result = API::moduleImport("jose").getMember("jwt") }
/** Gets a reference to `jwt.encode` */
private API::Node joseJWTEncode() { result = joseJWT().getMember("encode") }
/** Gets a reference to `jwt.decode` */
private API::Node joseJWTDecode() { result = joseJWT().getMember("decode") }
/**
* Gets a call to `jwt.encode`.
*
* Given the following example:
*
* ```py
* jwt.encode(token, key="key", algorithm="HS256")
* ```
*
* * `this` would be `jwt.encode(token, key="key", algorithm="HS256")`.
* * `getPayload()`'s result would be `token`.
* * `getKey()`'s result would be `"key"`.
* * `getAlgorithm()`'s result would be `"HS256"`.
* * `getAlgorithmstring()`'s result would be `HS256`.
*/
private class JoseJWTEncodeCall extends DataFlow::CallCfgNode, JWTEncoding::Range {
JoseJWTEncodeCall() { this = joseJWTEncode().getACall() }
override DataFlow::Node getPayload() { result = this.getArg(0) }
override DataFlow::Node getKey() { result in [this.getArg(1), this.getArgByName("key")] }
override DataFlow::Node getAlgorithm() {
result in [this.getArg(2), this.getArgByName("algorithm")]
}
override string getAlgorithmString() {
exists(StrConst str |
DataFlow::exprNode(str).(DataFlow::LocalSourceNode).flowsTo(this.getAlgorithm()) and
result = str.getText()
)
}
}
/**
* Gets a call to `jwt.decode`.
*
* Given the following example:
*
* ```py
* jwt.decode(token, "key", "HS256")
* ```
*
* * `this` would be `jwt.decode(token, "key", "HS256")`.
* * `getPayload()`'s result would be `token`.
* * `getKey()`'s result would be `"key"`.
* * `getAlgorithm()`'s result would be `"HS256"`.
* * `getAlgorithmstring()`'s result would be `HS256`.
* * `getOptions()`'s result would be none.
* * `verifiesSignature()` predicate would succeed.
*/
private class JoseJWTDecodeCall extends DataFlow::CallCfgNode, JWTDecoding::Range {
JoseJWTDecodeCall() { this = joseJWTDecode().getACall() }
override DataFlow::Node getPayload() { result = this.getArg(0) }
override DataFlow::Node getKey() { result in [this.getArg(1), this.getArgByName("key")] }
override DataFlow::Node getAlgorithm() {
result in [this.getArg(2), this.getArgByName("algorithms")]
}
override string getAlgorithmString() {
exists(StrConst str |
DataFlow::exprNode(str).(DataFlow::LocalSourceNode).flowsTo(this.getAlgorithm()) and
result = str.getText()
)
}
override DataFlow::Node getOptions() {
result in [this.getArg(3), this.getArgByName("options")]
}
override predicate verifiesSignature() {
// jwt.decode(token, key, options={"verify_signature": False})
not this.hasVerifySignatureSetToFalse()
}
predicate hasNoOptions() { not exists(this.getOptions()) }
predicate hasVerifySignatureSetToFalse() {
exists(KeyValuePair optionsDict, NameConstant falseName |
falseName.getId() = "False" and
optionsDict = this.getOptions().asExpr().(Dict).getItem(_) and
optionsDict.getKey().(Str_).getS().matches("%verify%") and
falseName = optionsDict.getValue()
)
}
}
}

View File

@@ -0,0 +1,51 @@
private import python
private import experimental.semmle.python.Concepts
private import semmle.python.ApiGraphs
private module Python_JWT {
/**
* Gets a call to `python_jwt.process_jwt`.
*
* Given the following example:
*
* ```py
* python_jwt.process_jwt(token)
* python_jwt.verify_jwt(token, "key", "HS256")
* ```
*
* * `this` would be `jwt.process_jwt(token)`.
* * `getPayload()`'s result would be `token`.
* * `getKey()`'s result would be `"key"`.
* * `getAlgorithm()`'s result would be `"HS256"`.
* * `getAlgorithmstring()`'s result would be `HS256`.
* * `getOptions()`'s result would be `none()`.
* * `verifiesSignature()` predicate would succeed.
*/
private class PythonJwtProcessCall extends DataFlow::CallCfgNode, JWTDecoding::Range {
PythonJwtProcessCall() {
this = API::moduleImport("python_jwt").getMember("process_jwt").getACall()
}
private DataFlow::CallCfgNode verifyCall() {
result = API::moduleImport("python_jwt").getMember("verify_jwt").getACall() and
this.getPayload().getALocalSource() = result.getArg(0).getALocalSource()
}
override DataFlow::Node getPayload() { result = this.getArg(0) }
override DataFlow::Node getKey() { result = this.verifyCall().getArg(1) }
override DataFlow::Node getAlgorithm() { result = this.verifyCall().getArg(2) }
override string getAlgorithmString() {
exists(StrConst str |
DataFlow::exprNode(str).(DataFlow::LocalSourceNode).flowsTo(this.getAlgorithm()) and
result = str.getText()
)
}
override DataFlow::Node getOptions() { none() }
override predicate verifiesSignature() { exists(this.verifyCall()) }
}
}

View File

@@ -0,0 +1,37 @@
/**
* Provides a taint tracking configuration for reasoning about random
* values that are not cryptographically secure.
*
* Note, for performance reasons: only import this file if
* `InsecureRandomness::Configuration` is needed, otherwise
* `InsecureRandomnessCustomizations` should be imported instead.
*/
private import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
/**
* A taint tracking configuration for random values that are not cryptographically secure.
*/
module InsecureRandomness {
import InsecureRandomnessCustomizations::InsecureRandomness
/**
* A taint-tracking configuration for reasoning about random values that are
* not cryptographically secure.
*/
class Configuration extends TaintTracking::Configuration {
Configuration() { this = "InsecureRandomness" }
override predicate isSource(DataFlow::Node source) { source instanceof Source }
override predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
override predicate isSanitizer(DataFlow::Node node) { node instanceof Sanitizer }
override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
guard instanceof SanitizerGuard
}
}
}

View File

@@ -0,0 +1,79 @@
/**
* Provides default sources, sinks and sanitizers for reasoning about random values that are
* not cryptographically secure, as well as extension points for adding your own.
*/
private import python
private import semmle.python.ApiGraphs
private import semmle.python.Concepts
private import semmle.python.dataflow.new.BarrierGuards
private import semmle.python.dataflow.new.internal.DataFlowPrivate
/**
* Provides default sources, sinks and sanitizers for reasoning about random values that are
* not cryptographically secure, as well as extension points for adding your own.
*/
module InsecureRandomness {
/**
* A data flow source for random values that are not cryptographically secure.
*/
abstract class Source extends DataFlow::Node { }
/**
* A data flow sink for random values that are not cryptographically secure.
*/
abstract class Sink extends DataFlow::Node { }
/**
* A sanitizer for random values that are not cryptographically secure.
*/
abstract class Sanitizer extends DataFlow::Node { }
/**
* A sanitizer guard for random values that are not cryptographically secure.
*/
abstract class SanitizerGuard extends DataFlow::BarrierGuard { }
/**
* A random source that is not sufficient for security use. So far this is only made up
* of the math package's rand function, more insufficient random sources can be added here.
*/
class InsecureRandomSource extends Source {
InsecureRandomSource() {
this =
API::moduleImport("random")
.getMember([
"betavariate", "choice", "choices", "expovariate", "gammavariate", "gauss",
"getrandbits", "getstate", "lognormvariate", "normalvariate", "paretovariate",
"randbytes", "randint", "random", "randrange", "sample", "seed", "setstate",
"shuffle", "triangular", "uniform", "vonmisesvariate", "weibullvariate"
])
.getACall()
}
}
/**
* A use in a function that heuristically deals with unsafe random numbers or random strings.
*/
class RandomFnSink extends Sink {
RandomFnSink() {
exists(DataFlowCallable randomFn |
randomFn
.getName()
.regexpMatch("(?i).*(gen(erate)?|make|mk|create).*(nonce|salt|pepper|Password).*")
|
this.getEnclosingCallable() = randomFn
)
}
}
/**
* A cryptographic key, considered as a sink for random values that are not cryptographically
* secure.
*/
class CryptoKeySink extends Sink {
CryptoKeySink() {
exists(Cryptography::CryptographicOperation operation | this = operation.getAnInput())
}
}
}

View File

@@ -1,24 +0,0 @@
/**
* Provides a taint-tracking configuration for detecting LDAP injection vulnerabilities
*/
import python
import experimental.semmle.python.Concepts
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import semmle.python.dataflow.new.RemoteFlowSources
/**
* A taint-tracking configuration for detecting LDAP injections.
*/
class LDAPInjectionFlowConfig extends TaintTracking::Configuration {
LDAPInjectionFlowConfig() { this = "LDAPInjectionFlowConfig" }
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
override predicate isSink(DataFlow::Node sink) { sink = any(LDAPQuery ldapQuery).getQuery() }
override predicate isSanitizer(DataFlow::Node sanitizer) {
sanitizer = any(LDAPEscape ldapEsc).getAnInput()
}
}

View File

@@ -1,24 +0,0 @@
import python
import semmle.python.Concepts
import experimental.semmle.python.Concepts
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import semmle.python.dataflow.new.RemoteFlowSources
/**
* A taint-tracking configuration for tracking untrusted user input used in log entries.
*/
class LogInjectionFlowConfig extends TaintTracking::Configuration {
LogInjectionFlowConfig() { this = "LogInjectionFlowConfig" }
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
override predicate isSink(DataFlow::Node sink) { sink = any(LogOutput logoutput).getAnInput() }
override predicate isSanitizer(DataFlow::Node node) {
exists(CallNode call |
node.asCfgNode() = call.getFunction().(AttrNode).getObject("replace") and
call.getArg(0).getNode().(StrConst).getText() in ["\r\n", "\n"]
)
}
}

View File

@@ -1,57 +1,54 @@
import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.DataFlow2
import semmle.python.dataflow.new.TaintTracking
import semmle.python.dataflow.new.TaintTracking2
import semmle.python.dataflow.new.RemoteFlowSources
import semmle.python.security.dataflow.ChainedConfigs12
import experimental.semmle.python.Concepts
import semmle.python.Concepts
/**
* A taint-tracking configuration for detecting string-to-dict conversions.
*/
class RFSToDictConfig extends TaintTracking::Configuration {
RFSToDictConfig() { this = "RFSToDictConfig" }
module NoSQLInjection {
class Configuration extends TaintTracking::Configuration {
Configuration() { this = "NoSQLInjection" }
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
override predicate isSource(DataFlow::Node source, DataFlow::FlowState state) {
source instanceof RemoteFlowSource and
state instanceof RemoteInput
}
override predicate isSink(DataFlow::Node sink) {
exists(Decoding decoding | decoding.getFormat() = "JSON" and sink = decoding.getOutput())
override predicate isSink(DataFlow::Node sink, DataFlow::FlowState state) {
sink = any(NoSQLQuery noSQLQuery).getQuery() and
state instanceof ConvertedToDict
}
override predicate isBarrier(DataFlow::Node node, DataFlow::FlowState state) {
// Block `RemoteInput` paths here, since they change state to `ConvertedToDict`
exists(Decoding decoding | decoding.getFormat() = "JSON" and node = decoding.getOutput()) and
state instanceof RemoteInput
}
override predicate isAdditionalFlowStep(
DataFlow::Node nodeFrom, DataFlow::FlowState stateFrom, DataFlow::Node nodeTo,
DataFlow::FlowState stateTo
) {
exists(Decoding decoding | decoding.getFormat() = "JSON" |
nodeFrom = decoding.getAnInput() and
nodeTo = decoding.getOutput()
) and
stateFrom instanceof RemoteInput and
stateTo instanceof ConvertedToDict
}
override predicate isSanitizer(DataFlow::Node sanitizer) {
sanitizer = any(NoSQLSanitizer noSQLSanitizer).getAnInput()
}
}
override predicate isSanitizer(DataFlow::Node sanitizer) {
sanitizer = any(NoSQLSanitizer noSQLSanitizer).getAnInput()
/** A flow state signifying remote input. */
class RemoteInput extends DataFlow::FlowState {
RemoteInput() { this = "RemoteInput" }
}
/** A flow state signifying remote input converted to a dictionary. */
class ConvertedToDict extends DataFlow::FlowState {
ConvertedToDict() { this = "ConvertedToDict" }
}
}
/**
* A taint-tracking configuration for detecting NoSQL injections (previously converted to a dict).
*/
class FromDataDictToSink extends TaintTracking2::Configuration {
FromDataDictToSink() { this = "FromDataDictToSink" }
override predicate isSource(DataFlow::Node source) {
exists(Decoding decoding | decoding.getFormat() = "JSON" and source = decoding.getOutput())
}
override predicate isSink(DataFlow::Node sink) { sink = any(NoSQLQuery noSQLQuery).getQuery() }
override predicate isSanitizer(DataFlow::Node sanitizer) {
sanitizer = any(NoSQLSanitizer noSQLSanitizer).getAnInput()
}
}
/**
* A predicate checking string-to-dict conversion and its arrival to a NoSQL injection sink.
*/
predicate noSQLInjectionFlow(CustomPathNode source, CustomPathNode sink) {
exists(
RFSToDictConfig config, DataFlow::PathNode mid1, DataFlow2::PathNode mid2,
FromDataDictToSink config2
|
config.hasFlowPath(source.asNode1(), mid1) and
config2.hasFlowPath(mid2, sink.asNode2()) and
mid1.getNode().asCfgNode() = mid2.getNode().asCfgNode()
)
}

View File

@@ -21,7 +21,7 @@ module XSLTInjection {
/**
* A kind of "taint", representing an untrusted XML string
*/
private class ExternalXmlStringKind extends ExternalStringKind {
deprecated private class ExternalXmlStringKind extends ExternalStringKind {
ExternalXmlStringKind() { this = "etree.XML string" }
override TaintKind getTaintForFlowStep(ControlFlowNode fromnode, ControlFlowNode tonode) {

Some files were not shown because too many files have changed in this diff Show More