mirror of
https://github.com/github/codeql.git
synced 2025-12-20 10:46:30 +01:00
Merge branch 'github:main' into jorgectf/python/ldapinsecureauth
This commit is contained in:
@@ -0,0 +1,33 @@
|
||||
/**
|
||||
* @name Clear-text logging of sensitive information
|
||||
* @description OLD QUERY: Logging sensitive information without encryption or hashing can
|
||||
* expose it to an attacker.
|
||||
* @kind path-problem
|
||||
* @problem.severity error
|
||||
* @id py/old/clear-text-logging-sensitive-data
|
||||
* @deprecated
|
||||
*/
|
||||
|
||||
import python
|
||||
import semmle.python.security.Paths
|
||||
import semmle.python.dataflow.TaintTracking
|
||||
import semmle.python.security.SensitiveData
|
||||
import semmle.python.security.ClearText
|
||||
|
||||
class CleartextLoggingConfiguration extends TaintTracking::Configuration {
|
||||
CleartextLoggingConfiguration() { this = "ClearTextLogging" }
|
||||
|
||||
override predicate isSource(DataFlow::Node src, TaintKind kind) {
|
||||
src.asCfgNode().(SensitiveData::Source).isSourceOf(kind)
|
||||
}
|
||||
|
||||
override predicate isSink(DataFlow::Node sink, TaintKind kind) {
|
||||
sink.asCfgNode() instanceof ClearTextLogging::Sink and
|
||||
kind instanceof SensitiveData
|
||||
}
|
||||
}
|
||||
|
||||
from CleartextLoggingConfiguration config, TaintedPathSource source, TaintedPathSink sink
|
||||
where config.hasFlowPath(source, sink)
|
||||
select sink.getSink(), source, sink, "Sensitive data returned by $@ is logged here.",
|
||||
source.getSource(), source.getCfgNode().(SensitiveData::Source).repr()
|
||||
@@ -0,0 +1,33 @@
|
||||
/**
|
||||
* @name Clear-text storage of sensitive information
|
||||
* @description OLD QUERY: Sensitive information stored without encryption or hashing can expose it to an
|
||||
* attacker.
|
||||
* @kind path-problem
|
||||
* @problem.severity error
|
||||
* @id py/old/clear-text-storage-sensitive-data
|
||||
* @deprecated
|
||||
*/
|
||||
|
||||
import python
|
||||
import semmle.python.security.Paths
|
||||
import semmle.python.dataflow.TaintTracking
|
||||
import semmle.python.security.SensitiveData
|
||||
import semmle.python.security.ClearText
|
||||
|
||||
class CleartextStorageConfiguration extends TaintTracking::Configuration {
|
||||
CleartextStorageConfiguration() { this = "ClearTextStorage" }
|
||||
|
||||
override predicate isSource(DataFlow::Node src, TaintKind kind) {
|
||||
src.asCfgNode().(SensitiveData::Source).isSourceOf(kind)
|
||||
}
|
||||
|
||||
override predicate isSink(DataFlow::Node sink, TaintKind kind) {
|
||||
sink.asCfgNode() instanceof ClearTextStorage::Sink and
|
||||
kind instanceof SensitiveData
|
||||
}
|
||||
}
|
||||
|
||||
from CleartextStorageConfiguration config, TaintedPathSource source, TaintedPathSink sink
|
||||
where config.hasFlowPath(source, sink)
|
||||
select sink.getSink(), source, sink, "Sensitive data from $@ is stored here.", source.getSource(),
|
||||
source.getCfgNode().(SensitiveData::Source).repr()
|
||||
@@ -1,28 +0,0 @@
|
||||
from django.conf.urls import url
|
||||
from clickhouse_driver import Client
|
||||
from clickhouse_driver import connect
|
||||
from aioch import Client as aiochClient
|
||||
|
||||
class MyClient(Client):
|
||||
def dummy(self):
|
||||
return None
|
||||
|
||||
def show_user(request, username):
|
||||
|
||||
# BAD -- Untrusted user input is directly injected into the sql query using async library 'aioch'
|
||||
aclient = aiochClient("localhost")
|
||||
progress = await aclient.execute_with_progress("SELECT * FROM users WHERE username = '%s'" % username)
|
||||
|
||||
# BAD -- Untrusted user input is directly injected into the sql query using native client of library 'clickhouse_driver'
|
||||
Client('localhost').execute("SELECT * FROM users WHERE username = '%s'" % username)
|
||||
|
||||
# GOOD -- query uses prepared statements
|
||||
query = "SELECT * FROM users WHERE username = %(username)s"
|
||||
Client('localhost').execute(query, {"username": username})
|
||||
|
||||
# BAD -- PEP249 interface
|
||||
conn = connect('clickhouse://localhost')
|
||||
cursor = conn.cursor()
|
||||
cursor.execute("SELECT * FROM users WHERE username = '%s'" % username)
|
||||
|
||||
urlpatterns = [url(r'^users/(?P<username>[^/]+)$', show_user)]
|
||||
@@ -1,59 +0,0 @@
|
||||
<!DOCTYPE qhelp PUBLIC
|
||||
"-//Semmle//qhelp//EN"
|
||||
"qhelp.dtd">
|
||||
<qhelp>
|
||||
|
||||
<overview>
|
||||
<p>
|
||||
If a database query (such as a SQL or NoSQL query) is built from
|
||||
user-provided data without sufficient sanitization, a user
|
||||
may be able to run malicious database queries.
|
||||
</p>
|
||||
</overview>
|
||||
|
||||
<recommendation>
|
||||
<p>
|
||||
Most database connector libraries offer a way of safely
|
||||
embedding untrusted data into a query by means of query parameters
|
||||
or prepared statements.
|
||||
</p>
|
||||
</recommendation>
|
||||
|
||||
<example>
|
||||
<p>
|
||||
In the following snippet, a user is fetched from a <code>ClickHouse</code> database
|
||||
using five different queries. In the "BAD" cases the query is built directly from user-controlled data.
|
||||
In the "GOOD" case the user-controlled data is safely embedded into the query by using query parameters.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
In the first case, the query executed via aioch Client. aioch - is a module
|
||||
for asynchronous queries to database.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
In the second and third cases, the connection is established via `Client` class.
|
||||
This class implement different method to execute a query.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
In the forth case, good pattern is presented. Query parameters are send through
|
||||
second dict-like argument.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
In the fifth case, there is example of PEP249 interface usage.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
In the sixth case, there is custom Class usge which is a subclass of default Client.
|
||||
</p>
|
||||
|
||||
<sample src="ClickHouseSQLInjection.py" />
|
||||
</example>
|
||||
|
||||
<references>
|
||||
<li>Wikipedia: <a href="https://en.wikipedia.org/wiki/SQL_injection">SQL injection</a>.</li>
|
||||
<li>OWASP: <a href="https://cheatsheetseries.owasp.org/cheatsheets/SQL_Injection_Prevention_Cheat_Sheet.html">SQL Injection Prevention Cheat Sheet</a>.</li>
|
||||
</references>
|
||||
</qhelp>
|
||||
@@ -1,22 +0,0 @@
|
||||
/**
|
||||
* @id py/yandex/clickhouse-sql-injection
|
||||
* @name Clickhouse SQL query built from user-controlled sources
|
||||
* @description Building a SQL query from user-controlled sources is vulnerable to insertion of
|
||||
* malicious SQL code by the user.
|
||||
* @kind path-problem
|
||||
* @problem.severity error
|
||||
* @precision high
|
||||
* @tags security
|
||||
* external/cwe/cwe-089
|
||||
* external/owasp/owasp-a1
|
||||
*/
|
||||
|
||||
import python
|
||||
import experimental.semmle.python.frameworks.ClickHouseDriver
|
||||
import semmle.python.security.dataflow.SqlInjection
|
||||
import DataFlow::PathGraph
|
||||
|
||||
from SQLInjectionConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink
|
||||
where config.hasFlowPath(source, sink)
|
||||
select sink.getNode(), source, sink, "This SQL query depends on $@.", source.getNode(),
|
||||
"a user-provided value"
|
||||
@@ -0,0 +1,6 @@
|
||||
import python
|
||||
import semmle.python.security.performance.SuperlinearBackTracking
|
||||
|
||||
from PolynomialBackTrackingTerm t
|
||||
where t.getLocation().getFile().getBaseName() = "KnownCVEs.py"
|
||||
select t.getRegex(), t, t.getReason()
|
||||
@@ -0,0 +1,108 @@
|
||||
<!DOCTYPE qhelp PUBLIC
|
||||
"-//Semmle//qhelp//EN"
|
||||
"qhelp.dtd">
|
||||
|
||||
<qhelp>
|
||||
|
||||
<include src="ReDoSIntroduction.inc.qhelp" />
|
||||
|
||||
<example>
|
||||
<p>
|
||||
|
||||
Consider this use of a regular expression, which removes
|
||||
all leading and trailing whitespace in a string:
|
||||
|
||||
</p>
|
||||
|
||||
<sample language="python">
|
||||
re.sub(r"^\s+|\s+$", "", text) # BAD
|
||||
</sample>
|
||||
|
||||
<p>
|
||||
|
||||
The sub-expression <code>"\s+$"</code> will match the
|
||||
whitespace characters in <code>text</code> from left to right, but it
|
||||
can start matching anywhere within a whitespace sequence. This is
|
||||
problematic for strings that do <strong>not</strong> end with a whitespace
|
||||
character. Such a string will force the regular expression engine to
|
||||
process each whitespace sequence once per whitespace character in the
|
||||
sequence.
|
||||
|
||||
</p>
|
||||
|
||||
<p>
|
||||
|
||||
This ultimately means that the time cost of trimming a
|
||||
string is quadratic in the length of the string. So a string like
|
||||
<code>"a b"</code> will take milliseconds to process, but a similar
|
||||
string with a million spaces instead of just one will take several
|
||||
minutes.
|
||||
|
||||
</p>
|
||||
|
||||
<p>
|
||||
|
||||
Avoid this problem by rewriting the regular expression to
|
||||
not contain the ambiguity about when to start matching whitespace
|
||||
sequences. For instance, by using a negative look-behind
|
||||
(<code>^\s+|(?<!\s)\s+$</code>), or just by using the built-in strip
|
||||
method (<code>text.strip()</code>).
|
||||
|
||||
</p>
|
||||
|
||||
<p>
|
||||
|
||||
Note that the sub-expression <code>"^\s+"</code> is
|
||||
<strong>not</strong> problematic as the <code>^</code> anchor restricts
|
||||
when that sub-expression can start matching, and as the regular
|
||||
expression engine matches from left to right.
|
||||
|
||||
</p>
|
||||
|
||||
</example>
|
||||
|
||||
<example>
|
||||
|
||||
<p>
|
||||
|
||||
As a similar, but slightly subtler problem, consider the
|
||||
regular expression that matches lines with numbers, possibly written
|
||||
using scientific notation:
|
||||
</p>
|
||||
|
||||
<sample language="python">
|
||||
^0\.\d+E?\d+$ # BAD
|
||||
</sample>
|
||||
|
||||
<p>
|
||||
|
||||
The problem with this regular expression is in the
|
||||
sub-expression <code>\d+E?\d+</code> because the second
|
||||
<code>\d+</code> can start matching digits anywhere after the first
|
||||
match of the first <code>\d+</code> if there is no <code>E</code> in
|
||||
the input string.
|
||||
|
||||
</p>
|
||||
|
||||
<p>
|
||||
|
||||
This is problematic for strings that do <strong>not</strong>
|
||||
end with a digit. Such a string will force the regular expression
|
||||
engine to process each digit sequence once per digit in the sequence,
|
||||
again leading to a quadratic time complexity.
|
||||
|
||||
</p>
|
||||
|
||||
<p>
|
||||
|
||||
To make the processing faster, the regular expression
|
||||
should be rewritten such that the two <code>\d+</code> sub-expressions
|
||||
do not have overlapping matches: <code>^0\.\d+(E\d+)?$</code>.
|
||||
|
||||
</p>
|
||||
|
||||
</example>
|
||||
|
||||
<include src="ReDoSReferences.inc.qhelp"/>
|
||||
|
||||
</qhelp>
|
||||
@@ -0,0 +1,33 @@
|
||||
/**
|
||||
* @name Polynomial regular expression used on uncontrolled data
|
||||
* @description A regular expression that can require polynomial time
|
||||
* to match may be vulnerable to denial-of-service attacks.
|
||||
* @kind path-problem
|
||||
* @problem.severity warning
|
||||
* @precision high
|
||||
* @id py/polynomial-redos
|
||||
* @tags security
|
||||
* external/cwe/cwe-730
|
||||
* external/cwe/cwe-400
|
||||
*/
|
||||
|
||||
import python
|
||||
import semmle.python.security.performance.SuperlinearBackTracking
|
||||
import semmle.python.security.dataflow.PolynomialReDoS
|
||||
import DataFlow::PathGraph
|
||||
|
||||
from
|
||||
PolynomialReDoS::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink,
|
||||
PolynomialReDoS::Sink sinkNode, PolynomialBackTrackingTerm regexp
|
||||
where
|
||||
config.hasFlowPath(source, sink) and
|
||||
sinkNode = sink.getNode() and
|
||||
regexp.getRootTerm() = sinkNode.getRegExp()
|
||||
// not (
|
||||
// source.getNode().(Source).getKind() = "url" and
|
||||
// regexp.isAtEndLine()
|
||||
// )
|
||||
select sinkNode.getHighlight(), source, sink,
|
||||
"This $@ that depends on $@ may run slow on strings " + regexp.getPrefixMessage() +
|
||||
"with many repetitions of '" + regexp.getPumpString() + "'.", regexp, "regular expression",
|
||||
source.getNode(), "a user-provided value"
|
||||
34
python/ql/src/experimental/Security/CWE-730/ReDoS.qhelp
Normal file
34
python/ql/src/experimental/Security/CWE-730/ReDoS.qhelp
Normal file
@@ -0,0 +1,34 @@
|
||||
<!DOCTYPE qhelp PUBLIC
|
||||
"-//Semmle//qhelp//EN"
|
||||
"qhelp.dtd">
|
||||
|
||||
<qhelp>
|
||||
|
||||
<include src="ReDoSIntroduction.inc.qhelp" />
|
||||
|
||||
<example>
|
||||
<p>
|
||||
Consider this regular expression:
|
||||
</p>
|
||||
<sample language="python">
|
||||
^_(__|.)+_$
|
||||
</sample>
|
||||
<p>
|
||||
Its sub-expression <code>"(__|.)+?"</code> can match the string <code>"__"</code> either by the
|
||||
first alternative <code>"__"</code> to the left of the <code>"|"</code> operator, or by two
|
||||
repetitions of the second alternative <code>"."</code> to the right. Thus, a string consisting
|
||||
of an odd number of underscores followed by some other character will cause the regular
|
||||
expression engine to run for an exponential amount of time before rejecting the input.
|
||||
</p>
|
||||
<p>
|
||||
This problem can be avoided by rewriting the regular expression to remove the ambiguity between
|
||||
the two branches of the alternative inside the repetition:
|
||||
</p>
|
||||
<sample language="python">
|
||||
^_(__|[^_])+_$
|
||||
</sample>
|
||||
</example>
|
||||
|
||||
<include src="ReDoSReferences.inc.qhelp"/>
|
||||
|
||||
</qhelp>
|
||||
25
python/ql/src/experimental/Security/CWE-730/ReDoS.ql
Normal file
25
python/ql/src/experimental/Security/CWE-730/ReDoS.ql
Normal file
@@ -0,0 +1,25 @@
|
||||
/**
|
||||
* @name Inefficient regular expression
|
||||
* @description A regular expression that requires exponential time to match certain inputs
|
||||
* can be a performance bottleneck, and may be vulnerable to denial-of-service
|
||||
* attacks.
|
||||
* @kind problem
|
||||
* @problem.severity error
|
||||
* @precision high
|
||||
* @id py/redos
|
||||
* @tags security
|
||||
* external/cwe/cwe-730
|
||||
* external/cwe/cwe-400
|
||||
*/
|
||||
|
||||
import python
|
||||
import semmle.python.security.performance.ExponentialBackTracking
|
||||
|
||||
from RegExpTerm t, string pump, State s, string prefixMsg
|
||||
where
|
||||
hasReDoSResult(t, pump, s, prefixMsg) and
|
||||
// exclude verbose mode regexes for now
|
||||
not t.getRegex().getAMode() = "VERBOSE"
|
||||
select t,
|
||||
"This part of the regular expression may cause exponential backtracking on strings " + prefixMsg +
|
||||
"containing many repetitions of '" + pump + "'."
|
||||
@@ -0,0 +1,54 @@
|
||||
<!DOCTYPE qhelp PUBLIC
|
||||
"-//Semmle//qhelp//EN"
|
||||
"qhelp.dtd">
|
||||
<qhelp>
|
||||
<overview>
|
||||
<p>
|
||||
|
||||
Some regular expressions take a long time to match certain
|
||||
input strings to the point where the time it takes to match a string
|
||||
of length <i>n</i> is proportional to <i>n<sup>k</sup></i> or even
|
||||
<i>2<sup>n</sup></i>. Such regular expressions can negatively affect
|
||||
performance, or even allow a malicious user to perform a Denial of
|
||||
Service ("DoS") attack by crafting an expensive input string for the
|
||||
regular expression to match.
|
||||
|
||||
</p>
|
||||
|
||||
<p>
|
||||
|
||||
The regular expression engine provided by Python uses a backtracking non-deterministic finite
|
||||
automata to implement regular expression matching. While this approach
|
||||
is space-efficient and allows supporting advanced features like
|
||||
capture groups, it is not time-efficient in general. The worst-case
|
||||
time complexity of such an automaton can be polynomial or even
|
||||
exponential, meaning that for strings of a certain shape, increasing
|
||||
the input length by ten characters may make the automaton about 1000
|
||||
times slower.
|
||||
|
||||
</p>
|
||||
|
||||
<p>
|
||||
|
||||
Typically, a regular expression is affected by this
|
||||
problem if it contains a repetition of the form <code>r*</code> or
|
||||
<code>r+</code> where the sub-expression <code>r</code> is ambiguous
|
||||
in the sense that it can match some string in multiple ways. More
|
||||
information about the precise circumstances can be found in the
|
||||
references.
|
||||
|
||||
</p>
|
||||
</overview>
|
||||
|
||||
<recommendation>
|
||||
|
||||
<p>
|
||||
|
||||
Modify the regular expression to remove the ambiguity, or
|
||||
ensure that the strings matched with the regular expression are short
|
||||
enough that the time-complexity does not matter.
|
||||
|
||||
</p>
|
||||
|
||||
</recommendation>
|
||||
</qhelp>
|
||||
@@ -0,0 +1,16 @@
|
||||
<!DOCTYPE qhelp PUBLIC
|
||||
"-//Semmle//qhelp//EN"
|
||||
"qhelp.dtd">
|
||||
<qhelp>
|
||||
<references>
|
||||
<li>
|
||||
OWASP:
|
||||
<a href="https://www.owasp.org/index.php/Regular_expression_Denial_of_Service_-_ReDoS">Regular expression Denial of Service - ReDoS</a>.
|
||||
</li>
|
||||
<li>Wikipedia: <a href="https://en.wikipedia.org/wiki/ReDoS">ReDoS</a>.</li>
|
||||
<li>Wikipedia: <a href="https://en.wikipedia.org/wiki/Time_complexity">Time complexity</a>.</li>
|
||||
<li>James Kirrage, Asiri Rathnayake, Hayo Thielecke:
|
||||
<a href="http://www.cs.bham.ac.uk/~hxt/research/reg-exp-sec.pdf">Static Analysis for Regular Expression Denial-of-Service Attack</a>.
|
||||
</li>
|
||||
</references>
|
||||
</qhelp>
|
||||
@@ -199,3 +199,36 @@ class LDAPBind extends DataFlow::Node {
|
||||
*/
|
||||
predicate useSSL() { range.useSSL() }
|
||||
}
|
||||
|
||||
/** Provides classes for modeling SQL sanitization libraries. */
|
||||
module SQLEscape {
|
||||
/**
|
||||
* A data-flow node that collects functions that escape SQL statements.
|
||||
*
|
||||
* Extend this class to model new APIs. If you want to refine existing API models,
|
||||
* extend `SQLEscape` instead.
|
||||
*/
|
||||
abstract class Range extends DataFlow::Node {
|
||||
/**
|
||||
* Gets the argument containing the raw SQL statement.
|
||||
*/
|
||||
abstract DataFlow::Node getAnInput();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A data-flow node that collects functions escaping SQL statements.
|
||||
*
|
||||
* Extend this class to refine existing API models. If you want to model new APIs,
|
||||
* extend `SQLEscape::Range` instead.
|
||||
*/
|
||||
class SQLEscape extends DataFlow::Node {
|
||||
SQLEscape::Range range;
|
||||
|
||||
SQLEscape() { this = range }
|
||||
|
||||
/**
|
||||
* Gets the argument containing the raw SQL statement.
|
||||
*/
|
||||
DataFlow::Node getAnInput() { result = range.getAnInput() }
|
||||
}
|
||||
|
||||
@@ -1,85 +0,0 @@
|
||||
/**
|
||||
* Provides classes modeling security-relevant aspects of `clickhouse-driver` and `aioch` PyPI packages.
|
||||
* See
|
||||
* - https://pypi.org/project/clickhouse-driver/
|
||||
* - https://pypi.org/project/aioch/
|
||||
* - https://clickhouse-driver.readthedocs.io/en/latest/
|
||||
*/
|
||||
|
||||
private import python
|
||||
private import semmle.python.Concepts
|
||||
private import semmle.python.ApiGraphs
|
||||
private import semmle.python.frameworks.PEP249
|
||||
|
||||
/**
|
||||
* Provides models for `clickhouse-driver` and `aioch` PyPI packages.
|
||||
* See
|
||||
* - https://pypi.org/project/clickhouse-driver/
|
||||
* - https://pypi.org/project/aioch/
|
||||
* - https://clickhouse-driver.readthedocs.io/en/latest/
|
||||
*/
|
||||
module ClickHouseDriver {
|
||||
/** Gets a reference to the `clickhouse_driver` module. */
|
||||
API::Node clickhouse_driver() { result = API::moduleImport("clickhouse_driver") }
|
||||
|
||||
/** Gets a reference to the `aioch` module. This module allows to make async db queries. */
|
||||
API::Node aioch() { result = API::moduleImport("aioch") }
|
||||
|
||||
/**
|
||||
* `clickhouse_driver` implements PEP249,
|
||||
* providing ways to execute SQL statements against a database.
|
||||
*/
|
||||
class ClickHouseDriverPEP249 extends PEP249ModuleApiNode {
|
||||
ClickHouseDriverPEP249() { this = clickhouse_driver() }
|
||||
}
|
||||
|
||||
module Client {
|
||||
/** Gets a reference to a Client call. */
|
||||
private DataFlow::Node client_ref() {
|
||||
result = clickhouse_driver().getMember("Client").getASubclass*().getAUse()
|
||||
or
|
||||
result = aioch().getMember("Client").getASubclass*().getAUse()
|
||||
}
|
||||
|
||||
/** A direct instantiation of `clickhouse_driver.Client`. */
|
||||
private class ClientInstantiation extends DataFlow::CallCfgNode {
|
||||
ClientInstantiation() { this.getFunction() = client_ref() }
|
||||
}
|
||||
|
||||
/** Gets a reference to an instance of `clickhouse_driver.Client`. */
|
||||
private DataFlow::LocalSourceNode instance(DataFlow::TypeTracker t) {
|
||||
t.start() and
|
||||
result instanceof ClientInstantiation
|
||||
or
|
||||
exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t))
|
||||
}
|
||||
|
||||
/** Gets a reference to an instance of `clickhouse_driver.Client`. */
|
||||
DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }
|
||||
}
|
||||
|
||||
/** clickhouse_driver.Client execute methods */
|
||||
private string execute_function() {
|
||||
result in ["execute_with_progress", "execute", "execute_iter"]
|
||||
}
|
||||
|
||||
/** Gets a reference to the `clickhouse_driver.Client.execute` method */
|
||||
private DataFlow::LocalSourceNode clickhouse_execute(DataFlow::TypeTracker t) {
|
||||
t.startInAttr(execute_function()) and
|
||||
result = Client::instance()
|
||||
or
|
||||
exists(DataFlow::TypeTracker t2 | result = clickhouse_execute(t2).track(t2, t))
|
||||
}
|
||||
|
||||
/** Gets a reference to the `clickhouse_driver.Client.execute` method */
|
||||
DataFlow::Node clickhouse_execute() {
|
||||
clickhouse_execute(DataFlow::TypeTracker::end()).flowsTo(result)
|
||||
}
|
||||
|
||||
/** A call to the `clickhouse_driver.Client.execute` method */
|
||||
private class ExecuteCall extends SqlExecution::Range, DataFlow::CallCfgNode {
|
||||
ExecuteCall() { this.getFunction() = clickhouse_execute() }
|
||||
|
||||
override DataFlow::Node getSql() { result.asCfgNode() = node.getArg(0) }
|
||||
}
|
||||
}
|
||||
@@ -26,7 +26,7 @@ private module LDAP {
|
||||
API::Node ldapInitialize() { result = ldap().getMember("initialize") }
|
||||
|
||||
/** Gets a reference to a `ldap` operation. */
|
||||
private DataFlow::LocalSourceNode ldapOperation(DataFlow::TypeTracker t) {
|
||||
private DataFlow::TypeTrackingNode ldapOperation(DataFlow::TypeTracker t) {
|
||||
t.start() and
|
||||
result.(DataFlow::AttrRead).getObject().getALocalSource() = ldapInitialize().getACall()
|
||||
or
|
||||
|
||||
@@ -0,0 +1,148 @@
|
||||
/**
|
||||
* Provides classes modeling security-relevant aspects of the 'SqlAlchemy' package.
|
||||
* See https://pypi.org/project/SQLAlchemy/.
|
||||
*/
|
||||
|
||||
private import python
|
||||
private import semmle.python.dataflow.new.DataFlow
|
||||
private import semmle.python.dataflow.new.TaintTracking
|
||||
private import semmle.python.ApiGraphs
|
||||
private import semmle.python.Concepts
|
||||
private import experimental.semmle.python.Concepts
|
||||
|
||||
private module SqlAlchemy {
|
||||
/**
|
||||
* Returns an instantization of a SqlAlchemy Session object.
|
||||
* See https://docs.sqlalchemy.org/en/14/orm/session_api.html#sqlalchemy.orm.Session and
|
||||
* https://docs.sqlalchemy.org/en/14/orm/session_api.html#sqlalchemy.orm.sessionmaker
|
||||
*/
|
||||
private API::Node getSqlAlchemySessionInstance() {
|
||||
result = API::moduleImport("sqlalchemy.orm").getMember("Session").getReturn() or
|
||||
result = API::moduleImport("sqlalchemy.orm").getMember("sessionmaker").getReturn().getReturn()
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an instantization of a SqlAlchemy Engine object.
|
||||
* See https://docs.sqlalchemy.org/en/14/core/engines.html#sqlalchemy.create_engine
|
||||
*/
|
||||
private API::Node getSqlAlchemyEngineInstance() {
|
||||
result = API::moduleImport("sqlalchemy").getMember("create_engine").getReturn()
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an instantization of a SqlAlchemy Query object.
|
||||
* See https://docs.sqlalchemy.org/en/14/orm/query.html?highlight=query#sqlalchemy.orm.Query
|
||||
*/
|
||||
private API::Node getSqlAlchemyQueryInstance() {
|
||||
result = getSqlAlchemySessionInstance().getMember("query").getReturn()
|
||||
}
|
||||
|
||||
/**
|
||||
* A call to `execute` meant to execute an SQL expression
|
||||
* See the following links:
|
||||
* - https://docs.sqlalchemy.org/en/14/core/connections.html?highlight=execute#sqlalchemy.engine.Connection.execute
|
||||
* - https://docs.sqlalchemy.org/en/14/core/connections.html?highlight=execute#sqlalchemy.engine.Engine.execute
|
||||
* - https://docs.sqlalchemy.org/en/14/orm/session_api.html?highlight=execute#sqlalchemy.orm.Session.execute
|
||||
*/
|
||||
private class SqlAlchemyExecuteCall extends DataFlow::CallCfgNode, SqlExecution::Range {
|
||||
SqlAlchemyExecuteCall() {
|
||||
// new way
|
||||
this = getSqlAlchemySessionInstance().getMember("execute").getACall() or
|
||||
this =
|
||||
getSqlAlchemyEngineInstance()
|
||||
.getMember(["connect", "begin"])
|
||||
.getReturn()
|
||||
.getMember("execute")
|
||||
.getACall()
|
||||
}
|
||||
|
||||
override DataFlow::Node getSql() { result = this.getArg(0) }
|
||||
}
|
||||
|
||||
/**
|
||||
* A call to `scalar` meant to execute an SQL expression
|
||||
* See https://docs.sqlalchemy.org/en/14/orm/session_api.html#sqlalchemy.orm.Session.scalar and
|
||||
* https://docs.sqlalchemy.org/en/14/core/connections.html?highlight=scalar#sqlalchemy.engine.Engine.scalar
|
||||
*/
|
||||
private class SqlAlchemyScalarCall extends DataFlow::CallCfgNode, SqlExecution::Range {
|
||||
SqlAlchemyScalarCall() {
|
||||
this =
|
||||
[getSqlAlchemySessionInstance(), getSqlAlchemyEngineInstance()]
|
||||
.getMember("scalar")
|
||||
.getACall()
|
||||
}
|
||||
|
||||
override DataFlow::Node getSql() { result = this.getArg(0) }
|
||||
}
|
||||
|
||||
/**
|
||||
* A call on a Query object
|
||||
* See https://docs.sqlalchemy.org/en/14/orm/query.html?highlight=query#sqlalchemy.orm.Query
|
||||
*/
|
||||
private class SqlAlchemyQueryCall extends DataFlow::CallCfgNode, SqlExecution::Range {
|
||||
SqlAlchemyQueryCall() {
|
||||
this =
|
||||
getSqlAlchemyQueryInstance()
|
||||
.getMember(any(SqlAlchemyVulnerableMethodNames methodName))
|
||||
.getACall()
|
||||
}
|
||||
|
||||
override DataFlow::Node getSql() { result = this.getArg(0) }
|
||||
}
|
||||
|
||||
/**
|
||||
* This class represents a list of methods vulnerable to sql injection.
|
||||
*
|
||||
* See https://github.com/jty-team/codeql/pull/2#issue-611592361
|
||||
*/
|
||||
private class SqlAlchemyVulnerableMethodNames extends string {
|
||||
SqlAlchemyVulnerableMethodNames() { this in ["filter", "filter_by", "group_by", "order_by"] }
|
||||
}
|
||||
|
||||
/**
|
||||
* Additional taint-steps for `sqlalchemy.text()`
|
||||
*
|
||||
* See https://docs.sqlalchemy.org/en/14/core/sqlelement.html#sqlalchemy.sql.expression.text
|
||||
* See https://docs.sqlalchemy.org/en/14/core/sqlelement.html#sqlalchemy.sql.expression.TextClause
|
||||
*/
|
||||
class SqlAlchemyTextAdditionalTaintSteps extends TaintTracking::AdditionalTaintStep {
|
||||
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
|
||||
exists(DataFlow::CallCfgNode call |
|
||||
(
|
||||
call = API::moduleImport("sqlalchemy").getMember("text").getACall()
|
||||
or
|
||||
call = API::moduleImport("sqlalchemy").getMember("sql").getMember("text").getACall()
|
||||
or
|
||||
call =
|
||||
API::moduleImport("sqlalchemy")
|
||||
.getMember("sql")
|
||||
.getMember("expression")
|
||||
.getMember("text")
|
||||
.getACall()
|
||||
or
|
||||
call =
|
||||
API::moduleImport("sqlalchemy")
|
||||
.getMember("sql")
|
||||
.getMember("expression")
|
||||
.getMember("TextClause")
|
||||
.getACall()
|
||||
) and
|
||||
nodeFrom in [call.getArg(0), call.getArgByName("text")] and
|
||||
nodeTo = call
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a reference to `sqlescapy.sqlescape`.
|
||||
*
|
||||
* See https://pypi.org/project/sqlescapy/
|
||||
*/
|
||||
class SQLEscapySanitizerCall extends DataFlow::CallCfgNode, SQLEscape::Range {
|
||||
SQLEscapySanitizerCall() {
|
||||
this = API::moduleImport("sqlescapy").getMember("sqlescape").getACall()
|
||||
}
|
||||
|
||||
override DataFlow::Node getAnInput() { result = this.getArg(0) }
|
||||
}
|
||||
}
|
||||
@@ -64,15 +64,14 @@ private module Re {
|
||||
*
|
||||
* See https://docs.python.org/3/library/re.html#regular-expression-objects
|
||||
*/
|
||||
private class CompiledRegex extends DataFlow::CallCfgNode, RegexExecution::Range {
|
||||
private class CompiledRegex extends DataFlow::MethodCallNode, RegexExecution::Range {
|
||||
DataFlow::Node regexNode;
|
||||
|
||||
CompiledRegex() {
|
||||
exists(DataFlow::CallCfgNode patternCall, DataFlow::AttrRead reMethod |
|
||||
this.getFunction() = reMethod and
|
||||
exists(DataFlow::MethodCallNode patternCall |
|
||||
patternCall = API::moduleImport("re").getMember("compile").getACall() and
|
||||
patternCall.flowsTo(reMethod.getObject()) and
|
||||
reMethod.getAttributeName() instanceof RegexExecutionMethods and
|
||||
patternCall.flowsTo(this.getObject()) and
|
||||
this.getMethodName() instanceof RegexExecutionMethods and
|
||||
regexNode = patternCall.getArg(0)
|
||||
)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user