Merge branch 'main' into markupsafe-modeling

This commit is contained in:
Rasmus Wriedt Larsen
2021-06-30 13:55:08 +02:00
1464 changed files with 186473 additions and 17643 deletions

View File

@@ -4,9 +4,10 @@
* @kind problem
* @tags security
* correctness
* security/cwe/cwe-78
* security/cwe/cwe-94
* security/cwe/cwe-95
* @problem.severity error
* @security-severity 5.9
* @security-severity 9.8
* @sub-severity high
* @precision high
* @id py/use-of-input

View File

@@ -6,7 +6,7 @@
* @tags security
* external/cwe/cwe-200
* @problem.severity error
* @security-severity 3.6
* @security-severity 6.5
* @sub-severity low
* @precision high
* @id py/bind-socket-all-network-interfaces

View File

@@ -5,7 +5,7 @@
* @kind path-problem
* @precision low
* @problem.severity error
* @security-severity 5.9
* @security-severity 7.8
* @tags security external/cwe/cwe-20
*/

View File

@@ -3,7 +3,7 @@
* @description Matching a URL or hostname against a regular expression that contains an unescaped dot as part of the hostname might match more hostnames than expected.
* @kind problem
* @problem.severity warning
* @security-severity 5.9
* @security-severity 7.8
* @precision high
* @id py/incomplete-hostname-regexp
* @tags correctness

View File

@@ -3,7 +3,7 @@
* @description Security checks on the substrings of an unparsed URL are often vulnerable to bypassing.
* @kind problem
* @problem.severity warning
* @security-severity 5.9
* @security-severity 7.8
* @precision high
* @id py/incomplete-url-substring-sanitization
* @tags correctness

View File

@@ -3,7 +3,7 @@
* @description Accessing paths influenced by users can allow an attacker to access unexpected resources.
* @kind path-problem
* @problem.severity error
* @security-severity 6.4
* @security-severity 7.5
* @sub-severity high
* @precision high
* @id py/path-injection

View File

@@ -6,7 +6,7 @@
* @kind path-problem
* @id py/tarslip
* @problem.severity error
* @security-severity 6.4
* @security-severity 7.5
* @precision medium
* @tags security
* external/cwe/cwe-022

View File

@@ -4,7 +4,7 @@
* user to change the meaning of the command.
* @kind path-problem
* @problem.severity error
* @security-severity 5.9
* @security-severity 9.8
* @sub-severity high
* @precision high
* @id py/command-line-injection

View File

@@ -4,7 +4,7 @@
* cause a cross-site scripting vulnerability.
* @kind problem
* @problem.severity error
* @security-severity 2.9
* @security-severity 6.1
* @precision medium
* @id py/jinja2/autoescape-false
* @tags security

View File

@@ -4,7 +4,7 @@
* allows for a cross-site scripting vulnerability.
* @kind path-problem
* @problem.severity error
* @security-severity 2.9
* @security-severity 6.1
* @sub-severity high
* @precision high
* @id py/reflective-xss

View File

@@ -4,7 +4,7 @@
* malicious SQL code by the user.
* @kind path-problem
* @problem.severity error
* @security-severity 6.4
* @security-severity 8.8
* @precision high
* @id py/sql-injection
* @tags security

View File

@@ -4,7 +4,7 @@
* code execution.
* @kind path-problem
* @problem.severity error
* @security-severity 10.0
* @security-severity 9.3
* @sub-severity high
* @precision high
* @id py/code-injection

View File

@@ -5,7 +5,7 @@
* developing a subsequent exploit.
* @kind path-problem
* @problem.severity error
* @security-severity 3.6
* @security-severity 5.4
* @precision high
* @id py/stack-trace-exposure
* @tags security

View File

@@ -3,7 +3,7 @@
* @description Running a Flask app in debug mode may allow an attacker to run arbitrary code through the Werkzeug debugger.
* @kind problem
* @problem.severity error
* @security-severity 6.4
* @security-severity 7.5
* @precision high
* @id py/flask-debug
* @tags security

View File

@@ -3,7 +3,7 @@
* @description Accepting unknown host keys can allow man-in-the-middle attacks.
* @kind problem
* @problem.severity error
* @security-severity 5.2
* @security-severity 7.5
* @precision high
* @id py/paramiko-missing-host-key-validation
* @tags security

View File

@@ -3,7 +3,7 @@
* @description Making a request without certificate validation can allow man-in-the-middle attacks.
* @kind problem
* @problem.severity error
* @security-severity 5.2
* @security-severity 7.5
* @precision medium
* @id py/request-without-cert-validation
* @tags security

View File

@@ -4,7 +4,7 @@
* expose it to an attacker.
* @kind path-problem
* @problem.severity error
* @security-severity 5.9
* @security-severity 7.5
* @precision high
* @id py/clear-text-logging-sensitive-data
* @tags security

View File

@@ -4,7 +4,7 @@
* attacker.
* @kind path-problem
* @problem.severity error
* @security-severity 5.9
* @security-severity 7.5
* @precision high
* @id py/clear-text-storage-sensitive-data
* @tags security

View File

@@ -3,7 +3,7 @@
* @description Use of a cryptographic key that is too small may allow the encryption to be broken.
* @kind problem
* @problem.severity error
* @security-severity 5.2
* @security-severity 7.5
* @precision high
* @id py/weak-crypto-key
* @tags security

View File

@@ -3,7 +3,7 @@
* @description Using broken or weak cryptographic algorithms can compromise security.
* @kind problem
* @problem.severity warning
* @security-severity 5.2
* @security-severity 7.5
* @precision high
* @id py/weak-cryptographic-algorithm
* @tags security

View File

@@ -5,7 +5,7 @@
* @id py/insecure-default-protocol
* @kind problem
* @problem.severity warning
* @security-severity 5.2
* @security-severity 7.5
* @precision high
* @tags security
* external/cwe/cwe-327

View File

@@ -4,7 +4,7 @@
* @id py/insecure-protocol
* @kind problem
* @problem.severity warning
* @security-severity 5.2
* @security-severity 7.5
* @precision high
* @tags security
* external/cwe/cwe-327
@@ -27,37 +27,33 @@ class ProtocolConfiguration extends DataFlow::Node {
unsafe_context_creation(this, _)
}
AstNode getNode() { result = this.asCfgNode().(CallNode).getFunction().getNode() }
DataFlow::Node getNode() { result = this.(DataFlow::CallCfgNode).getFunction() }
}
// Helper for pretty printer `callName`.
// This is a consequence of missing pretty printing.
// We do not want to evaluate our bespoke pretty printer
// for all `AstNode`s so we define a sub class of interesting ones.
//
// Note that AstNode is abstract and AstNode_ is a library class, so
// we have to extend @py_ast_node.
class Nameable extends @py_ast_node {
// for all `DataFlow::Node`s so we define a sub class of interesting ones.
class Nameable extends DataFlow::Node {
Nameable() {
this = any(ProtocolConfiguration pc).getNode()
or
exists(Nameable attr | this = attr.(Attribute).getObject())
this = any(Nameable attr).(DataFlow::AttrRef).getObject()
}
string toString() { result = "AstNode" }
}
string callName(Nameable call) {
result = call.(Name).getId()
result = call.asExpr().(Name).getId()
or
exists(Attribute a | a = call | result = callName(a.getObject()) + "." + a.getName())
exists(DataFlow::AttrRef a | a = call |
result = callName(a.getObject()) + "." + a.getAttributeName()
)
}
string configName(ProtocolConfiguration protocolConfiguration) {
result =
"call to " + callName(protocolConfiguration.asCfgNode().(CallNode).getFunction().getNode())
result = "call to " + callName(protocolConfiguration.(DataFlow::CallCfgNode).getFunction())
or
not protocolConfiguration.asCfgNode() instanceof CallNode and
not protocolConfiguration instanceof DataFlow::CallCfgNode and
not protocolConfiguration instanceof ContextCreation and
result = "context modification"
}

View File

@@ -13,12 +13,12 @@ class PyOpenSSLContextCreation extends ContextCreation, DataFlow::CallCfgNode {
}
override string getProtocol() {
exists(ControlFlowNode protocolArg, PyOpenSSL pyo |
protocolArg in [node.getArg(0), node.getArgByName("method")]
exists(DataFlow::Node protocolArg, PyOpenSSL pyo |
protocolArg in [this.getArg(0), this.getArgByName("method")]
|
protocolArg =
[pyo.specific_version(result).getAUse(), pyo.unspecific_version(result).getAUse()]
.asCfgNode()
protocolArg in [
pyo.specific_version(result).getAUse(), pyo.unspecific_version(result).getAUse()
]
)
}
}
@@ -29,7 +29,7 @@ class ConnectionCall extends ConnectionCreation, DataFlow::CallCfgNode {
}
override DataFlow::CfgNode getContext() {
result.getNode() in [node.getArg(0), node.getArgByName("context")]
result in [this.getArg(0), this.getArgByName("context")]
}
}
@@ -43,8 +43,8 @@ class SetOptionsCall extends ProtocolRestriction, DataFlow::CallCfgNode {
}
override ProtocolVersion getRestriction() {
API::moduleImport("OpenSSL").getMember("SSL").getMember("OP_NO_" + result).getAUse().asCfgNode() in [
node.getArg(0), node.getArgByName("options")
API::moduleImport("OpenSSL").getMember("SSL").getMember("OP_NO_" + result).getAUse() in [
this.getArg(0), this.getArgByName("options")
]
}
}

View File

@@ -11,15 +11,15 @@ class SSLContextCreation extends ContextCreation, DataFlow::CallCfgNode {
SSLContextCreation() { this = API::moduleImport("ssl").getMember("SSLContext").getACall() }
override string getProtocol() {
exists(ControlFlowNode protocolArg, Ssl ssl |
protocolArg in [node.getArg(0), node.getArgByName("protocol")]
exists(DataFlow::Node protocolArg, Ssl ssl |
protocolArg in [this.getArg(0), this.getArgByName("protocol")]
|
protocolArg =
[ssl.specific_version(result).getAUse(), ssl.unspecific_version(result).getAUse()]
.asCfgNode()
)
or
not exists(node.getAnArg()) and
not exists(this.getArg(_)) and
not exists(this.getArgByName(_)) and
result = "TLS"
}
}
@@ -39,12 +39,10 @@ API::Node sslContextInstance() {
result = API::moduleImport("ssl").getMember(["SSLContext", "create_default_context"]).getReturn()
}
class WrapSocketCall extends ConnectionCreation, DataFlow::CallCfgNode {
class WrapSocketCall extends ConnectionCreation, DataFlow::MethodCallNode {
WrapSocketCall() { this = sslContextInstance().getMember("wrap_socket").getACall() }
override DataFlow::Node getContext() {
result = this.getFunction().(DataFlow::AttrRead).getObject()
}
override DataFlow::Node getContext() { result = this.getObject() }
}
class OptionsAugOr extends ProtocolRestriction, DataFlow::CfgNode {
@@ -133,7 +131,7 @@ class ContextSetVersion extends ProtocolRestriction, ProtocolUnrestriction, Data
ContextSetVersion() {
exists(DataFlow::AttrWrite aw |
aw.getObject().asCfgNode() = node and
this = aw.getObject() and
aw.getAttributeName() = "minimum_version" and
aw.getValue() =
API::moduleImport("ssl").getMember("TLSVersion").getMember(restriction).getAUse()

View File

@@ -3,7 +3,7 @@
* @description Using broken or weak cryptographic hashing algorithms can compromise security.
* @kind path-problem
* @problem.severity warning
* @security-severity 5.9
* @security-severity 7.5
* @precision high
* @id py/weak-sensitive-data-hashing
* @tags security

View File

@@ -4,7 +4,7 @@
* @kind problem
* @id py/insecure-temporary-file
* @problem.severity error
* @security-severity 5.9
* @security-severity 7.0
* @sub-severity high
* @precision high
* @tags external/cwe/cwe-377

View File

@@ -4,7 +4,7 @@
* @kind path-problem
* @id py/unsafe-deserialization
* @problem.severity error
* @security-severity 5.9
* @security-severity 9.8
* @sub-severity high
* @precision high
* @tags external/cwe/cwe-502

View File

@@ -4,7 +4,7 @@
* may cause redirection to malicious web sites.
* @kind path-problem
* @problem.severity error
* @security-severity 2.7
* @security-severity 6.1
* @sub-severity low
* @id py/url-redirection
* @tags security

View File

@@ -4,7 +4,7 @@
* @kind problem
* @id py/overly-permissive-file
* @problem.severity warning
* @security-severity 5.9
* @security-severity 7.8
* @sub-severity high
* @precision medium
* @tags external/cwe/cwe-732

View File

@@ -3,7 +3,7 @@
* @description Credentials are hard coded in the source code of the application.
* @kind path-problem
* @problem.severity error
* @security-severity 5.9
* @security-severity 9.8
* @precision medium
* @id py/hardcoded-credentials
* @tags security

View File

@@ -1,28 +0,0 @@
from django.conf.urls import url
from clickhouse_driver import Client
from clickhouse_driver import connect
from aioch import Client as aiochClient
class MyClient(Client):
    """Minimal subclass of the imported `clickhouse_driver` `Client` class."""

    def dummy(self):
        """Do nothing; always returns None."""
        return
async def show_user(request, username):
    """Sample view showing unsafe and safe ways to query ClickHouse with `username`.

    Declared as a coroutine (`async def`) because the first query awaits the
    `aioch` client; `await` inside a plain `def` is a SyntaxError.

    The statements marked "BAD" are intentionally vulnerable: they interpolate
    `username` straight into the SQL text. The statement marked "GOOD" passes
    `username` as a separate query parameter instead.
    """
    # BAD -- Untrusted user input is directly injected into the sql query using async library 'aioch'
    aclient = aiochClient("localhost")
    progress = await aclient.execute_with_progress("SELECT * FROM users WHERE username = '%s'" % username)
    # BAD -- Untrusted user input is directly injected into the sql query using native client of library 'clickhouse_driver'
    Client('localhost').execute("SELECT * FROM users WHERE username = '%s'" % username)
    # GOOD -- query uses prepared statements
    query = "SELECT * FROM users WHERE username = %(username)s"
    Client('localhost').execute(query, {"username": username})
    # BAD -- PEP249 interface
    conn = connect('clickhouse://localhost')
    cursor = conn.cursor()
    cursor.execute("SELECT * FROM users WHERE username = '%s'" % username)
urlpatterns = [url(r'^users/(?P<username>[^/]+)$', show_user)]

View File

@@ -1,59 +0,0 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>
If a database query (such as a SQL or NoSQL query) is built from
user-provided data without sufficient sanitization, a user
may be able to run malicious database queries.
</p>
</overview>
<recommendation>
<p>
Most database connector libraries offer a way of safely
embedding untrusted data into a query by means of query parameters
or prepared statements.
</p>
</recommendation>
<example>
<p>
In the following snippet, a user is fetched from a <code>ClickHouse</code> database
using five different queries. In the "BAD" cases the query is built directly from user-controlled data.
In the "GOOD" case the user-controlled data is safely embedded into the query by using query parameters.
</p>
<p>
In the first case, the query is executed via the aioch <code>Client</code>. aioch is a module
for making asynchronous queries to the database.
</p>
<p>
In the second and third cases, the connection is established via the <code>Client</code> class.
This class implements different methods to execute a query.
</p>
<p>
In the fourth case, a good pattern is presented. Query parameters are sent through
the second dict-like argument.
</p>
<p>
In the fifth case, there is an example of PEP249 interface usage.
</p>
<p>
In the sixth case, a custom class is used which is a subclass of the default <code>Client</code>.
</p>
<sample src="ClickHouseSQLInjection.py" />
</example>
<references>
<li>Wikipedia: <a href="https://en.wikipedia.org/wiki/SQL_injection">SQL injection</a>.</li>
<li>OWASP: <a href="https://cheatsheetseries.owasp.org/cheatsheets/SQL_Injection_Prevention_Cheat_Sheet.html">SQL Injection Prevention Cheat Sheet</a>.</li>
</references>
</qhelp>

View File

@@ -1,22 +0,0 @@
/**
* @id py/yandex/clickhouse-sql-injection
* @name Clickhouse SQL query built from user-controlled sources
* @description Building a SQL query from user-controlled sources is vulnerable to insertion of
* malicious SQL code by the user.
* @kind path-problem
* @problem.severity error
* @precision high
* @tags security
* external/cwe/cwe-089
* external/owasp/owasp-a1
*/
import python
import experimental.semmle.python.frameworks.ClickHouseDriver
import semmle.python.security.dataflow.SqlInjection
import DataFlow::PathGraph
from SQLInjectionConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "This SQL query depends on $@.", source.getNode(),
"a user-provided value"

View File

@@ -1,85 +0,0 @@
/**
* Provides classes modeling security-relevant aspects of `clickhouse-driver` and `aioch` PyPI packages.
* See
* - https://pypi.org/project/clickhouse-driver/
* - https://pypi.org/project/aioch/
* - https://clickhouse-driver.readthedocs.io/en/latest/
*/
private import python
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
private import semmle.python.frameworks.PEP249
/**
* Provides models for `clickhouse-driver` and `aioch` PyPI packages.
* See
* - https://pypi.org/project/clickhouse-driver/
* - https://pypi.org/project/aioch/
* - https://clickhouse-driver.readthedocs.io/en/latest/
*/
module ClickHouseDriver {
/** Gets a reference to the `clickhouse_driver` module. */
API::Node clickhouse_driver() { result = API::moduleImport("clickhouse_driver") }
/** Gets a reference to the `aioch` module. This module allows making asynchronous database queries. */
API::Node aioch() { result = API::moduleImport("aioch") }
/**
* `clickhouse_driver` implements PEP249,
* providing ways to execute SQL statements against a database.
*/
class ClickHouseDriverPEP249 extends PEP249ModuleApiNode {
ClickHouseDriverPEP249() { this = clickhouse_driver() }
}
/** Provides models for the `Client` class of `clickhouse_driver` and `aioch`, and its subclasses. */
module Client {
/**
* Gets a reference to the `Client` class of `clickhouse_driver` or `aioch`,
* or to a (transitive) subclass of either.
*/
private DataFlow::Node client_ref() {
result = clickhouse_driver().getMember("Client").getASubclass*().getAUse()
or
result = aioch().getMember("Client").getASubclass*().getAUse()
}
/**
* A call whose callee is a class reference from `client_ref()`, that is, an
* instantiation of a `clickhouse_driver`/`aioch` `Client` class or one of its subclasses.
*/
private class ClientInstantiation extends DataFlow::CallCfgNode {
ClientInstantiation() { this.getFunction() = client_ref() }
}
/**
* Gets a reference to a `Client` instance (see `ClientInstantiation`),
* tracked using type tracker `t`.
*/
private DataFlow::LocalSourceNode instance(DataFlow::TypeTracker t) {
t.start() and
result instanceof ClientInstantiation
or
exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t))
}
/** Gets a reference to a `Client` instance (see `ClientInstantiation`). */
DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }
}
/**
* Gets the name of a `Client` method that executes a query:
* `execute_with_progress`, `execute`, or `execute_iter`.
*/
private string execute_function() {
result in ["execute_with_progress", "execute", "execute_iter"]
}
/**
* Gets a reference to one of the `execute_function()` methods looked up on a
* `Client` instance, tracked using type tracker `t`.
*/
private DataFlow::LocalSourceNode clickhouse_execute(DataFlow::TypeTracker t) {
t.startInAttr(execute_function()) and
result = Client::instance()
or
exists(DataFlow::TypeTracker t2 | result = clickhouse_execute(t2).track(t2, t))
}
/** Gets a reference to one of the `execute_function()` methods of a `Client` instance. */
DataFlow::Node clickhouse_execute() {
clickhouse_execute(DataFlow::TypeTracker::end()).flowsTo(result)
}
/**
* A call to one of the `execute_function()` methods of a `Client` instance,
* modeled as a SQL execution.
*/
private class ExecuteCall extends SqlExecution::Range, DataFlow::CallCfgNode {
ExecuteCall() { this.getFunction() = clickhouse_execute() }
// Only the first positional argument is treated as the SQL being executed.
// NOTE(review): a query passed as a keyword argument is not matched here -- confirm whether that is intended.
override DataFlow::Node getSql() { result.asCfgNode() = node.getArg(0) }
}
}

View File

@@ -64,15 +64,14 @@ private module Re {
*
* See https://docs.python.org/3/library/re.html#regular-expression-objects
*/
private class CompiledRegex extends DataFlow::CallCfgNode, RegexExecution::Range {
private class CompiledRegex extends DataFlow::MethodCallNode, RegexExecution::Range {
DataFlow::Node regexNode;
CompiledRegex() {
exists(DataFlow::CallCfgNode patternCall, DataFlow::AttrRead reMethod |
this.getFunction() = reMethod and
exists(DataFlow::MethodCallNode patternCall |
patternCall = API::moduleImport("re").getMember("compile").getACall() and
patternCall.flowsTo(reMethod.getObject()) and
reMethod.getAttributeName() instanceof RegexExecutionMethods and
patternCall.flowsTo(this.getObject()) and
this.getMethodName() instanceof RegexExecutionMethods and
regexNode = patternCall.getArg(0)
)
}

View File

@@ -97,6 +97,11 @@ module API {
*/
Node getASubclass() { result = getASuccessor(Label::subclass()) }
/**
* Gets a node representing the result from awaiting this node.
*/
Node getAwaited() { result = getASuccessor(Label::await()) }
/**
* Gets a string representation of the lexicographically least among all shortest access paths
* from the root to this node.
@@ -469,6 +474,14 @@ module API {
exists(DataFlow::Node superclass | pred.flowsTo(superclass) |
ref.asExpr().(ClassExpr).getABase() = superclass.asExpr()
)
or
// awaiting
exists(Await await, DataFlow::Node awaitedValue |
lbl = Label::await() and
ref.asExpr() = await and
await.getValue() = awaitedValue.asExpr() and
pred.flowsTo(awaitedValue)
)
)
or
// Built-ins, treated as members of the module `builtins`
@@ -585,5 +598,9 @@ private module Label {
/** Gets the `return` edge label. */
string return() { result = "getReturn()" }
/** Gets the `subclass` edge label. */
string subclass() { result = "getASubclass()" }
/** Gets the `await` edge label. */
string await() { result = "getAwaited()" }
}

View File

@@ -417,7 +417,7 @@ module HTTP {
/** Gets the URL pattern for this route, if it can be statically determined. */
string getUrlPattern() {
exists(StrConst str |
DataFlow::exprNode(str).(DataFlow::LocalSourceNode).flowsTo(this.getUrlPatternArg()) and
this.getUrlPatternArg().getALocalSource() = DataFlow::exprNode(str) and
result = str.getText()
)
}
@@ -550,9 +550,7 @@ module HTTP {
/** Gets the mimetype of this HTTP response, if it can be statically determined. */
string getMimetype() {
exists(StrConst str |
DataFlow::exprNode(str)
.(DataFlow::LocalSourceNode)
.flowsTo(this.getMimetypeOrContentTypeArg()) and
this.getMimetypeOrContentTypeArg().getALocalSource() = DataFlow::exprNode(str) and
result = str.getText().splitAt(";", 0)
)
or

View File

@@ -89,7 +89,15 @@ class File extends Container {
i.getTest().(Compare).compares(name, op, main) and
name.getId() = "__name__" and
main.getText() = "__main__"
)
) and
// Exclude files named `__main__.py`. These are often _not_ meant to be run directly, but
// contain this construct anyway.
//
// Their presence in a package (say, `foo`) means one can execute the package directly using
// `python -m foo` (which will run the `foo/__main__.py` file). Since being an entry point for
// execution means treating imports as absolute, this causes trouble, since when run with
// `python -m`, the interpreter uses the usual package semantics.
not this.getShortName() = "__main__.py"
or
// The file contains a `#!` line referencing the python interpreter
exists(Comment c |

View File

@@ -4,7 +4,9 @@
// If you add modeling of a new framework/library, remember to add it to the docs in
// `docs/codeql/support/reusables/frameworks.rst`
private import semmle.python.frameworks.Aioch
private import semmle.python.frameworks.Aiohttp
private import semmle.python.frameworks.ClickhouseDriver
private import semmle.python.frameworks.Cryptodome
private import semmle.python.frameworks.Cryptography
private import semmle.python.frameworks.Dill
@@ -13,14 +15,17 @@ private import semmle.python.frameworks.Fabric
private import semmle.python.frameworks.Flask
private import semmle.python.frameworks.Idna
private import semmle.python.frameworks.Invoke
private import semmle.python.frameworks.Jmespath
private import semmle.python.frameworks.Multidict
private import semmle.python.frameworks.MysqlConnectorPython
private import semmle.python.frameworks.Mysql
private import semmle.python.frameworks.MySQLdb
private import semmle.python.frameworks.Psycopg2
private import semmle.python.frameworks.PyMySQL
private import semmle.python.frameworks.Rsa
private import semmle.python.frameworks.Simplejson
private import semmle.python.frameworks.Stdlib
private import semmle.python.frameworks.Tornado
private import semmle.python.frameworks.Twisted
private import semmle.python.frameworks.Ujson
private import semmle.python.frameworks.Yaml
private import semmle.python.frameworks.Yarl

View File

@@ -5,10 +5,14 @@
private import python
private import semmle.python.dataflow.new.DataFlow
// Need to import since frameworks can extend `RemoteFlowSource::Range`
// Need to import `semmle.python.Frameworks` since frameworks can extend `SensitiveDataSource::Range`
private import semmle.python.Frameworks
private import semmle.python.Concepts
private import semmle.python.security.SensitiveData as OldSensitiveData
private import semmle.python.security.internal.SensitiveDataHeuristics as SensitiveDataHeuristics
// We export these explicitly, so we don't also export the `HeuristicNames` module.
class SensitiveDataClassification = SensitiveDataHeuristics::SensitiveDataClassification;
module SensitiveDataClassification = SensitiveDataHeuristics::SensitiveDataClassification;
/**
* A data flow source of sensitive data, such as secrets, certificates, or passwords.
@@ -22,13 +26,9 @@ class SensitiveDataSource extends DataFlow::Node {
SensitiveDataSource() { this = range }
/**
* INTERNAL: Do not use.
*
* This will be rewritten to have better types soon, and therefore should only be used internally until then.
*
* Gets the classification of the sensitive data.
*/
string getClassification() { result = range.getClassification() }
SensitiveDataClassification getClassification() { result = range.getClassification() }
}
/** Provides a class for modeling new sources of sensitive data, such as secrets, certificates, or passwords. */
@@ -41,26 +41,225 @@ module SensitiveDataSource {
*/
abstract class Range extends DataFlow::Node {
/**
* INTERNAL: Do not use.
*
* This will be rewritten to have better types soon, and therefore should only be used internally until then.
*
* Gets the classification of the sensitive data.
*/
abstract string getClassification();
abstract SensitiveDataClassification getClassification();
}
}
private class PortOfOldModeling extends SensitiveDataSource::Range {
OldSensitiveData::SensitiveData::Source oldSensitiveSource;
/** Actual sensitive data modeling */
private module SensitiveDataModeling {
private import SensitiveDataHeuristics::HeuristicNames
PortOfOldModeling() { this.asCfgNode() = oldSensitiveSource }
/**
* Gets a reference to a function that is considered to be a sensitive source of
* `classification`.
*/
private DataFlow::LocalSourceNode sensitiveFunction(
DataFlow::TypeTracker t, SensitiveDataClassification classification
) {
t.start() and
exists(Function f |
nameIndicatesSensitiveData(f.getName(), classification) and
result.asExpr() = f.getDefinition()
)
or
exists(DataFlow::TypeTracker t2 | result = sensitiveFunction(t2, classification).track(t2, t))
}
override string getClassification() {
exists(OldSensitiveData::SensitiveData classification |
oldSensitiveSource.isSourceOf(classification)
|
classification = "sensitive.data." + result
/**
* Gets a reference to a function that is considered to be a sensitive source of
* `classification`.
*/
DataFlow::Node sensitiveFunction(SensitiveDataClassification classification) {
sensitiveFunction(DataFlow::TypeTracker::end(), classification).flowsTo(result)
}
/**
* Gets a reference to a string constant that, if used as the key in a lookup,
* indicates the presence of sensitive data with `classification`.
*/
private DataFlow::LocalSourceNode sensitiveLookupStringConst(
DataFlow::TypeTracker t, SensitiveDataClassification classification
) {
t.start() and
nameIndicatesSensitiveData(result.asExpr().(StrConst).getText(), classification)
or
exists(DataFlow::TypeTracker t2 |
result = sensitiveLookupStringConst(t2, classification).track(t2, t)
)
}
/**
* Gets a reference to a string constant that, if used as the key in a lookup,
* indicates the presence of sensitive data with `classification`.
*
* Also see `extraStepForCalls`.
*/
DataFlow::Node sensitiveLookupStringConst(SensitiveDataClassification classification) {
sensitiveLookupStringConst(DataFlow::TypeTracker::end(), classification).flowsTo(result)
}
/** A function call that is considered a source of sensitive data. */
class SensitiveFunctionCall extends SensitiveDataSource::Range, DataFlow::CallCfgNode {
SensitiveDataClassification classification;
SensitiveFunctionCall() {
this.getFunction() = sensitiveFunction(classification)
or
// to cover functions that we don't have the definition for, and where the
// reference to the function has not already been marked as being sensitive
nameIndicatesSensitiveData(this.getFunction().asCfgNode().(NameNode).getId(), classification)
}
override SensitiveDataClassification getClassification() { result = classification }
}
/**
* Tracks any modeled source of sensitive data (with any classification),
* to limit the scope of `extraStepForCalls`. See its QLDoc for more context.
*/
private DataFlow::LocalSourceNode possibleSensitiveCallable(DataFlow::TypeTracker t) {
t.start() and
result instanceof SensitiveDataSource
or
exists(DataFlow::TypeTracker t2 | result = possibleSensitiveCallable(t2).track(t2, t))
}
/**
* Tracks any modeled source of sensitive data (with any classification),
* to limit the scope of `extraStepForCalls`. See its QLDoc for more context.
*/
private DataFlow::Node possibleSensitiveCallable() {
possibleSensitiveCallable(DataFlow::TypeTracker::end()).flowsTo(result)
}
/**
* Holds if the step from `nodeFrom` to `nodeTo` should be considered a
* taint-flow step for sensitive-data, to ensure calls are handled correctly.
*
* To handle calls properly, while preserving a good source for path explanations,
* you need to include this predicate as an additional taint step in your taint-tracking
* configurations.
*
* The core problem can be illustrated by the example below. If we consider the
* `print` a sink, what path and what source do we want to show? My initial approach
* would be to use type-tracking to propagate from the `not_found.get_passwd` attribute
* lookup, to the use of `non_sensitive_name`, and then create a new `SensitiveDataSource::Range`
* like `SensitiveFunctionCall`. Although that seems likely to work, it will also end up
* with a non-optimal path, which starts at _bad source_, and therefore doesn't show
* how we figured out that `non_sensitive_name`
* could be a function that returns a password (and in cases where there are many calls to
* `my_func` it will be annoying for someone to figure this out manually).
*
* By including this additional taint-step in the taint-tracking configuration, it's possible
* to get a path explanation going from _good source_ to the sink.
*
* ```python
* def my_func(non_sensitive_name):
* x = non_sensitive_name() # <-- bad source
* print(x) # <-- sink
*
* import not_found
* f = not_found.get_passwd # <-- good source
* my_func(f)
* ```
*/
predicate extraStepForCalls(DataFlow::Node nodeFrom, DataFlow::CallCfgNode nodeTo) {
// However, we do still use the type-tracking approach to limit the size of this
// predicate.
nodeTo.getFunction() = nodeFrom and
nodeFrom = possibleSensitiveCallable()
}
/**
* Any kind of variable assignment (also including with/for) where the name indicates
* it contains sensitive data.
*
* Note: We _could_ make any access to a variable with a sensitive name a source of
* sensitive data, but to make path explanations in data-flow/taint-tracking good,
* we don't want that, since it works against allowing users to understand the flow
* in the program (which is the whole point).
*
* Note: To make data-flow/taint-tracking work, the expression that is _assigned_ to
* the variable is marked as the source (as compared to marking the variable as the
* source).
*/
class SensitiveVariableAssignment extends SensitiveDataSource::Range {
SensitiveDataClassification classification;
SensitiveVariableAssignment() {
exists(DefinitionNode def |
nameIndicatesSensitiveData(def.(NameNode).getId(), classification) and
(
this.asCfgNode() = def.getValue()
or
this.asCfgNode() = def.getValue().(ForNode).getSequence()
) and
not this.asExpr() instanceof FunctionExpr and
not this.asExpr() instanceof ClassExpr
)
or
exists(With with |
nameIndicatesSensitiveData(with.getOptionalVars().(Name).getId(), classification) and
this.asExpr() = with.getContextExpr()
)
}
override SensitiveDataClassification getClassification() { result = classification }
}
/** An attribute access that is considered a source of sensitive data. */
class SensitiveAttributeAccess extends SensitiveDataSource::Range {
SensitiveDataClassification classification;
SensitiveAttributeAccess() {
// Things like `foo.<sensitive-name>` or `from <module> import <sensitive-name>`
// I considered excluding any `from ... import something_sensitive`, but then realized that
// we should flag up `from ... import password as ...` as a password
nameIndicatesSensitiveData(this.(DataFlow::AttrRead).getAttributeName(), classification)
or
// Things like `getattr(foo, <reference-to-string>)`
this.(DataFlow::AttrRead).getAttributeNameExpr() = sensitiveLookupStringConst(classification)
}
override SensitiveDataClassification getClassification() { result = classification }
}
/** A subscript whose index is a string constant indicating that the result is sensitive data. */
class SensitiveSubscript extends SensitiveDataSource::Range {
  SensitiveDataClassification classification;

  SensitiveSubscript() {
    exists(SubscriptNode subscript | subscript = this.asCfgNode() |
      subscript.getIndex() = sensitiveLookupStringConst(classification).asCfgNode()
    )
  }

  override SensitiveDataClassification getClassification() { result = classification }
}
/**
 * A call to an attribute named `get` (as in `obj.get(<key>)`), where the first
 * positional argument indicates that the result will be sensitive data.
 */
class SensitiveGetCall extends SensitiveDataSource::Range, DataFlow::CallCfgNode {
  SensitiveDataClassification classification;

  SensitiveGetCall() {
    exists(DataFlow::AttrRef callee | callee = this.getFunction() |
      callee.getAttributeName() = "get"
    ) and
    sensitiveLookupStringConst(classification) = this.getArg(0)
  }

  override SensitiveDataClassification getClassification() { result = classification }
}
/** A parameter whose name indicates that it will receive sensitive data. */
class SensitiveParameter extends SensitiveDataSource::Range, DataFlow::ParameterNode {
  SensitiveDataClassification classification;

  SensitiveParameter() {
    exists(string paramName | paramName = this.getParameter().getName() |
      nameIndicatesSensitiveData(paramName, classification)
    )
  }

  override SensitiveDataClassification getClassification() { result = classification }
}
}
/**
* Alias that exposes `SensitiveDataModeling::extraStepForCalls` outside of the module.
*
* Holds if `nodeTo` is a call whose callee is `nodeFrom`, and `nodeFrom` is one of the
* nodes given by `possibleSensitiveCallable()`.
*/
predicate sensitiveDataExtraStepForCalls = SensitiveDataModeling::extraStepForCalls/2;

View File

@@ -191,7 +191,7 @@ private class ClassDefinitionAsAttrWrite extends AttrWrite, CfgNode {
* - Dynamic attribute reads using `getattr`: `getattr(object, attr)`
* - Qualified imports: `from module import attr as name`
*/
abstract class AttrRead extends AttrRef, Node { }
abstract class AttrRead extends AttrRef, Node, LocalSourceNode { }
/** A simple attribute read, e.g. `object.attr` */
private class AttributeReadAsAttrRead extends AttrRead, CfgNode {

View File

@@ -724,7 +724,6 @@ private module Cached {
Node node1, Content c, Node node2, DataFlowType contentType, DataFlowType containerType
) {
storeStep(node1, c, node2) and
read(_, c, _) and
contentType = getNodeDataFlowType(node1) and
containerType = getNodeDataFlowType(node2)
or

View File

@@ -168,7 +168,13 @@ module Consistency {
msg = "ArgumentNode is missing PostUpdateNode."
}
query predicate postWithInFlow(PostUpdateNode n, string msg) {
// This predicate helps the compiler forget that in some languages
// it is impossible for a `PostUpdateNode` to be the target of
// `simpleLocalFlowStep`.
private predicate isPostUpdateNode(Node n) { n instanceof PostUpdateNode or none() }
query predicate postWithInFlow(Node n, string msg) {
isPostUpdateNode(n) and
simpleLocalFlowStep(_, n) and
msg = "PostUpdateNode should not be the target of local flow."
}

View File

@@ -180,6 +180,45 @@ class CallCfgNode extends CfgNode, LocalSourceNode {
Node getArgByName(string name) { result.asCfgNode() = node.getArgByName(name) }
}
/**
 * A data-flow node corresponding to a method call, such as `foo.bar(...)`.
 *
 * The attribute lookup does not have to be part of the call expression itself:
 * `temp = foo.bar; temp(...)` is covered as well. Note that the lookup is only
 * tracked through local scope (it must be a local source of the callee).
 */
class MethodCallNode extends CallCfgNode {
  AttrRead method_lookup;

  MethodCallNode() { this.getFunction().getALocalSource() = method_lookup }

  /**
   * Gets the name of the invoked method (the `bar` in `foo.bar(...)`), if it can be determined.
   *
   * A single call node may represent calls to several different objects and methods, in which
   * case this predicate has multiple results. To pair up objects and method names accurately,
   * use the `calls` predicate instead.
   */
  string getMethodName() { method_lookup.getAttributeName() = result }

  /**
   * Gets the data-flow node for the object receiving this call (the `foo` in `foo.bar(...)`).
   *
   * A single call node may represent calls to several different objects and methods, in which
   * case this predicate has multiple results. To pair up objects and method names accurately,
   * use the `calls` predicate instead.
   */
  Node getObject() { method_lookup.getObject() = result }

  /** Holds if this data-flow node calls method `methodName` on the object node `object`. */
  predicate calls(Node object, string methodName) {
    // `getObject` and `getMethodName` may each have multiple results, so both values are
    // read from the same `method_lookup` to keep the object/name pairing accurate.
    methodName = method_lookup.getAttributeName() and
    object = method_lookup.getObject()
  }
}
/**
* An expression, viewed as a node in a data flow graph.
*

View File

@@ -59,6 +59,11 @@ class LocalSourceNode extends Node {
*/
AttrRead getAnAttributeRead(string attrName) { result = getAnAttributeReference(attrName) }
/**
* Gets a write of attribute `attrName` on this node.
*/
AttrWrite getAnAttributeWrite(string attrName) { result = getAnAttributeReference(attrName) }
/**
* Gets a reference (read or write) of any attribute on this node.
*/
@@ -73,11 +78,26 @@ class LocalSourceNode extends Node {
*/
AttrRead getAnAttributeRead() { result = getAnAttributeReference() }
/**
* Gets a write of any attribute on this node.
*/
AttrWrite getAnAttributeWrite() { result = getAnAttributeReference() }
/**
* Gets a call to this node.
*/
CallCfgNode getACall() { Cached::call(this, result) }
/**
* Gets a call to the method `methodName` on this node.
*
* Includes both calls that have the syntactic shape of a method call (as in `obj.m(...)`), and
* calls where the callee undergoes some additional local data flow (as in `tmp = obj.m; tmp(...)`).
*/
MethodCallNode getAMethodCall(string methodName) {
result = this.getAnAttributeRead(methodName).getACall()
}
/**
* Gets a node that this node may flow to using one heap and/or interprocedural step.
*

View File

@@ -9,6 +9,13 @@ private import semmle.python.dataflow.new.internal.TaintTrackingPublic
*/
predicate defaultTaintSanitizer(DataFlow::Node node) { none() }
/**
* Holds if default `TaintTracking::Configuration`s should allow implicit reads
* of `c` at sinks and inputs to additional taint steps.
*
* The body is `none()`, so this predicate currently enables no implicit reads.
* `node` is declared as a binding-set parameter, so callers must supply it.
*/
bindingset[node]
predicate defaultImplicitTaintRead(DataFlow::Node node, DataFlow::Content c) { none() }
private module Cached {
/**
* Holds if the additional step from `nodeFrom` to `nodeTo` should be included in all

View File

@@ -23,15 +23,57 @@ class OptionalContentName extends string {
OptionalContentName() { this instanceof ContentName or this = "" }
}
/**
* A description of a step on an inter-procedural data flow path.
*/
private newtype TStepSummary =
LevelStep() or
CallStep() or
ReturnStep() or
StoreStep(ContentName content) or
LoadStep(ContentName content)
/**
* Groups the cached parts of type tracking: the step-summary newtype, the `append`
* operation on type trackers, and the two step predicates between local source nodes.
*/
cached
private module Cached {
/**
* A description of a step on an inter-procedural data flow path.
*/
cached
newtype TStepSummary =
LevelStep() or
CallStep() or
ReturnStep() or
StoreStep(ContentName content) or
LoadStep(ContentName content)
/** Gets the summary resulting from appending `step` to type-tracking summary `tt`. */
cached
TypeTracker append(TypeTracker tt, StepSummary step) {
exists(Boolean hasCall, OptionalContentName content | tt = MkTypeTracker(hasCall, content) |
// A level step leaves the tracker unchanged.
step = LevelStep() and result = tt
or
// A call step records that a call has been taken; the tracked content is kept.
step = CallStep() and result = MkTypeTracker(true, content)
or
// A return step is only permitted when no call step has been recorded.
step = ReturnStep() and hasCall = false and result = tt
or
// A load must read exactly the tracked content; afterwards no content is tracked.
step = LoadStep(content) and result = MkTypeTracker(hasCall, "")
or
// A store is only permitted when no content is tracked; afterwards `p` is tracked.
exists(string p | step = StoreStep(p) and content = "" and result = MkTypeTracker(hasCall, p))
)
}
/**
* Holds if `summary` corresponds to having taken a forwards
* heap and/or intra-procedural step from `nodeFrom` to `nodeTo`, that is,
* `nodeFrom` flows to some node from which `smallstepNoCall` reaches `nodeTo`.
*
* Steps contained in this predicate should _not_ depend on the call graph.
*/
cached
predicate stepNoCall(LocalSourceNode nodeFrom, LocalSourceNode nodeTo, StepSummary summary) {
exists(Node mid | nodeFrom.flowsTo(mid) and smallstepNoCall(mid, nodeTo, summary))
}
/**
* Holds if `summary` corresponds to having taken a forwards
* inter-procedural step from `nodeFrom` to `nodeTo`, that is,
* `nodeFrom` flows to some node from which `smallstepCall` reaches `nodeTo`.
*/
cached
predicate stepCall(LocalSourceNode nodeFrom, LocalSourceNode nodeTo, StepSummary summary) {
exists(Node mid | nodeFrom.flowsTo(mid) and smallstepCall(mid, nodeTo, summary))
}
}
private import Cached
/**
* INTERNAL: Use `TypeTracker` or `TypeBackTracker` instead.
@@ -53,28 +95,29 @@ class StepSummary extends TStepSummary {
}
}
/**
 * Holds if `summary` describes a single jump, store, or load step from `nodeFrom`
 * to `nodeTo`.
 */
pragma[noinline]
private predicate smallstepNoCall(Node nodeFrom, LocalSourceNode nodeTo, StepSummary summary) {
  summary = LevelStep() and
  jumpStep(nodeFrom, nodeTo)
  or
  exists(string contentName |
    summary = StoreStep(contentName) and
    StepSummary::localSourceStoreStep(nodeFrom, nodeTo, contentName)
    or
    summary = LoadStep(contentName) and
    basicLoadStep(nodeFrom, nodeTo, contentName)
  )
}
/**
 * Holds if `summary` describes a single call or return step from `nodeFrom` to `nodeTo`.
 */
pragma[noinline]
private predicate smallstepCall(Node nodeFrom, LocalSourceNode nodeTo, StepSummary summary) {
  summary = CallStep() and
  callStep(nodeFrom, nodeTo)
  or
  summary = ReturnStep() and
  returnStep(nodeFrom, nodeTo)
}
/** Provides predicates for updating step summaries (`StepSummary`s). */
module StepSummary {
/**
* Gets the summary that corresponds to having taken a forwards
* heap and/or intra-procedural step from `nodeFrom` to `nodeTo`.
*
* Steps contained in this predicate should _not_ depend on the call graph.
*/
cached
private predicate stepNoCall(LocalSourceNode nodeFrom, LocalSourceNode nodeTo, StepSummary summary) {
exists(Node mid | nodeFrom.flowsTo(mid) and smallstepNoCall(mid, nodeTo, summary))
}
/**
* Gets the summary that corresponds to having taken a forwards
* inter-procedural step from `nodeFrom` to `nodeTo`.
*/
cached
private predicate stepCall(LocalSourceNode nodeFrom, LocalSourceNode nodeTo, StepSummary summary) {
exists(Node mid | nodeFrom.flowsTo(mid) and smallstepCall(mid, nodeTo, summary))
}
/**
* Gets the summary that corresponds to having taken a forwards
* heap and/or inter-procedural step from `nodeFrom` to `nodeTo`.
@@ -92,27 +135,6 @@ module StepSummary {
stepCall(nodeFrom, nodeTo, summary)
}
pragma[noinline]
private predicate smallstepNoCall(Node nodeFrom, LocalSourceNode nodeTo, StepSummary summary) {
jumpStep(nodeFrom, nodeTo) and
summary = LevelStep()
or
exists(string content |
localSourceStoreStep(nodeFrom, nodeTo, content) and
summary = StoreStep(content)
or
basicLoadStep(nodeFrom, nodeTo, content) and summary = LoadStep(content)
)
}
pragma[noinline]
private predicate smallstepCall(Node nodeFrom, LocalSourceNode nodeTo, StepSummary summary) {
callStep(nodeFrom, nodeTo) and summary = CallStep()
or
returnStep(nodeFrom, nodeTo) and
summary = ReturnStep()
}
/**
* Gets the summary that corresponds to having taken a forwards
* local, heap and/or inter-procedural step from `nodeFrom` to `nodeTo`.
@@ -193,18 +215,7 @@ class TypeTracker extends TTypeTracker {
TypeTracker() { this = MkTypeTracker(hasCall, content) }
/** Gets the summary resulting from appending `step` to this type-tracking summary. */
cached
TypeTracker append(StepSummary step) {
step = LevelStep() and result = this
or
step = CallStep() and result = MkTypeTracker(true, content)
or
step = ReturnStep() and hasCall = false and result = this
or
step = LoadStep(content) and result = MkTypeTracker(hasCall, "")
or
exists(string p | step = StoreStep(p) and content = "" and result = MkTypeTracker(hasCall, p))
}
TypeTracker append(StepSummary step) { result = append(this, step) }
/** Gets a textual representation of this summary. */
string toString() {

View File

@@ -105,6 +105,11 @@ abstract class Configuration extends DataFlow::Configuration {
defaultAdditionalTaintStep(node1, node2)
}
override predicate allowImplicitRead(DataFlow::Node node, DataFlow::Content c) {
(this.isSink(node) or this.isAdditionalTaintStep(node, _)) and
defaultImplicitTaintRead(node, c)
}
/**
* Holds if taint may flow from `source` to `sink` for this configuration.
*/

View File

@@ -105,6 +105,11 @@ abstract class Configuration extends DataFlow::Configuration {
defaultAdditionalTaintStep(node1, node2)
}
override predicate allowImplicitRead(DataFlow::Node node, DataFlow::Content c) {
(this.isSink(node) or this.isAdditionalTaintStep(node, _)) and
defaultImplicitTaintRead(node, c)
}
/**
* Holds if taint may flow from `source` to `sink` for this configuration.
*/

View File

@@ -105,6 +105,11 @@ abstract class Configuration extends DataFlow::Configuration {
defaultAdditionalTaintStep(node1, node2)
}
override predicate allowImplicitRead(DataFlow::Node node, DataFlow::Content c) {
(this.isSink(node) or this.isAdditionalTaintStep(node, _)) and
defaultImplicitTaintRead(node, c)
}
/**
* Holds if taint may flow from `source` to `sink` for this configuration.
*/

View File

@@ -105,6 +105,11 @@ abstract class Configuration extends DataFlow::Configuration {
defaultAdditionalTaintStep(node1, node2)
}
override predicate allowImplicitRead(DataFlow::Node node, DataFlow::Content c) {
(this.isSink(node) or this.isAdditionalTaintStep(node, _)) and
defaultImplicitTaintRead(node, c)
}
/**
* Holds if taint may flow from `source` to `sink` for this configuration.
*/

View File

@@ -0,0 +1,52 @@
/**
* Provides classes modeling security-relevant aspects of the `aioch` PyPI package (an
* async-io version of the `clickhouse-driver` PyPI package).
*
* See https://pypi.org/project/aioch/
*/
private import python
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
private import semmle.python.frameworks.PEP249
private import semmle.python.frameworks.ClickhouseDriver
/**
 * INTERNAL: Do not use.
 *
 * Provides models for the `aioch` PyPI package (an async-io version of the
 * `clickhouse-driver` PyPI package).
 *
 * See https://pypi.org/project/aioch/
 */
module Aioch {
  /** Provides models for the `aioch.Client` class and subclasses. */
  module Client {
    /** Gets a reference to the `aioch.Client` class or any subclass. */
    API::Node subclassRef() {
      result = API::moduleImport("aioch").getMember("Client").getASubclass*()
    }

    /** Gets a reference to an instance of `aioch.Client` or any subclass. */
    API::Node instance() { result = subclassRef().getReturn() }
  }

  /**
   * A call to any of the execute methods on an `aioch.Client`, which are just async
   * versions of the methods in the `clickhouse-driver` PyPI package.
   *
   * The method names are shared with `ClickhouseDriver::getExecuteMethodName()`.
   *
   * See
   * - https://clickhouse-driver.readthedocs.io/en/latest/api.html#clickhouse_driver.Client.execute
   * - https://clickhouse-driver.readthedocs.io/en/latest/api.html#clickhouse_driver.Client.execute_iter
   * - https://clickhouse-driver.readthedocs.io/en/latest/api.html#clickhouse_driver.Client.execute_with_progress
   */
  class ClientExecuteCall extends SqlExecution::Range, DataFlow::CallCfgNode {
    ClientExecuteCall() {
      this = Client::instance().getMember(ClickhouseDriver::getExecuteMethodName()).getACall()
    }

    /** Gets the SQL query: the first positional argument or the `query` keyword argument. */
    override DataFlow::Node getSql() { result in [this.getArg(0), this.getArgByName("query")] }
  }
}

View File

@@ -0,0 +1,65 @@
/**
* Provides classes modeling security-relevant aspects of the `clickhouse-driver` PyPI package.
* See
* - https://pypi.org/project/clickhouse-driver/
* - https://clickhouse-driver.readthedocs.io/en/latest/
*/
private import python
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
private import semmle.python.frameworks.PEP249
/**
 * INTERNAL: Do not use.
 *
 * Provides models for the `clickhouse-driver` PyPI package (imported as `clickhouse_driver`).
 * See
 * - https://pypi.org/project/clickhouse-driver/
 * - https://clickhouse-driver.readthedocs.io/en/latest/
 */
module ClickhouseDriver {
  /**
   * `clickhouse_driver` implements PEP249,
   * providing ways to execute SQL statements against a database.
   */
  class ClickHouseDriverPEP249 extends PEP249ModuleApiNode {
    ClickHouseDriverPEP249() { this = API::moduleImport("clickhouse_driver") }
  }

  /** Provides models for the `clickhouse_driver.Client` class and subclasses. */
  module Client {
    /** Gets a reference to the `clickhouse_driver.Client` class or any subclass. */
    API::Node subclassRef() {
      exists(API::Node driver | driver = API::moduleImport("clickhouse_driver") |
        // canonical definition
        result = driver.getMember("client").getMember("Client").getASubclass*()
        or
        // commonly used alias
        result = driver.getMember("Client").getASubclass*()
      )
    }

    /** Gets a reference to an instance of `clickhouse_driver.Client` or any subclass. */
    API::Node instance() { result = subclassRef().getReturn() }
  }

  /** Gets the name of an execute method on `clickhouse_driver.Client`. */
  string getExecuteMethodName() { result in ["execute_with_progress", "execute", "execute_iter"] }

  /**
   * A call to any of the execute methods on a `clickhouse_driver.Client` instance.
   *
   * See
   * - https://clickhouse-driver.readthedocs.io/en/latest/api.html#clickhouse_driver.Client.execute
   * - https://clickhouse-driver.readthedocs.io/en/latest/api.html#clickhouse_driver.Client.execute_iter
   * - https://clickhouse-driver.readthedocs.io/en/latest/api.html#clickhouse_driver.Client.execute_with_progress
   */
  class ClientExecuteCall extends SqlExecution::Range, DataFlow::CallCfgNode {
    ClientExecuteCall() { this = Client::instance().getMember(getExecuteMethodName()).getACall() }

    /** Gets the SQL query: the first positional argument or the `query` keyword argument. */
    override DataFlow::Node getSql() { result in [this.getArg(0), this.getArgByName("query")] }
  }
}

View File

@@ -228,11 +228,7 @@ private module CryptographyModel {
/** Gets a reference to the encryptor of a Cipher instance using algorithm with `algorithmName`. */
DataFlow::LocalSourceNode cipherEncryptor(DataFlow::TypeTracker t, string algorithmName) {
t.start() and
exists(DataFlow::AttrRead attr |
result.(DataFlow::CallCfgNode).getFunction() = attr and
attr.getAttributeName() = "encryptor" and
attr.getObject() = cipherInstance(algorithmName)
)
result.(DataFlow::MethodCallNode).calls(cipherInstance(algorithmName), "encryptor")
or
exists(DataFlow::TypeTracker t2 | result = cipherEncryptor(t2, algorithmName).track(t2, t))
}
@@ -249,11 +245,7 @@ private module CryptographyModel {
/** Gets a reference to the decryptor of a Cipher instance using algorithm with `algorithmName`. */
DataFlow::LocalSourceNode cipherDecryptor(DataFlow::TypeTracker t, string algorithmName) {
t.start() and
exists(DataFlow::AttrRead attr |
result.(DataFlow::CallCfgNode).getFunction() = attr and
attr.getAttributeName() = "decryptor" and
attr.getObject() = cipherInstance(algorithmName)
)
result.(DataFlow::MethodCallNode).calls(cipherInstance(algorithmName), "decryptor")
or
exists(DataFlow::TypeTracker t2 | result = cipherDecryptor(t2, algorithmName).track(t2, t))
}
@@ -271,18 +263,14 @@ private module CryptographyModel {
* An encrypt or decrypt operation from `cryptography.hazmat.primitives.ciphers`.
*/
class CryptographyGenericCipherOperation extends Cryptography::CryptographicOperation::Range,
DataFlow::CallCfgNode {
DataFlow::MethodCallNode {
string algorithmName;
CryptographyGenericCipherOperation() {
exists(DataFlow::AttrRead attr |
this.getFunction() = attr and
attr.getAttributeName() = ["update", "update_into"] and
(
attr.getObject() = cipherEncryptor(algorithmName)
or
attr.getObject() = cipherDecryptor(algorithmName)
)
exists(DataFlow::Node object, string method |
object in [cipherEncryptor(algorithmName), cipherDecryptor(algorithmName)] and
method in ["update", "update_into"] and
this.calls(object, method)
)
}
@@ -337,16 +325,10 @@ private module CryptographyModel {
* A hashing operation from `cryptography.hazmat.primitives.hashes`.
*/
class CryptographyGenericHashOperation extends Cryptography::CryptographicOperation::Range,
DataFlow::CallCfgNode {
DataFlow::MethodCallNode {
string algorithmName;
CryptographyGenericHashOperation() {
exists(DataFlow::AttrRead attr |
this.getFunction() = attr and
attr.getAttributeName() = "update" and
attr.getObject() = hashInstance(algorithmName)
)
}
CryptographyGenericHashOperation() { this.calls(hashInstance(algorithmName), "update") }
override Cryptography::CryptographicAlgorithm getAlgorithm() {
result.matchesName(algorithmName)

View File

@@ -401,11 +401,11 @@ private module PrivateDjango {
* Gets an instance of the `django.db.models.expressions.RawSQL` class,
* that was initiated with the SQL represented by `sql`.
*/
private DataFlow::LocalSourceNode instance(DataFlow::TypeTracker t, ControlFlowNode sql) {
private DataFlow::LocalSourceNode instance(DataFlow::TypeTracker t, DataFlow::Node sql) {
t.start() and
exists(DataFlow::CallCfgNode c | result = c |
c = classRef().getACall() and
c.getArg(0).asCfgNode() = sql
c.getArg(0) = sql
)
or
exists(DataFlow::TypeTracker t2 | result = instance(t2, sql).track(t2, t))
@@ -415,7 +415,7 @@ private module PrivateDjango {
* Gets an instance of the `django.db.models.expressions.RawSQL` class,
* that was initiated with the SQL represented by `sql`.
*/
DataFlow::Node instance(ControlFlowNode sql) {
DataFlow::Node instance(DataFlow::Node sql) {
instance(DataFlow::TypeTracker::end(), sql).flowsTo(result)
}
}
@@ -431,7 +431,7 @@ private module PrivateDjango {
* See https://docs.djangoproject.com/en/3.1/ref/models/querysets/#annotate
*/
private class ObjectsAnnotate extends SqlExecution::Range, DataFlow::CallCfgNode {
ControlFlowNode sql;
DataFlow::Node sql;
ObjectsAnnotate() {
this = django::db::models::querySetReturningMethod("annotate").getACall() and
@@ -440,7 +440,7 @@ private module PrivateDjango {
]
}
override DataFlow::Node getSql() { result.asCfgNode() = sql }
override DataFlow::Node getSql() { result = sql }
}
/**
@@ -449,7 +449,7 @@ private module PrivateDjango {
* See https://docs.djangoproject.com/en/3.2/ref/models/querysets/#alias
*/
private class ObjectsAlias extends SqlExecution::Range, DataFlow::CallCfgNode {
ControlFlowNode sql;
DataFlow::Node sql;
ObjectsAlias() {
this = django::db::models::querySetReturningMethod("alias").getACall() and
@@ -458,7 +458,7 @@ private module PrivateDjango {
]
}
override DataFlow::Node getSql() { result.asCfgNode() = sql }
override DataFlow::Node getSql() { result = sql }
}
/**
@@ -631,12 +631,12 @@ private module PrivateDjango {
ClassInstantiation() { this = classRef().getACall() }
override DataFlow::Node getBody() {
result.asCfgNode() in [node.getArg(0), node.getArgByName("content")]
result in [this.getArg(0), this.getArgByName("content")]
}
// How to support the `headers` argument here?
override DataFlow::Node getMimetypeOrContentTypeArg() {
result.asCfgNode() in [node.getArg(1), node.getArgByName("content_type")]
result in [this.getArg(1), this.getArgByName("content_type")]
}
override string getMimetypeDefault() { result = "text/html" }
@@ -695,11 +695,11 @@ private module PrivateDjango {
// note that even though browsers like Chrome usually don't fetch the
// content of a redirect, it is possible to observe the body (for example,
// with cURL).
result.asCfgNode() in [node.getArg(1), node.getArgByName("content")]
result in [this.getArg(1), this.getArgByName("content")]
}
override DataFlow::Node getRedirectLocation() {
result.asCfgNode() in [node.getArg(0), node.getArgByName("redirect_to")]
result in [this.getArg(0), this.getArgByName("redirect_to")]
}
// How to support the `headers` argument here?
@@ -757,11 +757,11 @@ private module PrivateDjango {
// note that even though browsers like Chrome usually don't fetch the
// content of a redirect, it is possible to observe the body (for example,
// with cURL).
result.asCfgNode() in [node.getArg(1), node.getArgByName("content")]
result in [this.getArg(1), this.getArgByName("content")]
}
override DataFlow::Node getRedirectLocation() {
result.asCfgNode() in [node.getArg(0), node.getArgByName("redirect_to")]
result in [this.getArg(0), this.getArgByName("redirect_to")]
}
// How to support the `headers` argument here?
@@ -868,7 +868,7 @@ private module PrivateDjango {
ClassInstantiation() { this = classRef().getACall() }
override DataFlow::Node getBody() {
result.asCfgNode() in [node.getArg(0), node.getArgByName("content")]
result in [this.getArg(0), this.getArgByName("content")]
}
// How to support the `headers` argument here?
@@ -922,7 +922,7 @@ private module PrivateDjango {
ClassInstantiation() { this = classRef().getACall() }
override DataFlow::Node getBody() {
result.asCfgNode() in [node.getArg(0), node.getArgByName("content")]
result in [this.getArg(0), this.getArgByName("content")]
}
// How to support the `headers` argument here?
@@ -976,7 +976,7 @@ private module PrivateDjango {
ClassInstantiation() { this = classRef().getACall() }
override DataFlow::Node getBody() {
result.asCfgNode() in [node.getArg(0), node.getArgByName("content")]
result in [this.getArg(0), this.getArgByName("content")]
}
// How to support the `headers` argument here?
@@ -1031,7 +1031,7 @@ private module PrivateDjango {
override DataFlow::Node getBody() {
// First argument is permitted methods
result.asCfgNode() in [node.getArg(1), node.getArgByName("content")]
result in [this.getArg(1), this.getArgByName("content")]
}
// How to support the `headers` argument here?
@@ -1085,7 +1085,7 @@ private module PrivateDjango {
ClassInstantiation() { this = classRef().getACall() }
override DataFlow::Node getBody() {
result.asCfgNode() in [node.getArg(0), node.getArgByName("content")]
result in [this.getArg(0), this.getArgByName("content")]
}
// How to support the `headers` argument here?
@@ -1139,7 +1139,7 @@ private module PrivateDjango {
ClassInstantiation() { this = classRef().getACall() }
override DataFlow::Node getBody() {
result.asCfgNode() in [node.getArg(0), node.getArgByName("content")]
result in [this.getArg(0), this.getArgByName("content")]
}
// How to support the `headers` argument here?
@@ -1193,7 +1193,7 @@ private module PrivateDjango {
ClassInstantiation() { this = classRef().getACall() }
override DataFlow::Node getBody() {
result.asCfgNode() in [node.getArg(0), node.getArgByName("data")]
result in [this.getArg(0), this.getArgByName("data")]
}
// How to support the `headers` argument here?
@@ -1250,7 +1250,7 @@ private module PrivateDjango {
ClassInstantiation() { this = classRef().getACall() }
override DataFlow::Node getBody() {
result.asCfgNode() in [node.getArg(0), node.getArgByName("streaming_content")]
result in [this.getArg(0), this.getArgByName("streaming_content")]
}
// How to support the `headers` argument here?
@@ -1304,7 +1304,7 @@ private module PrivateDjango {
ClassInstantiation() { this = classRef().getACall() }
override DataFlow::Node getBody() {
result.asCfgNode() in [node.getArg(0), node.getArgByName("streaming_content")]
result in [this.getArg(0), this.getArgByName("streaming_content")]
}
// How to support the `headers` argument here?
@@ -1349,14 +1349,13 @@ private module PrivateDjango {
*
* See https://docs.djangoproject.com/en/3.1/ref/request-response/#django.http.HttpResponse.write
*/
class HttpResponseWriteCall extends HTTP::Server::HttpResponse::Range, DataFlow::CfgNode {
override CallNode node;
class HttpResponseWriteCall extends HTTP::Server::HttpResponse::Range, DataFlow::CallCfgNode {
HTTP::Server::HttpResponse::Range instance;
HttpResponseWriteCall() { node.getFunction() = write(instance).asCfgNode() }
override DataFlow::Node getBody() {
result.asCfgNode() in [node.getArg(0), node.getArgByName("content")]
result in [this.getArg(0), this.getArgByName("content")]
}
override DataFlow::Node getMimetypeOrContentTypeArg() {
@@ -1639,12 +1638,10 @@ private module PrivateDjango {
DjangoUrlsPathCall() { this = django::urls::path().getACall() }
override DataFlow::Node getUrlPatternArg() {
result.asCfgNode() = [node.getArg(0), node.getArgByName("route")]
result in [this.getArg(0), this.getArgByName("route")]
}
override DataFlow::Node getViewArg() {
result.asCfgNode() in [node.getArg(1), node.getArgByName("view")]
}
override DataFlow::Node getViewArg() { result in [this.getArg(1), this.getArgByName("view")] }
override Parameter getARoutedParameter() {
// If we don't know the URL pattern, we simply mark all parameters as a routed
@@ -1708,7 +1705,7 @@ private module PrivateDjango {
DjangoRouteRegex() {
this instanceof StrConst and
DataFlow::exprNode(this).(DataFlow::LocalSourceNode).flowsTo(rePathCall.getUrlPatternArg())
rePathCall.getUrlPatternArg().getALocalSource() = DataFlow::exprNode(this)
}
DjangoRegexRouteSetup getRouteSetup() { result = rePathCall }
@@ -1739,12 +1736,10 @@ private module PrivateDjango {
}
override DataFlow::Node getUrlPatternArg() {
result.asCfgNode() = [node.getArg(0), node.getArgByName("route")]
result in [this.getArg(0), this.getArgByName("route")]
}
override DataFlow::Node getViewArg() {
result.asCfgNode() in [node.getArg(1), node.getArgByName("view")]
}
override DataFlow::Node getViewArg() { result in [this.getArg(1), this.getArgByName("view")] }
}
/**
@@ -1756,12 +1751,10 @@ private module PrivateDjango {
DjangoConfUrlsUrlCall() { this = django::conf::conf_urls::url().getACall() }
override DataFlow::Node getUrlPatternArg() {
result.asCfgNode() = [node.getArg(0), node.getArgByName("regex")]
result in [this.getArg(0), this.getArgByName("regex")]
}
override DataFlow::Node getViewArg() {
result.asCfgNode() in [node.getArg(1), node.getArgByName("view")]
}
override DataFlow::Node getViewArg() { result in [this.getArg(1), this.getArgByName("view")] }
}
// ---------------------------------------------------------------------------
@@ -1872,7 +1865,7 @@ private module PrivateDjango {
* a string identifying a view, or a Django model.
*/
override DataFlow::Node getRedirectLocation() {
result.asCfgNode() in [node.getArg(0), node.getArgByName("to")]
result in [this.getArg(0), this.getArgByName("to")]
}
override DataFlow::Node getBody() { none() }

View File

@@ -48,7 +48,7 @@ private module FabricV1 {
FabricApiLocalRunSudoCall() { this = api().getMember(["local", "run", "sudo"]).getACall() }
override DataFlow::Node getCommand() {
result.asCfgNode() = [node.getArg(0), node.getArgByName("command")]
result = [this.getArg(0), this.getArgByName("command")]
}
}
}
@@ -159,7 +159,7 @@ private module FabricV2 {
}
override DataFlow::Node getCommand() {
result.asCfgNode() = [node.getArg(0), node.getArgByName("command")]
result = [this.getArg(0), this.getArgByName("command")]
}
}
@@ -239,7 +239,7 @@ private module FabricV2 {
FabricGroupRunCall() { this = fabric::group::Group::subclassInstanceRunMethod().getACall() }
override DataFlow::Node getCommand() {
result.asCfgNode() = [node.getArg(0), node.getArgByName("command")]
result = [this.getArg(0), this.getArgByName("command")]
}
}

View File

@@ -81,7 +81,7 @@ private module Invoke {
}
override DataFlow::Node getCommand() {
result.asCfgNode() in [node.getArg(0), node.getArgByName("command")]
result in [this.getArg(0), this.getArgByName("command")]
}
}
}

View File

@@ -0,0 +1,35 @@
/**
* Provides classes modeling security-relevant aspects of the `jmespath` PyPI package.
* See https://pypi.org/project/jmespath/.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
/**
 * Provides models for the `jmespath` PyPI package.
 * See https://pypi.org/project/jmespath/.
 */
private module Jmespath {
  /**
   * Taint steps from the searched data to the result of a `search` call, covering both
   * `jmespath.search(..., data)` and `search(value)` on the result of `jmespath.compile(...)`.
   */
  class JmespathAdditionalTaintSteps extends TaintTracking::AdditionalTaintStep {
    override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
      exists(DataFlow::CallCfgNode searchCall | nodeTo = searchCall |
        // Module-level `jmespath.search`: the data is the second positional argument.
        searchCall = API::moduleImport("jmespath").getMember("search").getACall() and
        nodeFrom in [searchCall.getArg(1), searchCall.getArgByName("data")]
        or
        // `search` on a compiled expression: the data is the first positional argument.
        searchCall =
          API::moduleImport("jmespath")
              .getMember("compile")
              .getReturn()
              .getMember("search")
              .getACall() and
        nodeFrom in [searchCall.getArg(0), searchCall.getArgByName("value")]
      )
    }
  }
}

View File

@@ -1,5 +1,7 @@
/**
* Provides classes modeling security-relevant aspects of the `MySQLdb` PyPI package.
* Provides classes modeling security-relevant aspects of the `MySQL-python` PyPI package
* (imported as `MySQLdb`).
*
* See
* - https://mysqlclient.readthedocs.io/index.html
* - https://pypi.org/project/MySQL-python/
@@ -13,7 +15,7 @@ private import semmle.python.ApiGraphs
private import semmle.python.frameworks.PEP249
/**
* Provides models for the `MySQLdb` PyPI package.
* Provides models for the `MySQL-python` PyPI package (imported as `MySQLdb`).
* See
* - https://mysqlclient.readthedocs.io/index.html
* - https://pypi.org/project/MySQL-python/

View File

@@ -1,5 +1,6 @@
/**
* Provides classes modeling security-relevant aspects of the `mysql-connector-python` package.
* Provides classes modeling security-relevant aspects of the `mysql-connector-python`
* and `mysql-connector` (old package name) PyPI packages (imported as `mysql`).
* See
* - https://dev.mysql.com/doc/connector-python/en/
* - https://dev.mysql.com/doc/connector-python/en/connector-python-example-connecting.html
@@ -13,12 +14,13 @@ private import semmle.python.ApiGraphs
private import semmle.python.frameworks.PEP249
/**
* Provides models for the `mysql-connector-python` package.
* Provides classes modeling security-relevant aspects of the `mysql-connector-python`
* and `mysql-connector` (old package name) PyPI packages (imported as `mysql`).
* See
* - https://dev.mysql.com/doc/connector-python/en/
* - https://dev.mysql.com/doc/connector-python/en/connector-python-example-connecting.html
*/
private module MysqlConnectorPython {
private module Mysql {
// ---------------------------------------------------------------------------
// mysql
// ---------------------------------------------------------------------------

View File

@@ -122,7 +122,5 @@ DataFlow::Node execute() { execute(DataFlow::TypeTracker::end()).flowsTo(result)
private class ExecuteCall extends SqlExecution::Range, DataFlow::CallCfgNode {
ExecuteCall() { this.getFunction() = execute() }
override DataFlow::Node getSql() {
result.asCfgNode() in [node.getArg(0), node.getArgByName("sql")]
}
override DataFlow::Node getSql() { result in [this.getArg(0), this.getArgByName("sql")] }
}

View File

@@ -0,0 +1,141 @@
/**
* Provides classes modeling security-relevant aspects of the `rsa` PyPI package.
* See https://stuvel.eu/python-rsa-doc/.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
/**
* Provides models for the `rsa` PyPI package.
* See https://stuvel.eu/python-rsa-doc/.
*/
private module Rsa {
/**
 * A call to `rsa.newkeys`, modeled as an RSA key-generation operation.
 *
 * See https://stuvel.eu/python-rsa-doc/reference.html#rsa.newkeys
 */
class RsaNewkeysCall extends Cryptography::PublicKey::KeyGeneration::RsaRange,
  DataFlow::CallCfgNode {
  RsaNewkeysCall() {
    exists(API::Node newkeys | newkeys = API::moduleImport("rsa").getMember("newkeys") |
      this = newkeys.getACall()
    )
  }

  /** Gets the key-size argument: the first positional argument or the `nbits` keyword argument. */
  override DataFlow::Node getKeySizeArg() {
    result = this.getArg(0)
    or
    result = this.getArgByName("nbits")
  }
}
/**
 * A call to `rsa.encrypt`, modeled as a cryptographic operation that uses RSA.
 *
 * See https://stuvel.eu/python-rsa-doc/reference.html#rsa.encrypt
 */
class RsaEncryptCall extends Cryptography::CryptographicOperation::Range, DataFlow::CallCfgNode {
  RsaEncryptCall() {
    exists(API::Node encrypt | encrypt = API::moduleImport("rsa").getMember("encrypt") |
      this = encrypt.getACall()
    )
  }

  override Cryptography::CryptographicAlgorithm getAlgorithm() { "RSA" = result.getName() }

  /** Gets the input: the first positional argument or the `message` keyword argument. */
  override DataFlow::Node getAnInput() {
    result = this.getArg(0)
    or
    result = this.getArgByName("message")
  }
}
/**
 * A call to `rsa.decrypt`, modeled as a cryptographic operation that uses RSA.
 *
 * See https://stuvel.eu/python-rsa-doc/reference.html#rsa.decrypt
 */
class RsaDecryptCall extends Cryptography::CryptographicOperation::Range, DataFlow::CallCfgNode {
  RsaDecryptCall() {
    exists(API::Node decrypt | decrypt = API::moduleImport("rsa").getMember("decrypt") |
      this = decrypt.getACall()
    )
  }

  override Cryptography::CryptographicAlgorithm getAlgorithm() { "RSA" = result.getName() }

  /** Gets the input: the first positional argument or the `crypto` keyword argument. */
  override DataFlow::Node getAnInput() {
    result = this.getArg(0)
    or
    result = this.getArgByName("crypto")
  }
}
/**
 * A call to `rsa.sign`, which both hashes and signs the input message.
 *
 * See https://stuvel.eu/python-rsa-doc/reference.html#rsa.sign
 */
class RsaSignCall extends Cryptography::CryptographicOperation::Range, DataFlow::CallCfgNode {
  RsaSignCall() {
    exists(API::Node sign | sign = API::moduleImport("rsa").getMember("sign") |
      this = sign.getACall()
    )
  }

  override Cryptography::CryptographicAlgorithm getAlgorithm() {
    // The signature part always uses RSA.
    result.getName() = "RSA"
    or
    // The hashing part: matched against a string constant that is a local source of
    // the third positional argument or the `hash_method` keyword argument.
    exists(DataFlow::Node hashMethodArg, StrConst hashMethodName |
      hashMethodArg = [this.getArg(2), this.getArgByName("hash_method")] and
      hashMethodArg.getALocalSource() = DataFlow::exprNode(hashMethodName) and
      result.matchesName(hashMethodName.getText())
    )
  }

  /** Gets the input: the first positional argument or the `message` keyword argument. */
  override DataFlow::Node getAnInput() {
    result = this.getArg(0)
    or
    result = this.getArgByName("message")
  }
}
/**
 * A call to `rsa.verify`, modeled as a cryptographic operation that uses RSA.
 *
 * See https://stuvel.eu/python-rsa-doc/reference.html#rsa.verify
 */
class RsaVerifyCall extends Cryptography::CryptographicOperation::Range, DataFlow::CallCfgNode {
  RsaVerifyCall() {
    exists(API::Node verify | verify = API::moduleImport("rsa").getMember("verify") |
      this = verify.getACall()
    )
  }

  override Cryptography::CryptographicAlgorithm getAlgorithm() {
    // Technically a hashing operation also takes place, but the hash algorithm is
    // not known up front since it is encoded in the signature, so only RSA is reported.
    "RSA" = result.getName()
  }

  /**
   * Gets an input: the message (first positional argument or `message` keyword
   * argument) or the signature (second positional argument or `signature` keyword
   * argument).
   */
  override DataFlow::Node getAnInput() {
    result =
      [
        this.getArg(0), this.getArgByName("message"), this.getArg(1),
        this.getArgByName("signature")
      ]
  }
}
/**
 * A call to `rsa.compute_hash`, modeled as a cryptographic (hashing) operation.
 *
 * See https://stuvel.eu/python-rsa-doc/reference.html#rsa.compute_hash
 */
class RsaComputeHashCall extends Cryptography::CryptographicOperation::Range,
  DataFlow::CallCfgNode {
  RsaComputeHashCall() {
    exists(API::Node computeHash |
      computeHash = API::moduleImport("rsa").getMember("compute_hash")
    |
      this = computeHash.getACall()
    )
  }

  override Cryptography::CryptographicAlgorithm getAlgorithm() {
    // The algorithm is matched against a string constant that is a local source of
    // the second positional argument or the `method_name` keyword argument.
    exists(DataFlow::Node methodNameArg, StrConst methodName |
      methodNameArg = [this.getArg(1), this.getArgByName("method_name")] and
      methodNameArg.getALocalSource() = DataFlow::exprNode(methodName) and
      result.matchesName(methodName.getText())
    )
  }

  /** Gets the input: the first positional argument or the `message` keyword argument. */
  override DataFlow::Node getAnInput() {
    result = this.getArg(0)
    or
    result = this.getArgByName("message")
  }
}
/**
 * A call to `rsa.sign_hash`, modeled as a cryptographic operation that uses RSA.
 *
 * See https://stuvel.eu/python-rsa-doc/reference.html#rsa.sign_hash
 */
class RsaSignHashCall extends Cryptography::CryptographicOperation::Range, DataFlow::CallCfgNode {
  RsaSignHashCall() {
    exists(API::Node signHash | signHash = API::moduleImport("rsa").getMember("sign_hash") |
      this = signHash.getACall()
    )
  }

  override Cryptography::CryptographicAlgorithm getAlgorithm() { "RSA" = result.getName() }

  /** Gets the input: the first positional argument or the `hash_value` keyword argument. */
  override DataFlow::Node getAnInput() {
    result = this.getArg(0)
    or
    result = this.getArgByName("hash_value")
  }
}
}

View File

@@ -38,9 +38,7 @@ private module Stdlib {
private class OsPathNormpathCall extends Path::PathNormalization::Range, DataFlow::CallCfgNode {
OsPathNormpathCall() { this = os::path().getMember("normpath").getACall() }
DataFlow::Node getPathArg() {
result.asCfgNode() in [node.getArg(0), node.getArgByName("path")]
}
DataFlow::Node getPathArg() { result in [this.getArg(0), this.getArgByName("path")] }
}
/** An additional taint step for calls to `os.path.normpath` */
@@ -60,9 +58,7 @@ private module Stdlib {
private class OsPathAbspathCall extends Path::PathNormalization::Range, DataFlow::CallCfgNode {
OsPathAbspathCall() { this = os::path().getMember("abspath").getACall() }
DataFlow::Node getPathArg() {
result.asCfgNode() in [node.getArg(0), node.getArgByName("path")]
}
DataFlow::Node getPathArg() { result in [this.getArg(0), this.getArgByName("path")] }
}
/** An additional taint step for calls to `os.path.abspath` */
@@ -82,9 +78,7 @@ private module Stdlib {
private class OsPathRealpathCall extends Path::PathNormalization::Range, DataFlow::CallCfgNode {
OsPathRealpathCall() { this = os::path().getMember("realpath").getACall() }
DataFlow::Node getPathArg() {
result.asCfgNode() in [node.getArg(0), node.getArgByName("path")]
}
DataFlow::Node getPathArg() { result in [this.getArg(0), this.getArgByName("path")] }
}
/** An additional taint step for calls to `os.path.realpath` */
@@ -104,7 +98,7 @@ private module Stdlib {
private class OsSystemCall extends SystemCommandExecution::Range, DataFlow::CallCfgNode {
OsSystemCall() { this = os().getMember("system").getACall() }
override DataFlow::Node getCommand() { result.asCfgNode() = node.getArg(0) }
override DataFlow::Node getCommand() { result = this.getArg(0) }
}
/**
@@ -124,10 +118,10 @@ private module Stdlib {
}
override DataFlow::Node getCommand() {
result.asCfgNode() = node.getArg(0)
result = this.getArg(0)
or
not name = "popen" and
result.asCfgNode() = node.getArgByName("cmd")
result = this.getArgByName("cmd")
}
}
@@ -143,7 +137,7 @@ private module Stdlib {
)
}
override DataFlow::Node getCommand() { result.asCfgNode() = node.getArg(0) }
override DataFlow::Node getCommand() { result = this.getArg(0) }
}
/**
@@ -160,7 +154,7 @@ private module Stdlib {
)
}
override DataFlow::Node getCommand() { result.asCfgNode() = node.getArg(1) }
override DataFlow::Node getCommand() { result = this.getArg(1) }
}
/**
@@ -170,7 +164,7 @@ private module Stdlib {
private class OsPosixSpawnCall extends SystemCommandExecution::Range, DataFlow::CallCfgNode {
OsPosixSpawnCall() { this = os().getMember(["posix_spawn", "posix_spawnp"]).getACall() }
override DataFlow::Node getCommand() { result.asCfgNode() = node.getArg(0) }
override DataFlow::Node getCommand() { result = this.getArg(0) }
}
/** An additional taint step for calls to `os.path.join` */
@@ -204,22 +198,22 @@ private module Stdlib {
}
/** Gets the data-flow node for the `args` argument, if any. */
private ControlFlowNode get_args_arg() { result in [node.getArg(0), node.getArgByName("args")] }
private DataFlow::Node get_args_arg() { result in [this.getArg(0), this.getArgByName("args")] }
/** Gets the data-flow node for the `shell` argument, if any. */
private ControlFlowNode get_shell_arg() {
result in [node.getArg(8), node.getArgByName("shell")]
private DataFlow::Node get_shell_arg() {
result in [this.getArg(8), this.getArgByName("shell")]
}
private boolean get_shell_arg_value() {
not exists(this.get_shell_arg()) and
result = false
or
exists(ControlFlowNode shell_arg | shell_arg = this.get_shell_arg() |
result = shell_arg.getNode().(ImmutableLiteral).booleanValue()
exists(DataFlow::Node shell_arg | shell_arg = this.get_shell_arg() |
result = shell_arg.asCfgNode().getNode().(ImmutableLiteral).booleanValue()
or
// TODO: Track the "shell" argument to determine possible values
not shell_arg.getNode() instanceof ImmutableLiteral and
not shell_arg.asCfgNode().getNode() instanceof ImmutableLiteral and
(
result = true
or
@@ -229,16 +223,16 @@ private module Stdlib {
}
/** Gets the data-flow node for the `executable` argument, if any. */
private ControlFlowNode get_executable_arg() {
result in [node.getArg(2), node.getArgByName("executable")]
private DataFlow::Node get_executable_arg() {
result in [this.getArg(2), this.getArgByName("executable")]
}
override DataFlow::Node getCommand() {
// TODO: Track arguments ("args" and "shell")
// TODO: Handle using `args=["sh", "-c", <user-input>]`
result.asCfgNode() = this.get_executable_arg()
result = this.get_executable_arg()
or
exists(ControlFlowNode arg_args, boolean shell |
exists(DataFlow::Node arg_args, boolean shell |
arg_args = get_args_arg() and
shell = get_shell_arg_value()
|
@@ -254,14 +248,14 @@ private module Stdlib {
// run, so if we're able to, we only mark the first element as the command
// (and not the arguments to the command).
//
result.asCfgNode() = arg_args.(SequenceNode).getElement(0)
result.asCfgNode() = arg_args.asCfgNode().(SequenceNode).getElement(0)
or
// Either the "args" argument is not a sequence (which is valid) or we where
// just not able to figure it out. Simply mark the "args" argument as the
// command.
//
not arg_args instanceof SequenceNode and
result.asCfgNode() = arg_args
not arg_args.asCfgNode() instanceof SequenceNode and
result = arg_args
)
)
}
@@ -334,9 +328,7 @@ private module Stdlib {
)
}
override DataFlow::Node getCommand() {
result.asCfgNode() in [node.getArg(0), node.getArgByName("cmd")]
}
override DataFlow::Node getCommand() { result in [this.getArg(0), this.getArgByName("cmd")] }
}
// ---------------------------------------------------------------------------
@@ -352,9 +344,7 @@ private module Stdlib {
private class PlatformPopenCall extends SystemCommandExecution::Range, DataFlow::CallCfgNode {
PlatformPopenCall() { this = platform().getMember("popen").getACall() }
override DataFlow::Node getCommand() {
result.asCfgNode() in [node.getArg(0), node.getArgByName("cmd")]
}
override DataFlow::Node getCommand() { result in [this.getArg(0), this.getArgByName("cmd")] }
}
// ---------------------------------------------------------------------------
@@ -442,7 +432,7 @@ private module Stdlib {
this = base64().getMember(name).getACall()
}
override DataFlow::Node getAnInput() { result.asCfgNode() = node.getArg(0) }
override DataFlow::Node getAnInput() { result = this.getArg(0) }
override DataFlow::Node getOutput() { result = this }
@@ -476,7 +466,7 @@ private module Stdlib {
override predicate mayExecuteInput() { none() }
override DataFlow::Node getAnInput() { result.asCfgNode() = node.getArg(0) }
override DataFlow::Node getAnInput() { result = this.getArg(0) }
override DataFlow::Node getOutput() { result = this }
@@ -1097,7 +1087,7 @@ private DataFlow::CallCfgNode hashlibNewCall(string algorithmName) {
result = API::moduleImport("hashlib").getMember("new").getACall() and
nameArg in [result.getArg(0), result.getArgByName("name")] and
exists(StrConst str |
DataFlow::exprNode(str).(DataFlow::LocalSourceNode).flowsTo(nameArg) and
nameArg.getALocalSource() = DataFlow::exprNode(str) and
algorithmName = str.getText()
)
)

View File

@@ -349,7 +349,7 @@ private module Tornado {
TornadoRouteRegex() {
this instanceof StrConst and
DataFlow::exprNode(this).(DataFlow::LocalSourceNode).flowsTo(setup.getUrlPatternArg())
setup.getUrlPatternArg().getALocalSource() = DataFlow::exprNode(this)
}
TornadoRouteSetup getRouteSetup() { result = setup }
@@ -431,7 +431,7 @@ private module Tornado {
}
override DataFlow::Node getRedirectLocation() {
result.asCfgNode() in [node.getArg(0), node.getArgByName("url")]
result in [this.getArg(0), this.getArgByName("url")]
}
override DataFlow::Node getBody() { none() }
@@ -452,9 +452,7 @@ private module Tornado {
this.getFunction() = tornado::web::RequestHandler::writeMethod()
}
override DataFlow::Node getBody() {
result.asCfgNode() in [node.getArg(0), node.getArgByName("chunk")]
}
override DataFlow::Node getBody() { result in [this.getArg(0), this.getArgByName("chunk")] }
override string getMimetypeDefault() { result = "text/html" }

View File

@@ -0,0 +1,250 @@
/**
* Provides classes modeling security-relevant aspects of the `twisted` PyPI package.
* See https://twistedmatrix.com/.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
/**
* Provides models for the `twisted` PyPI package.
* See https://twistedmatrix.com/.
*/
private module Twisted {
// ---------------------------------------------------------------------------
// request handler modeling
// ---------------------------------------------------------------------------
/**
 * A class with `twisted.web.resource.Resource` (or one of its subclasses) as a
 * base class, which is thereby able to handle HTTP requests.
 *
 * See https://twistedmatrix.com/documents/21.2.0/api/twisted.web.resource.Resource.html
 */
class TwistedResourceSubclass extends Class {
  TwistedResourceSubclass() {
    exists(API::Node resourceClass |
      resourceClass =
        API::moduleImport("twisted")
            .getMember("web")
            .getMember("resource")
            .getMember("Resource")
            .getASubclass*()
    |
      this.getABase() = resourceClass.getAUse().asExpr()
    )
  }

  /**
   * Gets a method of this class that could handle incoming requests, if any.
   *
   * A method qualifies when `getRequestParamIndex` has a result for its name.
   */
  Function getARequestHandler() {
    // TODO: This doesn't handle attribute assignment. Should be OK, but analysis is not as complete as with
    // points-to and `.lookup`, which would handle `post = my_post_handler` inside class def
    exists(getRequestParamIndex(result.getName())) and
    result = this.getAMethod()
  }
}
/**
 * Gets the index at which the request parameter is expected for the method named
 * `methodName` in a `twisted.web.resource.Resource` subclass.
 *
 * Has no result for method names that are not listed below.
 */
bindingset[methodName]
private int getRequestParamIndex(string methodName) {
  result = 1 and
  (
    methodName.matches("render_%")
    or
    methodName = ["render", "listDynamicEntities", "getChildForRequest"]
  )
  or
  result = 2 and
  methodName = ["getDynamicEntity", "getChild", "getChildWithDefault"]
}
/** A method on a `twisted.web.resource.Resource` subclass that handles incoming requests. */
class TwistedResourceRequestHandler extends HTTP::Server::RequestHandler::Range {
  TwistedResourceRequestHandler() {
    exists(TwistedResourceSubclass cls | this = cls.getARequestHandler())
  }

  /**
   * Gets the parameter that receives the request, i.e. the parameter at the index
   * given by `getRequestParamIndex` for this method's name.
   */
  Parameter getRequestParameter() {
    exists(int requestIndex | requestIndex = getRequestParamIndex(this.getName()) |
      result = this.getArg(requestIndex)
    )
  }

  override Parameter getARoutedParameter() { none() }

  override string getFramework() { result = "twisted" }
}
/**
 * A "render" method on a `twisted.web.resource.Resource` subclass, whose return value
 * is written as the body of the HTTP response.
 *
 * This covers methods named `render` and methods whose name starts with `render_`.
 */
class TwistedResourceRenderMethod extends TwistedResourceRequestHandler {
  TwistedResourceRenderMethod() {
    exists(string methodName | methodName = this.getName() |
      methodName.matches("render_%")
      or
      methodName = "render"
    )
  }
}
// ---------------------------------------------------------------------------
// request modeling
// ---------------------------------------------------------------------------
/**
 * Provides models for the `twisted.web.server.Request` class.
 *
 * See https://twistedmatrix.com/documents/21.2.0/api/twisted.web.server.Request.html
 */
module Request {
  /**
   * A source of instances of `twisted.web.server.Request`; extend this class to model
   * new instances.
   *
   * This can include instantiations of the class, return values from function
   * calls, or a special parameter that will be set when functions are called by an
   * external library.
   *
   * Use the `Request::instance()` predicate to get references to instances of
   * `twisted.web.server.Request`.
   */
  abstract class InstanceSource extends DataFlow::LocalSourceNode { }

  /**
   * Gets a reference to an instance of `twisted.web.server.Request`, tracked with
   * the type tracker `t`.
   */
  private DataFlow::LocalSourceNode instance(DataFlow::TypeTracker t) {
    // Base case: tracking starts at any modeled instance source.
    result instanceof InstanceSource and
    t.start()
    or
    // Step case: continue tracking from an already tracked reference.
    exists(DataFlow::TypeTracker prev | result = instance(prev).track(prev, t))
  }

  /** Gets a reference to an instance of `twisted.web.server.Request`. */
  DataFlow::Node instance() {
    exists(DataFlow::LocalSourceNode tracked |
      tracked = instance(DataFlow::TypeTracker::end())
    |
      tracked.flowsTo(result)
    )
  }
}
/**
 * The parameter of a twisted request handler that will receive a
 * `twisted.web.server.Request` instance when the handler is called.
 *
 * It is both a remote flow source and a source of `Request` instances.
 */
class TwistedResourceRequestHandlerRequestParam extends RemoteFlowSource::Range,
  Request::InstanceSource, DataFlow::ParameterNode {
  TwistedResourceRequestHandlerRequestParam() {
    exists(TwistedResourceRequestHandler handler |
      this.getParameter() = handler.getRequestParameter()
    )
  }

  override string getSourceType() { result = "twisted.web.server.Request" }
}
/**
 * Taint propagation for `twisted.web.server.Request`: taint flows from a request
 * instance to the results of selected method calls and to selected attribute reads.
 */
private class TwistedRequestAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
  override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
    nodeFrom = Request::instance() and
    (
      // Methods
      //
      // TODO: When we have tools that make it easy, model these properly to handle
      // `meth = obj.meth; meth()`. Until then, we'll use this more syntactic approach
      // (since it allows us to at least capture the most common cases).
      exists(DataFlow::AttrRead methodRead |
        methodRead.getObject() = nodeFrom and
        // normal (non-async) methods
        methodRead.getAttributeName() in [
            "getCookie", "getHeader", "getAllHeaders", "getUser", "getPassword", "getHost",
            "getRequestHostname"
          ] and
        nodeTo.(DataFlow::CallCfgNode).getFunction() = methodRead
      )
      or
      // Attributes
      exists(DataFlow::AttrRead attrRead | attrRead = nodeTo |
        attrRead.getObject() = nodeFrom and
        attrRead.getAttributeName() in [
            "uri", "path", "prepath", "postpath", "content", "args", "received_cookies",
            "requestHeaders", "user", "password", "host"
          ]
      )
    )
  }
}
/**
 * A parameter of a request handler method (on a `twisted.web.resource.Resource` subclass)
 * that is also given remote user input (a bit like RoutedParameter).
 *
 * This is the parameter at index 1 of handlers named `getChild`,
 * `getChildWithDefault` or `getDynamicEntity`.
 */
class TwistedResourceRequestHandlerExtraSources extends RemoteFlowSource::Range,
  DataFlow::ParameterNode {
  TwistedResourceRequestHandlerExtraSources() {
    exists(TwistedResourceRequestHandler handler |
      handler.getName() = ["getChild", "getChildWithDefault", "getDynamicEntity"] and
      this.getParameter() = handler.getArg(1)
    )
  }

  override string getSourceType() { result = "twisted Resource method extra parameter" }
}
// ---------------------------------------------------------------------------
// response modeling
// ---------------------------------------------------------------------------
/**
 * The implicit HTTP response formed by a value returned from a render method.
 */
private class TwistedResourceRenderMethodReturn extends HTTP::Server::HttpResponse::Range,
  DataFlow::CfgNode {
  TwistedResourceRenderMethodReturn() {
    exists(TwistedResourceRenderMethod renderMethod |
      this.asCfgNode() = renderMethod.getAReturnValueFlowNode()
    )
  }

  /** Gets the response body, which is the returned value itself. */
  override DataFlow::Node getBody() { result = this }

  override DataFlow::Node getMimetypeOrContentTypeArg() { none() }

  override string getMimetypeDefault() { result = "text/html" }
}
/**
 * A call to the `twisted.web.server.Request.write` function, modeled as an HTTP
 * response whose body is the written data.
 *
 * See https://twistedmatrix.com/documents/21.2.0/api/twisted.web.server.Request.html#write
 */
class TwistedRequestWriteCall extends HTTP::Server::HttpResponse::Range, DataFlow::CallCfgNode {
  TwistedRequestWriteCall() {
    // TODO: When we have tools that make it easy, model these properly to handle
    // `meth = obj.meth; meth()`. Until then, we'll use this more syntactic approach
    // (since it allows us to at least capture the most common cases).
    exists(DataFlow::AttrRead read |
      this.getFunction() = read and
      read.getObject() = Request::instance() and
      read.getAttributeName() = "write"
    )
  }

  /**
   * Gets the response body: the first positional argument or the `data` keyword
   * argument.
   *
   * Uses the `DataFlow::CallCfgNode` argument accessors rather than the underlying
   * control-flow `node`, consistent with the other framework models.
   */
  override DataFlow::Node getBody() { result in [this.getArg(0), this.getArgByName("data")] }

  override DataFlow::Node getMimetypeOrContentTypeArg() { none() }

  override string getMimetypeDefault() { result = "text/html" }
}
/**
 * A call to the `redirect` function on a twisted request, modeled as an HTTP
 * redirect response.
 *
 * See https://twistedmatrix.com/documents/21.2.0/api/twisted.web.http.Request.html#redirect
 */
class TwistedRequestRedirectCall extends HTTP::Server::HttpRedirectResponse::Range,
  DataFlow::CallCfgNode {
  TwistedRequestRedirectCall() {
    // TODO: When we have tools that make it easy, model these properly to handle
    // `meth = obj.meth; meth()`. Until then, we'll use this more syntactic approach
    // (since it allows us to at least capture the most common cases).
    exists(DataFlow::AttrRead read |
      this.getFunction() = read and
      read.getObject() = Request::instance() and
      read.getAttributeName() = "redirect"
    )
  }

  override DataFlow::Node getBody() { none() }

  /**
   * Gets the redirect location: the first positional argument or the `url` keyword
   * argument.
   *
   * Uses the `DataFlow::CallCfgNode` argument accessors rather than the underlying
   * control-flow `node`, consistent with the other framework models.
   */
  override DataFlow::Node getRedirectLocation() {
    result in [this.getArg(0), this.getArgByName("url")]
  }

  override DataFlow::Node getMimetypeOrContentTypeArg() { none() }

  override string getMimetypeDefault() { result = "text/html" }
}
}

View File

@@ -88,7 +88,7 @@ module Werkzeug {
or
// getlist -> getlist()
nodeFrom = werkzeug::datastructures::MultiDict::getlist() and
nodeTo.asCfgNode().(CallNode).getFunction() = nodeFrom.asCfgNode()
nodeTo.(DataFlow::CallCfgNode).getFunction() = nodeFrom
}
}

View File

@@ -13,6 +13,7 @@ private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.Concepts
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.BarrierGuards
private import semmle.python.dataflow.new.SensitiveDataSources
/**
* Provides a taint-tracking configuration for detecting use of a broken or weak
@@ -38,6 +39,10 @@ module NormalHashFunction {
or
node instanceof Sanitizer
}
override predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) {
sensitiveDataExtraStepForCalls(node1, node2)
}
}
}
@@ -70,5 +75,9 @@ module ComputationallyExpensiveHashFunction {
or
node instanceof Sanitizer
}
override predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) {
sensitiveDataExtraStepForCalls(node1, node2)
}
}
}

View File

@@ -52,7 +52,9 @@ module NormalHashFunction {
* A source of sensitive data, considered as a flow source.
*/
class SensitiveDataSourceAsSource extends Source, SensitiveDataSource {
override string getClassification() { result = SensitiveDataSource.super.getClassification() }
override SensitiveDataClassification getClassification() {
result = SensitiveDataSource.super.getClassification()
}
}
/** The input to a hashing operation using a weak algorithm, considered as a flow sink. */
@@ -120,12 +122,12 @@ module ComputationallyExpensiveHashFunction {
*/
class PasswordSourceAsSource extends Source, SensitiveDataSource {
PasswordSourceAsSource() {
// TODO: once https://github.com/github/codeql/pull/5739 has been merged,
// don't use hardcoded value anymore
SensitiveDataSource.super.getClassification() = "password"
SensitiveDataSource.super.getClassification() = SensitiveDataClassification::password()
}
override string getClassification() { result = SensitiveDataSource.super.getClassification() }
override SensitiveDataClassification getClassification() {
result = SensitiveDataSource.super.getClassification()
}
}
/**