Merge remote-tracking branch 'upstream/main' into better-syntax-for-false-positives-and-negatives-inline-expectation

Required fixing up semantic conflicts in tests.

Conflicts:
	python/ql/test/experimental/library-tests/frameworks/stdlib/Decoding.py
This commit is contained in:
Jonas Jensen
2020-11-03 09:47:26 +01:00
180 changed files with 3494 additions and 3248 deletions

View File

@@ -6,10 +6,10 @@
*/
import python
import experimental.dataflow.DataFlow
import experimental.dataflow.DataFlow2
import experimental.dataflow.TaintTracking
import experimental.dataflow.TaintTracking2
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.DataFlow2
import semmle.python.dataflow.new.TaintTracking
import semmle.python.dataflow.new.TaintTracking2
/**
* A `DataFlow::Node` that appears as a sink in Config1 and a source in Config2.

View File

@@ -14,35 +14,103 @@
* external/cwe/cwe-036
* external/cwe/cwe-073
* external/cwe/cwe-099
*
* The query detects cases where a user-controlled path is used in an unsafe manner,
* meaning it is not both normalized and _afterwards_ checked.
*
* It does so by dividing the problematic situation into two cases:
* 1. The file path is never normalized.
* This is easily detected by using normalization as a sanitizer.
*
* 2. The file path is normalized at least once, but never checked afterwards.
* This is detected by finding the earliest normalization and then ensuring that
* no checks happen later. Since we start from the earliest normalization,
* we know that the absence of checks means that no normalization has a
* check after it. (No checks after a second normalization would be ok if
* there was a check between the first and the second.)
*
* Note that one could make the dual split on whether the file path is ever checked. This does
* not work as nicely, however, since checking is modelled as a `BarrierGuard` rather than
* as a `Sanitizer`. That means that only some dataflow paths out of a check will be removed,
* and so identifying the last check is not possible simply by finding a dataflow path from it
* to a sink.
*/
import python
import semmle.python.security.Paths
/* Sources */
import semmle.python.web.HttpRequest
/* Sinks */
import semmle.python.security.injection.Path
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.DataFlow2
import semmle.python.dataflow.new.TaintTracking
import semmle.python.dataflow.new.TaintTracking2
import semmle.python.Concepts
import semmle.python.dataflow.new.RemoteFlowSources
import ChainedConfigs12
class PathInjectionConfiguration extends TaintTracking::Configuration {
PathInjectionConfiguration() { this = "Path injection configuration" }
// ---------------------------------------------------------------------------
// Case 1. The path is never normalized.
// ---------------------------------------------------------------------------
/** Configuration to find paths from sources to sinks that contain no normalization. */
class PathNotNormalizedConfiguration extends TaintTracking::Configuration {
PathNotNormalizedConfiguration() { this = "PathNotNormalizedConfiguration" }
override predicate isSource(TaintTracking::Source source) {
source instanceof HttpRequestTaintSource
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
override predicate isSink(DataFlow::Node sink) {
sink = any(FileSystemAccess e).getAPathArgument()
}
override predicate isSink(TaintTracking::Sink sink) { sink instanceof OpenNode }
override predicate isSanitizer(DataFlow::Node node) { node instanceof Path::PathNormalization }
}
override predicate isSanitizer(Sanitizer sanitizer) {
sanitizer instanceof PathSanitizer or
sanitizer instanceof NormalizedPathSanitizer
predicate pathNotNormalized(CustomPathNode source, CustomPathNode sink) {
any(PathNotNormalizedConfiguration config).hasFlowPath(source.asNode1(), sink.asNode1())
}
// ---------------------------------------------------------------------------
// Case 2. The path is normalized at least once, but never checked afterwards.
// ---------------------------------------------------------------------------
/** Configuration to find paths from sources to normalizations that contain no prior normalizations. */
class FirstNormalizationConfiguration extends TaintTracking::Configuration {
FirstNormalizationConfiguration() { this = "FirstNormalizationConfiguration" }
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
override predicate isSink(DataFlow::Node sink) { sink instanceof Path::PathNormalization }
override predicate isSanitizerOut(DataFlow::Node node) { node instanceof Path::PathNormalization }
}
/** Configuration to find paths from normalizations to sinks that do not go through a check. */
class NormalizedPathNotCheckedConfiguration extends TaintTracking2::Configuration {
NormalizedPathNotCheckedConfiguration() { this = "NormalizedPathNotCheckedConfiguration" }
override predicate isSource(DataFlow::Node source) { source instanceof Path::PathNormalization }
override predicate isSink(DataFlow::Node sink) {
sink = any(FileSystemAccess e).getAPathArgument()
}
override predicate isExtension(TaintTracking::Extension extension) {
extension instanceof AbsPath
override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
guard instanceof Path::SafeAccessCheck
}
}
from PathInjectionConfiguration config, TaintedPathSource src, TaintedPathSink sink
where config.hasFlowPath(src, sink)
select sink.getSink(), src, sink, "This path depends on $@.", src.getSource(),
"a user-provided value"
predicate pathNotCheckedAfterNormalization(CustomPathNode source, CustomPathNode sink) {
exists(
FirstNormalizationConfiguration config, DataFlow::PathNode mid1, DataFlow2::PathNode mid2,
NormalizedPathNotCheckedConfiguration config2
|
config.hasFlowPath(source.asNode1(), mid1) and
config2.hasFlowPath(mid2, sink.asNode2()) and
mid1.getNode().asCfgNode() = mid2.getNode().asCfgNode()
)
}
// ---------------------------------------------------------------------------
// Query: Either case 1 or case 2.
// ---------------------------------------------------------------------------
from CustomPathNode source, CustomPathNode sink
where
pathNotNormalized(source, sink)
or
pathNotCheckedAfterNormalization(source, sink)
select sink, source, sink, "This path depends on $@.", source, "a user-provided value"

View File

@@ -15,29 +15,51 @@
*/
import python
import semmle.python.security.Paths
/* Sources */
import semmle.python.web.HttpRequest
/* Sinks */
import semmle.python.security.injection.Command
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import semmle.python.Concepts
import semmle.python.dataflow.new.RemoteFlowSources
import DataFlow::PathGraph
class CommandInjectionConfiguration extends TaintTracking::Configuration {
CommandInjectionConfiguration() { this = "Command injection configuration" }
CommandInjectionConfiguration() { this = "CommandInjectionConfiguration" }
override predicate isSource(TaintTracking::Source source) {
source instanceof HttpRequestTaintSource
}
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
override predicate isSink(TaintTracking::Sink sink) { sink instanceof CommandSink }
override predicate isExtension(TaintTracking::Extension extension) {
extension instanceof FirstElementFlow
or
extension instanceof FabricExecuteExtension
override predicate isSink(DataFlow::Node sink) {
sink = any(SystemCommandExecution e).getCommand() and
// Since the implementation of standard library functions such as `os.popen` looks like
// ```py
// def popen(cmd, mode="r", buffering=-1):
// ...
// proc = subprocess.Popen(cmd, ...)
// ```
// any time we would report flow to the `os.popen` sink, we can ALSO report the flow
// from the `cmd` parameter to the `subprocess.Popen` sink -- obviously we don't
// want that.
//
// However, simply removing taint edges out of a sink is not a good enough solution,
// since we would only flag one of the `os.system` calls in the following example
// due to use-use flow
// ```py
// os.system(cmd)
// os.system(cmd)
// ```
//
// Best solution I could come up with is to exclude all sinks inside the modules of
// known sinks. This does have a downside: If we have overlooked a function in any
// of these, that internally runs a command, we no longer give an alert :| -- and we
// need to keep them updated (which is hard to remember)
//
// This does not only affect `os.popen`, but also the helper functions in
// `subprocess`. See:
// https://github.com/python/cpython/blob/fa7ce080175f65d678a7d5756c94f82887fc9803/Lib/os.py#L974
// https://github.com/python/cpython/blob/fa7ce080175f65d678a7d5756c94f82887fc9803/Lib/subprocess.py#L341
not sink.getScope().getEnclosingModule().getName() in ["os", "subprocess", "platform", "popen2"]
}
}
from CommandInjectionConfiguration config, TaintedPathSource src, TaintedPathSink sink
where config.hasFlowPath(src, sink)
select sink.getSink(), src, sink, "This command depends on $@.", src.getSource(),
from CommandInjectionConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "This command depends on $@.", source.getNode(),
"a user-provided value"

View File

@@ -13,30 +13,26 @@
*/
import python
import semmle.python.security.Paths
/* Sources */
import semmle.python.web.HttpRequest
/* Sinks */
import semmle.python.web.HttpResponse
/* Flow */
import semmle.python.security.strings.Untrusted
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import semmle.python.Concepts
import semmle.python.dataflow.new.RemoteFlowSources
import DataFlow::PathGraph
class ReflectedXssConfiguration extends TaintTracking::Configuration {
ReflectedXssConfiguration() { this = "Reflected XSS configuration" }
ReflectedXssConfiguration() { this = "ReflectedXssConfiguration" }
override predicate isSource(TaintTracking::Source source) {
source instanceof HttpRequestTaintSource
}
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
override predicate isSink(TaintTracking::Sink sink) {
sink instanceof HttpResponseTaintSink and
not sink instanceof DjangoResponseContent
or
sink instanceof DjangoResponseContentXSSVulnerable
override predicate isSink(DataFlow::Node sink) {
exists(HTTP::Server::HttpResponse response |
response.getMimetype().toLowerCase() = "text/html" and
sink = response.getBody()
)
}
}
from ReflectedXssConfiguration config, TaintedPathSource src, TaintedPathSink sink
where config.hasFlowPath(src, sink)
select sink.getSink(), src, sink, "Cross-site scripting vulnerability due to $@.", src.getSource(),
"a user-provided value"
from ReflectedXssConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "Cross-site scripting vulnerability due to $@.",
source.getNode(), "a user-provided value"

41
python/ql/src/Security/CWE-089/SqlInjection.ql Executable file → Normal file
View File

@@ -12,40 +12,21 @@
*/
import python
import semmle.python.security.Paths
/* Sources */
import semmle.python.web.HttpRequest
/* Sinks */
import semmle.python.security.injection.Sql
import semmle.python.web.django.Db
import semmle.python.web.django.Model
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import semmle.python.Concepts
import semmle.python.dataflow.new.RemoteFlowSources
import DataFlow::PathGraph
class SQLInjectionConfiguration extends TaintTracking::Configuration {
SQLInjectionConfiguration() { this = "SQL injection configuration" }
SQLInjectionConfiguration() { this = "SQLInjectionConfiguration" }
override predicate isSource(TaintTracking::Source source) {
source instanceof HttpRequestTaintSource
}
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
override predicate isSink(TaintTracking::Sink sink) { sink instanceof SqlInjectionSink }
override predicate isSink(DataFlow::Node sink) { sink = any(SqlExecution e).getSql() }
}
/*
* Additional configuration to support tracking of DB objects. Connections, cursors, etc.
* Without this configuration (or the LegacyConfiguration), the pattern of
* `any(MyTaintKind k).taints(control_flow_node)` used in DbConnectionExecuteArgument would not work.
*/
class DbConfiguration extends TaintTracking::Configuration {
DbConfiguration() { this = "DB configuration" }
override predicate isSource(TaintTracking::Source source) {
source instanceof DjangoModelObjects or
source instanceof DbConnectionSource
}
}
from SQLInjectionConfiguration config, TaintedPathSource src, TaintedPathSink sink
where config.hasFlowPath(src, sink)
select sink.getSink(), src, sink, "This SQL query depends on $@.", src.getSource(),
from SQLInjectionConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "This SQL query depends on $@.", source.getNode(),
"a user-provided value"

View File

@@ -1,6 +1,6 @@
/**
* @name Code injection
* @description Interpreting unsanitized user input as code allows a malicious user arbitrary
* @description Interpreting unsanitized user input as code allows a malicious user to perform arbitrary
* code execution.
* @kind path-problem
* @problem.severity error
@@ -15,23 +15,21 @@
*/
import python
import semmle.python.security.Paths
/* Sources */
import semmle.python.web.HttpRequest
/* Sinks */
import semmle.python.security.injection.Exec
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import semmle.python.Concepts
import semmle.python.dataflow.new.RemoteFlowSources
import DataFlow::PathGraph
class CodeInjectionConfiguration extends TaintTracking::Configuration {
CodeInjectionConfiguration() { this = "Code injection configuration" }
CodeInjectionConfiguration() { this = "CodeInjectionConfiguration" }
override predicate isSource(TaintTracking::Source source) {
source instanceof HttpRequestTaintSource
}
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
override predicate isSink(TaintTracking::Sink sink) { sink instanceof StringEvaluationNode }
override predicate isSink(DataFlow::Node sink) { sink = any(CodeExecution e).getCode() }
}
from CodeInjectionConfiguration config, TaintedPathSource src, TaintedPathSink sink
where config.hasFlowPath(src, sink)
select sink.getSink(), src, sink, "$@ flows to here and is interpreted as code.", src.getSource(),
"A user-provided value"
from CodeInjectionConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "$@ flows to here and is interpreted as code.",
source.getNode(), "A user-provided value"

View File

@@ -12,26 +12,25 @@
*/
import python
import semmle.python.security.Paths
// Sources -- Any untrusted input
import semmle.python.web.HttpRequest
// Flow -- untrusted string
import semmle.python.security.strings.Untrusted
// Sink -- Unpickling and other deserialization formats.
import semmle.python.security.injection.Pickle
import semmle.python.security.injection.Marshal
import semmle.python.security.injection.Yaml
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import semmle.python.Concepts
import semmle.python.dataflow.new.RemoteFlowSources
import DataFlow::PathGraph
class UnsafeDeserializationConfiguration extends TaintTracking::Configuration {
UnsafeDeserializationConfiguration() { this = "Unsafe deserialization configuration" }
UnsafeDeserializationConfiguration() { this = "UnsafeDeserializationConfiguration" }
override predicate isSource(TaintTracking::Source source) {
source instanceof HttpRequestTaintSource
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
override predicate isSink(DataFlow::Node sink) {
exists(Decoding d |
d.mayExecuteInput() and
sink = d.getAnInput()
)
}
override predicate isSink(TaintTracking::Sink sink) { sink instanceof DeserializationSink }
}
from UnsafeDeserializationConfiguration config, TaintedPathSource src, TaintedPathSink sink
where config.hasFlowPath(src, sink)
select sink.getSink(), src, sink, "Deserializing of $@.", src.getSource(), "untrusted input"
from UnsafeDeserializationConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "Deserializing of $@.", source.getNode(), "untrusted input"

View File

@@ -1,116 +0,0 @@
/**
* @name Uncontrolled data used in path expression
* @description Accessing paths influenced by users can allow an attacker to access unexpected resources.
* @kind path-problem
* @problem.severity error
* @sub-severity high
* @precision high
* @id py/path-injection
* @tags correctness
* security
* external/owasp/owasp-a1
* external/cwe/cwe-022
* external/cwe/cwe-023
* external/cwe/cwe-036
* external/cwe/cwe-073
* external/cwe/cwe-099
*
* The query detects cases where a user-controlled path is used in an unsafe manner,
* meaning it is not both normalized and _afterwards_ checked.
*
* It does so by dividing the problematic situation into two cases:
* 1. The file path is never normalized.
* This is easily detected by using normalization as a sanitizer.
*
* 2. The file path is normalized at least once, but never checked afterwards.
* This is detected by finding the earliest normalization and then ensuring that
* no checks happen later. Since we start from the earliest normalization,
* we know that the absence of checks means that no normalization has a
* check after it. (No checks after a second normalization would be ok if
* there was a check between the first and the second.)
*
* Note that one could make the dual split on whether the file path is ever checked. This does
* not work as nicely, however, since checking is modelled as a `BarrierGuard` rather than
* as a `Sanitizer`. That means that only some dataflow paths out of a check will be removed,
* and so identifying the last check is not possible simply by finding a dataflow path from it
* to a sink.
*/
import python
import experimental.dataflow.DataFlow
import experimental.dataflow.DataFlow2
import experimental.dataflow.TaintTracking
import experimental.dataflow.TaintTracking2
import experimental.semmle.python.Concepts
import experimental.dataflow.RemoteFlowSources
import ChainedConfigs12
// ---------------------------------------------------------------------------
// Case 1. The path is never normalized.
// ---------------------------------------------------------------------------
/**
 * Configuration to find paths from sources to sinks that contain no normalization.
 *
 * Sources are `RemoteFlowSource` nodes, sinks are the path arguments of any
 * `FileSystemAccess`, and every `Path::PathNormalization` node is a sanitizer.
 */
class PathNotNormalizedConfiguration extends TaintTracking::Configuration {
PathNotNormalizedConfiguration() { this = "PathNotNormalizedConfiguration" }
// Any remote flow source counts as user-controlled input.
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
// A sink is any path argument of a file system access.
override predicate isSink(DataFlow::Node sink) {
sink = any(FileSystemAccess e).getAPathArgument()
}
// Treating normalization as a sanitizer leaves only the paths that are never normalized.
override predicate isSanitizer(DataFlow::Node node) { node instanceof Path::PathNormalization }
}
/**
 * Holds if some `PathNotNormalizedConfiguration` has a flow path from `source` to `sink`,
 * with both ends taken through `asNode1()`.
 *
 * NOTE(review): `CustomPathNode` and `asNode1()` are not defined in this file; presumably
 * they come from the `ChainedConfigs12` import -- confirm there.
 */
predicate pathNotNormalized(CustomPathNode source, CustomPathNode sink) {
any(PathNotNormalizedConfiguration config).hasFlowPath(source.asNode1(), sink.asNode1())
}
// ---------------------------------------------------------------------------
// Case 2. The path is normalized at least once, but never checked afterwards.
// ---------------------------------------------------------------------------
/**
 * Configuration to find paths from sources to normalizations that contain no prior normalizations.
 *
 * Sources are `RemoteFlowSource` nodes and sinks are `Path::PathNormalization` nodes.
 * The same normalization nodes are also declared via `isSanitizerOut`.
 */
class FirstNormalizationConfiguration extends TaintTracking::Configuration {
FirstNormalizationConfiguration() { this = "FirstNormalizationConfiguration" }
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
override predicate isSink(DataFlow::Node sink) { sink instanceof Path::PathNormalization }
// NOTE(review): `isSanitizerOut` is presumably what stops a path from continuing past a
// normalization, so that each reported sink is the earliest one -- confirm against the
// TaintTracking library documentation.
override predicate isSanitizerOut(DataFlow::Node node) { node instanceof Path::PathNormalization }
}
/**
 * Configuration to find paths from normalizations to sinks that do not go through a check.
 *
 * Sources are `Path::PathNormalization` nodes, sinks are the path arguments of any
 * `FileSystemAccess`, and `Path::SafeAccessCheck` is registered as a sanitizer guard.
 * This class extends `TaintTracking2::Configuration`, not `TaintTracking::Configuration`
 * as the other two configurations in this file do.
 */
class NormalizedPathNotCheckedConfiguration extends TaintTracking2::Configuration {
NormalizedPathNotCheckedConfiguration() { this = "NormalizedPathNotCheckedConfiguration" }
// Start from the normalization itself rather than from the remote source.
override predicate isSource(DataFlow::Node source) { source instanceof Path::PathNormalization }
override predicate isSink(DataFlow::Node sink) {
sink = any(FileSystemAccess e).getAPathArgument()
}
// The check is modelled as a barrier guard, not as a plain sanitizer node.
override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
guard instanceof Path::SafeAccessCheck
}
}
/**
 * Holds if two flow paths join at a common normalization:
 * - a `FirstNormalizationConfiguration` path from `source` to `mid1`, and
 * - a `NormalizedPathNotCheckedConfiguration` path from `mid2` to `sink`.
 *
 * `mid1` and `mid2` are path nodes of different path graphs (`DataFlow` and `DataFlow2`),
 * so they are tied together by requiring that they have the same underlying CFG node.
 */
predicate pathNotCheckedAfterNormalization(CustomPathNode source, CustomPathNode sink) {
exists(
FirstNormalizationConfiguration config, DataFlow::PathNode mid1, DataFlow2::PathNode mid2,
NormalizedPathNotCheckedConfiguration config2
|
// First leg: remote source to a normalization.
config.hasFlowPath(source.asNode1(), mid1) and
// Second leg: a normalization to a file system access.
config2.hasFlowPath(mid2, sink.asNode2()) and
// Both legs must meet at the same normalization.
mid1.getNode().asCfgNode() = mid2.getNode().asCfgNode()
)
}
// ---------------------------------------------------------------------------
// Query: Either case 1 or case 2.
// ---------------------------------------------------------------------------
// Report a source/sink pair when either predicate holds: the path is never normalized,
// or it is normalized but not checked afterwards.
from CustomPathNode source, CustomPathNode sink
where
pathNotNormalized(source, sink)
or
pathNotCheckedAfterNormalization(source, sink)
// The trailing `source, "a user-provided value"` pair supplies the `$@` placeholder.
select sink, source, sink, "This path depends on $@.", source, "a user-provided value"

View File

@@ -1,65 +0,0 @@
/**
* @name Uncontrolled command line
* @description Using externally controlled strings in a command line may allow a malicious
* user to change the meaning of the command.
* @kind path-problem
* @problem.severity error
* @sub-severity high
* @precision high
* @id py/command-line-injection
* @tags correctness
* security
* external/owasp/owasp-a1
* external/cwe/cwe-078
* external/cwe/cwe-088
*/
import python
import experimental.dataflow.DataFlow
import experimental.dataflow.TaintTracking
import experimental.semmle.python.Concepts
import experimental.dataflow.RemoteFlowSources
import DataFlow::PathGraph
/**
 * Taint-tracking configuration for command injection.
 *
 * Sources are `RemoteFlowSource` nodes. Sinks are the command of any
 * `SystemCommandExecution`, except those whose enclosing module is named
 * `os`, `subprocess`, `platform` or `popen2` (see the comment in `isSink`).
 */
class CommandInjectionConfiguration extends TaintTracking::Configuration {
CommandInjectionConfiguration() { this = "CommandInjectionConfiguration" }
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
override predicate isSink(DataFlow::Node sink) {
sink = any(SystemCommandExecution e).getCommand() and
// Since the implementation of standard library functions such as `os.popen` looks like
// ```py
// def popen(cmd, mode="r", buffering=-1):
// ...
// proc = subprocess.Popen(cmd, ...)
// ```
// any time we would report flow to the `os.popen` sink, we can ALSO report the flow
// from the `cmd` parameter to the `subprocess.Popen` sink -- obviously we don't
// want that.
//
// However, simply removing taint edges out of a sink is not a good enough solution,
// since we would only flag one of the `os.system` calls in the following example
// due to use-use flow
// ```py
// os.system(cmd)
// os.system(cmd)
// ```
//
// Best solution I could come up with is to exclude all sinks inside the modules of
// known sinks. This does have a downside: If we have overlooked a function in any
// of these, that internally runs a command, we no longer give an alert :| -- and we
// need to keep them updated (which is hard to remember)
//
// This does not only affect `os.popen`, but also the helper functions in
// `subprocess`. See:
// https://github.com/python/cpython/blob/fa7ce080175f65d678a7d5756c94f82887fc9803/Lib/os.py#L974
// https://github.com/python/cpython/blob/fa7ce080175f65d678a7d5756c94f82887fc9803/Lib/subprocess.py#L341
not sink.getScope().getEnclosingModule().getName() in ["os", "subprocess", "platform", "popen2"]
}
}
// Report every flow path found by `CommandInjectionConfiguration`; the alert is placed on
// the sink node and the `$@` placeholder is supplied by the source node and its label.
from CommandInjectionConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "This command depends on $@.", source.getNode(),
"a user-provided value"

View File

@@ -1,38 +0,0 @@
/**
* @name Reflected server-side cross-site scripting
* @description Writing user input directly to a web page
* allows for a cross-site scripting vulnerability.
* @kind path-problem
* @problem.severity error
* @sub-severity high
* @precision high
* @id py/reflective-xss
* @tags security
* external/cwe/cwe-079
* external/cwe/cwe-116
*/
import python
import experimental.dataflow.DataFlow
import experimental.dataflow.TaintTracking
import experimental.semmle.python.Concepts
import experimental.dataflow.RemoteFlowSources
import DataFlow::PathGraph
/**
 * Taint-tracking configuration for reflected cross-site scripting.
 *
 * Sources are `RemoteFlowSource` nodes. Sinks are the bodies of
 * `HTTP::Server::HttpResponse`s whose mimetype, lower-cased, equals `text/html`.
 */
class ReflectedXssConfiguration extends TaintTracking::Configuration {
ReflectedXssConfiguration() { this = "ReflectedXssConfiguration" }
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
override predicate isSink(DataFlow::Node sink) {
exists(HTTP::Server::HttpResponse response |
// The comparison is case-insensitive because the mimetype is lower-cased first.
response.getMimetype().toLowerCase() = "text/html" and
sink = response.getBody()
)
}
}
// Report every flow path found by `ReflectedXssConfiguration`; the alert is placed on
// the sink node and the `$@` placeholder is supplied by the source node and its label.
from ReflectedXssConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "Cross-site scripting vulnerability due to $@.",
source.getNode(), "a user-provided value"

View File

@@ -1,32 +0,0 @@
/**
* @name SQL query built from user-controlled sources
* @description Building a SQL query from user-controlled sources is vulnerable to insertion of
* malicious SQL code by the user.
* @kind path-problem
* @problem.severity error
* @precision high
* @id py/sql-injection
* @tags security
* external/cwe/cwe-089
* external/owasp/owasp-a1
*/
import python
import experimental.dataflow.DataFlow
import experimental.dataflow.TaintTracking
import experimental.semmle.python.Concepts
import experimental.dataflow.RemoteFlowSources
import DataFlow::PathGraph
/**
 * Taint-tracking configuration for SQL injection.
 *
 * Sources are `RemoteFlowSource` nodes; sinks are the SQL of any `SqlExecution`.
 */
class SQLInjectionConfiguration extends TaintTracking::Configuration {
SQLInjectionConfiguration() { this = "SQLInjectionConfiguration" }
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
override predicate isSink(DataFlow::Node sink) { sink = any(SqlExecution e).getSql() }
}
// Report every flow path found by `SQLInjectionConfiguration`; the alert is placed on
// the sink node and the `$@` placeholder is supplied by the source node and its label.
from SQLInjectionConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "This SQL query depends on $@.", source.getNode(),
"a user-provided value"

View File

@@ -1,35 +0,0 @@
/**
* @name Code injection
* @description Interpreting unsanitized user input as code allows a malicious user to perform arbitrary
* code execution.
* @kind path-problem
* @problem.severity error
* @sub-severity high
* @precision high
* @id py/code-injection
* @tags security
* external/owasp/owasp-a1
* external/cwe/cwe-094
* external/cwe/cwe-095
* external/cwe/cwe-116
*/
import python
import experimental.dataflow.DataFlow
import experimental.dataflow.TaintTracking
import experimental.semmle.python.Concepts
import experimental.dataflow.RemoteFlowSources
import DataFlow::PathGraph
/**
 * Taint-tracking configuration for code injection.
 *
 * Sources are `RemoteFlowSource` nodes; sinks are the code of any `CodeExecution`.
 */
class CodeInjectionConfiguration extends TaintTracking::Configuration {
CodeInjectionConfiguration() { this = "CodeInjectionConfiguration" }
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
override predicate isSink(DataFlow::Node sink) { sink = any(CodeExecution e).getCode() }
}
// Report every flow path found by `CodeInjectionConfiguration`. The message starts with the
// `$@` placeholder, which is why the label below is capitalised ("A user-provided value").
from CodeInjectionConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "$@ flows to here and is interpreted as code.",
source.getNode(), "A user-provided value"

View File

@@ -1,36 +0,0 @@
/**
* @name Deserializing untrusted input
* @description Deserializing user-controlled data may allow attackers to execute arbitrary code.
* @kind path-problem
* @id py/unsafe-deserialization
* @problem.severity error
* @sub-severity high
* @precision high
* @tags external/cwe/cwe-502
* security
* serialization
*/
import python
import experimental.dataflow.DataFlow
import experimental.dataflow.TaintTracking
import experimental.semmle.python.Concepts
import experimental.dataflow.RemoteFlowSources
import DataFlow::PathGraph
/**
 * Taint-tracking configuration for unsafe deserialization.
 *
 * Sources are `RemoteFlowSource` nodes. Sinks are the inputs of any `Decoding`
 * for which `mayExecuteInput()` holds.
 */
class UnsafeDeserializationConfiguration extends TaintTracking::Configuration {
UnsafeDeserializationConfiguration() { this = "UnsafeDeserializationConfiguration" }
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
override predicate isSink(DataFlow::Node sink) {
exists(Decoding d |
// Only decodings flagged by `mayExecuteInput()` are sinks; other decodings are ignored.
d.mayExecuteInput() and
sink = d.getAnInput()
)
}
}
// Report every flow path found by `UnsafeDeserializationConfiguration`; the alert is placed
// on the sink node and the `$@` placeholder is supplied by the source node and its label.
from UnsafeDeserializationConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "Deserializing of $@.", source.getNode(), "untrusted input"

View File

@@ -0,0 +1,48 @@
/**
* @name Uncontrolled data used in path expression
* @description Accessing paths influenced by users can allow an attacker to access unexpected resources.
* @kind path-problem
* @problem.severity error
* @sub-severity high
* @precision high
* @id py/path-injection
* @tags correctness
* security
* external/owasp/owasp-a1
* external/cwe/cwe-022
* external/cwe/cwe-023
* external/cwe/cwe-036
* external/cwe/cwe-073
* external/cwe/cwe-099
*/
import python
import semmle.python.security.Paths
/* Sources */
import semmle.python.web.HttpRequest
/* Sinks */
import semmle.python.security.injection.Path
/**
 * Taint-tracking configuration for path injection.
 *
 * Sources are `HttpRequestTaintSource`s and sinks are `OpenNode`s. `PathSanitizer` and
 * `NormalizedPathSanitizer` are sanitizers, and `AbsPath` is an additional flow extension.
 */
class PathInjectionConfiguration extends TaintTracking::Configuration {
PathInjectionConfiguration() { this = "Path injection configuration" }
override predicate isSource(TaintTracking::Source source) {
source instanceof HttpRequestTaintSource
}
override predicate isSink(TaintTracking::Sink sink) { sink instanceof OpenNode }
// Either kind of sanitizer is accepted.
override predicate isSanitizer(Sanitizer sanitizer) {
sanitizer instanceof PathSanitizer or
sanitizer instanceof NormalizedPathSanitizer
}
override predicate isExtension(TaintTracking::Extension extension) {
extension instanceof AbsPath
}
}
// Report every flow path found by `PathInjectionConfiguration`; the alert is placed on
// `sink.getSink()` and the `$@` placeholder is supplied by `src.getSource()` and its label.
from PathInjectionConfiguration config, TaintedPathSource src, TaintedPathSink sink
where config.hasFlowPath(src, sink)
select sink.getSink(), src, sink, "This path depends on $@.", src.getSource(),
"a user-provided value"

View File

@@ -0,0 +1,43 @@
/**
* @name Uncontrolled command line
* @description Using externally controlled strings in a command line may allow a malicious
* user to change the meaning of the command.
* @kind path-problem
* @problem.severity error
* @sub-severity high
* @precision high
* @id py/command-line-injection
* @tags correctness
* security
* external/owasp/owasp-a1
* external/cwe/cwe-078
* external/cwe/cwe-088
*/
import python
import semmle.python.security.Paths
/* Sources */
import semmle.python.web.HttpRequest
/* Sinks */
import semmle.python.security.injection.Command
/**
 * Taint-tracking configuration for command injection.
 *
 * Sources are `HttpRequestTaintSource`s and sinks are `CommandSink`s. `FirstElementFlow`
 * and `FabricExecuteExtension` are additional flow extensions.
 */
class CommandInjectionConfiguration extends TaintTracking::Configuration {
CommandInjectionConfiguration() { this = "Command injection configuration" }
override predicate isSource(TaintTracking::Source source) {
source instanceof HttpRequestTaintSource
}
override predicate isSink(TaintTracking::Sink sink) { sink instanceof CommandSink }
// Either kind of extension is accepted.
override predicate isExtension(TaintTracking::Extension extension) {
extension instanceof FirstElementFlow
or
extension instanceof FabricExecuteExtension
}
}
// Report every flow path found by `CommandInjectionConfiguration`; the alert is placed on
// `sink.getSink()` and the `$@` placeholder is supplied by `src.getSource()` and its label.
from CommandInjectionConfiguration config, TaintedPathSource src, TaintedPathSink sink
where config.hasFlowPath(src, sink)
select sink.getSink(), src, sink, "This command depends on $@.", src.getSource(),
"a user-provided value"

View File

@@ -0,0 +1,42 @@
/**
* @name Reflected server-side cross-site scripting
* @description Writing user input directly to a web page
* allows for a cross-site scripting vulnerability.
* @kind path-problem
* @problem.severity error
* @sub-severity high
* @precision high
* @id py/reflective-xss
* @tags security
* external/cwe/cwe-079
* external/cwe/cwe-116
*/
import python
import semmle.python.security.Paths
/* Sources */
import semmle.python.web.HttpRequest
/* Sinks */
import semmle.python.web.HttpResponse
/* Flow */
import semmle.python.security.strings.Untrusted
/**
 * A taint-tracking configuration for reflected cross-site scripting: taint starts at an
 * `HttpRequestTaintSource` and is reported when it reaches an HTTP response sink.
 */
class ReflectedXssConfiguration extends TaintTracking::Configuration {
  ReflectedXssConfiguration() { this = "Reflected XSS configuration" }

  /** Holds if `source` is an `HttpRequestTaintSource`. */
  override predicate isSource(TaintTracking::Source source) {
    source = any(HttpRequestTaintSource httpSource)
  }

  /**
   * Holds if `sink` is a `DjangoResponseContentXSSVulnerable`, or is an
   * `HttpResponseTaintSink` that is not a `DjangoResponseContent`.
   */
  override predicate isSink(TaintTracking::Sink sink) {
    sink instanceof DjangoResponseContentXSSVulnerable
    or
    not sink instanceof DjangoResponseContent and
    sink instanceof HttpResponseTaintSink
  }
}
// Report every flow path accepted by `ReflectedXssConfiguration`. The alert is placed on
// the sink, and the `$@` placeholder in the message links to the source of the path.
from ReflectedXssConfiguration config, TaintedPathSource src, TaintedPathSink sink
where config.hasFlowPath(src, sink)
select sink.getSink(), src, sink, "Cross-site scripting vulnerability due to $@.", src.getSource(),
"a user-provided value"

View File

@@ -0,0 +1,51 @@
/**
* @name SQL query built from user-controlled sources
* @description Building a SQL query from user-controlled sources is vulnerable to insertion of
* malicious SQL code by the user.
* @kind path-problem
* @problem.severity error
* @precision high
* @id py/sql-injection
* @tags security
* external/cwe/cwe-089
* external/owasp/owasp-a1
*/
import python
import semmle.python.security.Paths
/* Sources */
import semmle.python.web.HttpRequest
/* Sinks */
import semmle.python.security.injection.Sql
import semmle.python.web.django.Db
import semmle.python.web.django.Model
/**
 * A taint-tracking configuration for SQL injection: taint starts at an
 * `HttpRequestTaintSource` and is reported when it reaches a `SqlInjectionSink`.
 */
class SQLInjectionConfiguration extends TaintTracking::Configuration {
  SQLInjectionConfiguration() { this = "SQL injection configuration" }

  /** Holds if `source` is an `HttpRequestTaintSource`. */
  override predicate isSource(TaintTracking::Source source) {
    source = any(HttpRequestTaintSource httpSource)
  }

  /** Holds if `sink` is a `SqlInjectionSink`. */
  override predicate isSink(TaintTracking::Sink sink) { sink = any(SqlInjectionSink sqlSink) }
}
/*
 * Extra configuration that tracks DB objects (connections, cursors, etc.).
 * `DbConnectionExecuteArgument` relies on the pattern
 * `any(MyTaintKind k).taints(control_flow_node)`, which would not work without this
 * configuration (or the LegacyConfiguration).
 */

class DbConfiguration extends TaintTracking::Configuration {
  DbConfiguration() { this = "DB configuration" }

  /** Holds if `source` is a `DbConnectionSource` or a `DjangoModelObjects`. */
  override predicate isSource(TaintTracking::Source source) {
    source instanceof DbConnectionSource
    or
    source instanceof DjangoModelObjects
  }
}
// Report every flow path accepted by `SQLInjectionConfiguration`. The alert is placed on
// the sink, and the `$@` placeholder in the message links to the source of the path.
from SQLInjectionConfiguration config, TaintedPathSource src, TaintedPathSink sink
where config.hasFlowPath(src, sink)
select sink.getSink(), src, sink, "This SQL query depends on $@.", src.getSource(),
"a user-provided value"

View File

@@ -0,0 +1,37 @@
/**
* @name Code injection
* @description Interpreting unsanitized user input as code allows a malicious user arbitrary
* code execution.
* @kind path-problem
* @problem.severity error
* @sub-severity high
* @precision high
* @id py/code-injection
* @tags security
* external/owasp/owasp-a1
* external/cwe/cwe-094
* external/cwe/cwe-095
* external/cwe/cwe-116
*/
import python
import semmle.python.security.Paths
/* Sources */
import semmle.python.web.HttpRequest
/* Sinks */
import semmle.python.security.injection.Exec
/**
 * A taint-tracking configuration for code injection: taint starts at an
 * `HttpRequestTaintSource` and is reported when it reaches a `StringEvaluationNode`.
 */
class CodeInjectionConfiguration extends TaintTracking::Configuration {
  CodeInjectionConfiguration() { this = "Code injection configuration" }

  /** Holds if `source` is an `HttpRequestTaintSource`. */
  override predicate isSource(TaintTracking::Source source) {
    source = any(HttpRequestTaintSource httpSource)
  }

  /** Holds if `sink` is a `StringEvaluationNode`. */
  override predicate isSink(TaintTracking::Sink sink) {
    sink = any(StringEvaluationNode evalNode)
  }
}
// Report every flow path accepted by `CodeInjectionConfiguration`. The alert is placed on
// the sink, and the leading `$@` placeholder in the message links to the source of the path.
from CodeInjectionConfiguration config, TaintedPathSource src, TaintedPathSink sink
where config.hasFlowPath(src, sink)
select sink.getSink(), src, sink, "$@ flows to here and is interpreted as code.", src.getSource(),
"A user-provided value"

View File

@@ -0,0 +1,37 @@
/**
* @name Deserializing untrusted input
* @description Deserializing user-controlled data may allow attackers to execute arbitrary code.
* @kind path-problem
* @id py/unsafe-deserialization
* @problem.severity error
* @sub-severity high
* @precision high
* @tags external/cwe/cwe-502
* security
* serialization
*/
import python
import semmle.python.security.Paths
// Sources -- Any untrusted input
import semmle.python.web.HttpRequest
// Flow -- untrusted string
import semmle.python.security.strings.Untrusted
// Sink -- Unpickling and other deserialization formats.
import semmle.python.security.injection.Pickle
import semmle.python.security.injection.Marshal
import semmle.python.security.injection.Yaml
/**
 * A taint-tracking configuration for unsafe deserialization: taint starts at an
 * `HttpRequestTaintSource` and is reported when it reaches a `DeserializationSink`.
 */
class UnsafeDeserializationConfiguration extends TaintTracking::Configuration {
  UnsafeDeserializationConfiguration() { this = "Unsafe deserialization configuration" }

  /** Holds if `source` is an `HttpRequestTaintSource`. */
  override predicate isSource(TaintTracking::Source source) {
    source = any(HttpRequestTaintSource httpSource)
  }

  /** Holds if `sink` is a `DeserializationSink`. */
  override predicate isSink(TaintTracking::Sink sink) {
    sink = any(DeserializationSink deserializationSink)
  }
}
// Report every flow path accepted by `UnsafeDeserializationConfiguration`. The alert is placed
// on the sink, and the `$@` placeholder in the message links to the source of the path.
from UnsafeDeserializationConfiguration config, TaintedPathSource src, TaintedPathSink sink
where config.hasFlowPath(src, sink)
select sink.getSink(), src, sink, "Deserializing of $@.", src.getSource(), "untrusted input"

View File

@@ -1,6 +0,0 @@
import experimental.dataflow.internal.TaintTrackingPublic as Public
/**
 * Bundles the `DataFlow` copy of the data-flow library (under the name `DataFlow`) with the
 * shared private taint-tracking implementation.
 * NOTE(review): presumably the alias lets `TaintTrackingPrivate` bind to this copy -- confirm.
 */
module Private {
import experimental.dataflow.DataFlow::DataFlow as DataFlow
import experimental.dataflow.internal.TaintTrackingPrivate
}

View File

@@ -1,6 +0,0 @@
import experimental.dataflow.internal.TaintTrackingPublic as Public
/**
 * Bundles the `DataFlow2` copy of the data-flow library (under the name `DataFlow`) with the
 * shared private taint-tracking implementation.
 * NOTE(review): presumably the alias lets `TaintTrackingPrivate` bind to this copy -- confirm.
 */
module Private {
import experimental.dataflow.DataFlow2::DataFlow2 as DataFlow
import experimental.dataflow.internal.TaintTrackingPrivate
}

View File

@@ -1,6 +0,0 @@
import experimental.dataflow.internal.TaintTrackingPublic as Public
/**
 * Bundles the `DataFlow3` copy of the data-flow library (under the name `DataFlow`) with the
 * shared private taint-tracking implementation.
 * NOTE(review): presumably the alias lets `TaintTrackingPrivate` bind to this copy -- confirm.
 */
module Private {
import experimental.dataflow.DataFlow3::DataFlow3 as DataFlow
import experimental.dataflow.internal.TaintTrackingPrivate
}

View File

@@ -1,6 +0,0 @@
import experimental.dataflow.internal.TaintTrackingPublic as Public
/**
 * Bundles the `DataFlow4` copy of the data-flow library (under the name `DataFlow`) with the
 * shared private taint-tracking implementation.
 * NOTE(review): presumably the alias lets `TaintTrackingPrivate` bind to this copy -- confirm.
 */
module Private {
import experimental.dataflow.DataFlow4::DataFlow4 as DataFlow
import experimental.dataflow.internal.TaintTrackingPrivate
}

View File

@@ -1,11 +0,0 @@
/**
* Helper file that imports all framework modeling.
*/
private import experimental.semmle.python.frameworks.Dill
private import experimental.semmle.python.frameworks.Django
private import experimental.semmle.python.frameworks.Fabric
private import experimental.semmle.python.frameworks.Flask
private import experimental.semmle.python.frameworks.Invoke
private import experimental.semmle.python.frameworks.Stdlib
private import experimental.semmle.python.frameworks.Yaml

View File

@@ -35,3 +35,7 @@ import semmle.python.pointsto.Context
import semmle.python.pointsto.CallGraph
import semmle.python.objects.ObjectAPI
import site
// Removing this import perturbs the compilation process enough that the points-to analysis gets
// compiled -- and cached -- differently depending on whether the data flow library is imported. By
// importing it privately here, we ensure that the points-to analysis is compiled the same way.
private import semmle.python.dataflow.new.DataFlow

View File

@@ -5,9 +5,10 @@
*/
import python
private import experimental.dataflow.DataFlow
private import experimental.semmle.python.Frameworks
private import experimental.dataflow.RemoteFlowSources
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.Frameworks
/**
* A data-flow node that executes an operating system command,
@@ -113,8 +114,9 @@ module Path {
* is intended to include deserialization, unmarshalling, decoding, unpickling,
* decompressing, decrypting, parsing etc.
*
* Doing so should normally preserve taint, but it can also be a problem
* in itself, e.g. if it allows code execution or could result in denial-of-service.
* A decoding (automatically) preserves taint from input to output. However, it can
* also be a problem in itself, for example if it allows code execution or could result
* in denial-of-service.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `Decoding::Range` instead.
@@ -144,8 +146,9 @@ module Decoding {
* is intended to include deserialization, unmarshalling, decoding, unpickling,
* decompressing, decrypting, parsing etc.
*
* Doing so should normally preserve taint, but it can also be a problem
* in itself, e.g. if it allows code execution or could result in denial-of-service.
* A decoding (automatically) preserves taint from input to output. However, it can
* also be a problem in itself, for example if it allows code execution or could result
* in denial-of-service.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `Decoding` instead.
@@ -165,6 +168,73 @@ module Decoding {
}
}
/** An additional taint step from any input of a `Decoding` to its output. */
private class DecodingAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
  override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
    exists(Decoding d | d.getAnInput() = nodeFrom | d.getOutput() = nodeTo)
  }
}
/**
 * A data-flow node that encodes data to a binary or textual format. This
 * is intended to include serialization, marshalling, encoding, pickling,
 * compressing, encrypting, etc.
 *
 * An encoding (automatically) preserves taint from input to output.
 *
 * Extend this class to refine existing API models. If you want to model new APIs,
 * extend `Encoding::Range` instead.
 */
class Encoding extends DataFlow::Node {
  Encoding::Range range;

  Encoding() { this = range }

  /** Gets an input that is encoded by this function. */
  DataFlow::Node getAnInput() { result = range.getAnInput() }

  /** Gets the output that contains the encoded data produced by this function. */
  DataFlow::Node getOutput() { result = range.getOutput() }

  /** Gets an identifier for the format this function encodes to, such as "JSON". */
  string getFormat() { result = range.getFormat() }
}
/** Provides a class for modeling new encoding mechanisms. */
module Encoding {
  /**
   * A data-flow node that encodes data to a binary or textual format. This
   * is intended to include serialization, marshalling, encoding, pickling,
   * compressing, encrypting, etc.
   *
   * An encoding (automatically) preserves taint from input to output.
   *
   * Extend this class to model new APIs. If you want to refine existing API models,
   * extend `Encoding` instead.
   */
  abstract class Range extends DataFlow::Node {
    /** Gets an input that is encoded by this function. */
    abstract DataFlow::Node getAnInput();

    /** Gets the output that contains the encoded data produced by this function. */
    abstract DataFlow::Node getOutput();

    /** Gets an identifier for the format this function encodes to, such as "JSON". */
    abstract string getFormat();
  }
}
/** An additional taint step from any input of an `Encoding` to its output. */
private class EncodingAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
  override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
    exists(Encoding e | e.getAnInput() = nodeFrom | e.getOutput() = nodeTo)
  }
}
/**
* A data-flow node that dynamically executes Python code.
*

View File

@@ -11,10 +11,10 @@
import python
/* General import that is useful */
// import experimental.dataflow.DataFlow
// import semmle.python.dataflow.new.DataFlow
//
/* for extending `TaintTracking::AdditionalTaintStep` */
// import experimental.dataflow.TaintTracking
// import semmle.python.dataflow.new.TaintTracking
//
/* for extending `RemoteFlowSource::Range` */
// import experimental.dataflow.RemoteFlowSources
// import semmle.python.dataflow.new.RemoteFlowSources

View File

@@ -0,0 +1,13 @@
/**
* Helper file that imports all framework modeling.
*/
private import semmle.python.frameworks.Dill
private import semmle.python.frameworks.Django
private import semmle.python.frameworks.Fabric
private import semmle.python.frameworks.Flask
private import semmle.python.frameworks.Invoke
private import semmle.python.frameworks.MySQLdb
private import semmle.python.frameworks.MysqlConnectorPython
private import semmle.python.frameworks.Stdlib
private import semmle.python.frameworks.Yaml

View File

@@ -1,141 +1 @@
import python
import semmle.python.dataflow.TaintTracking
private import semmle.python.objects.ObjectInternal
private import semmle.python.dataflow.Implementation
/**
 * Provides the configuration-based taint-tracking API: aliases for the source, sink,
 * extension and path-node types, and the `Configuration` class.
 */
module TaintTracking {
  class Source = TaintSource;

  class Sink = TaintSink;

  class Extension = DataFlowExtension::DataFlowNode;

  class PathSource = TaintTrackingNode;

  class PathSink = TaintTrackingNode;

  /**
   * A taint-tracking configuration, identified by its string value.
   *
   * Every member predicate has a default body, so subclasses override only the
   * predicates they need.
   */
  abstract class Configuration extends string {
    /* Required to prevent compiler warning */
    bindingset[this]
    Configuration() { this = this }

    /* Old implementation API */
    /** Holds if `src` is a source for this configuration. Holds for nothing by default. */
    predicate isSource(Source src) { none() }

    /** Holds if `sink` is a sink for this configuration. Holds for nothing by default. */
    predicate isSink(Sink sink) { none() }

    /** Holds if `sanitizer` is a sanitizer for this configuration. Holds for nothing by default. */
    predicate isSanitizer(Sanitizer sanitizer) { none() }

    /** Holds if `extension` is a flow extension for this configuration. Holds for nothing by default. */
    predicate isExtension(Extension extension) { none() }

    /* New implementation API */
    /**
     * Holds if `src` is a source of taint of `kind` that is relevant
     * for this configuration.
     *
     * By default this is derived from the old-API `isSource`.
     */
    predicate isSource(DataFlow::Node src, TaintKind kind) {
      exists(TaintSource origin | origin = src.asCfgNode() |
        origin.isSourceOf(kind) and
        this.isSource(origin)
      )
    }

    /**
     * Holds if `sink` is a sink of taint of `kind` that is relevant
     * for this configuration.
     *
     * By default this is derived from the old-API `isSink`.
     */
    predicate isSink(DataFlow::Node sink, TaintKind kind) {
      exists(TaintSink target | target = sink.asCfgNode() |
        target.sinks(kind) and
        this.isSink(target)
      )
    }

    /**
     * Holds if `src -> dest` should be considered as a flow edge
     * in addition to standard data flow edges.
     */
    predicate isAdditionalFlowStep(DataFlow::Node src, DataFlow::Node dest) { none() }

    /**
     * Holds if `src -> dest` is a flow edge converting taint from `srckind` to `destkind`.
     */
    predicate isAdditionalFlowStep(
      DataFlow::Node src, DataFlow::Node dest, TaintKind srckind, TaintKind destkind
    ) {
      none()
    }

    /**
     * Holds if `node` should be considered as a barrier to flow of any kind.
     */
    predicate isBarrier(DataFlow::Node node) { none() }

    /**
     * Holds if `node` should be considered as a barrier to flow of `kind`.
     *
     * By default this is derived from the sanitizers accepted by `isSanitizer`.
     */
    predicate isBarrier(DataFlow::Node node, TaintKind kind) {
      exists(Sanitizer s |
        this.isSanitizer(s) and
        (
          exists(MethodCallsiteRefinement refinement, FunctionObject target |
            node.asVariable().getDefinition() = refinement and
            refinement.getCall() = target.getACall() and
            s.sanitizingCall(kind, target)
          )
          or
          s.sanitizingDefinition(kind, node.asVariable())
          or
          s.sanitizingSingleEdge(kind, node.asVariable())
          or
          s.sanitizingEdge(kind, node.asVariable())
          or
          s.sanitizingNode(kind, node.asCfgNode())
        )
      )
    }

    /**
     * Holds if flow from `src` to `dest` is prohibited.
     */
    predicate isBarrierEdge(DataFlow::Node src, DataFlow::Node dest) { none() }

    /**
     * Holds if control flow from `test` along the `isTrue` edge is prohibited.
     */
    predicate isBarrierTest(ControlFlowNode test, boolean isTrue) { none() }

    /**
     * Holds if flow from `src` to `dest` is prohibited when the incoming taint is `srckind` and the outgoing taint is `destkind`.
     * Note that `srckind` and `destkind` can be the same.
     */
    predicate isBarrierEdge(
      DataFlow::Node src, DataFlow::Node dest, TaintKind srckind, TaintKind destkind
    ) {
      none()
    }

    /* Common query API */
    /** Holds if the implementation for this configuration has a flow path from `src` to `sink`. */
    predicate hasFlowPath(PathSource src, PathSink sink) {
      this.(TaintTrackingImplementation).hasFlowPath(src, sink)
    }

    /* Old query API */
    /* deprecated */
    /** Holds if some flow path starts at the CFG node `src` and ends at the CFG node `sink`. */
    deprecated predicate hasFlow(Source src, Sink sink) {
      exists(PathSource pathStart, PathSink pathEnd |
        pathStart.getNode().asCfgNode() = src and
        pathEnd.getNode().asCfgNode() = sink and
        this.hasFlowPath(pathStart, pathEnd)
      )
    }

    /* New query API */
    /** Holds if some flow path starts at the data-flow node `src` and ends at the data-flow node `sink`. */
    predicate hasSimpleFlow(DataFlow::Node src, DataFlow::Node sink) {
      exists(PathSource pathStart, PathSink pathEnd |
        pathStart.getNode() = src and
        pathEnd.getNode() = sink and
        this.hasFlowPath(pathStart, pathEnd)
      )
    }
  }
}
import old.Configuration

View File

@@ -1 +1 @@
import semmle.python.dataflow.TaintTracking
import old.DataFlow

View File

@@ -1,19 +1 @@
import python
import semmle.python.dataflow.TaintTracking
/** The taint kind `"file.open"`, described in results as "an open file". */
class OpenFile extends TaintKind {
  OpenFile() { "file.open" = this }

  override string repr() { "an open file" = result }
}
/**
 * A configuration whose sources are calls to the value named `open`, carrying the
 * `OpenFile` taint kind. It defines no sinks.
 */
class OpenFileConfiguration extends TaintTracking::Configuration {
  OpenFileConfiguration() { this = "Open file configuration" }

  /** Holds if `src` is a call to the value named `open` and `kind` is `OpenFile`. */
  override predicate isSource(DataFlow::Node src, TaintKind kind) {
    kind instanceof OpenFile and
    Value::named("open").getACall() = src.asCfgNode()
  }

  /** Holds for nothing: this configuration has no sinks. */
  override predicate isSink(DataFlow::Node sink, TaintKind kind) { none() }
}
import old.Files

File diff suppressed because it is too large Load Diff

View File

@@ -1,68 +1 @@
import semmle.python.dataflow.TaintTracking
private import semmle.python.objects.ObjectInternal
import semmle.python.dataflow.Implementation
/* For backwards compatibility -- Use `TaintTrackingContext` instead. */
deprecated class CallContext extends TaintTrackingContext {
  /** Gets a context whose caller context at `call` is this context. */
  TaintTrackingContext getCallee(CallNode call) { this = result.getCaller(call) }

  /**
   * Holds if this context applies to the scope `s`: either this is the top context, or
   * this is a parameter context for a tainted-argument call to the function with scope `s`.
   */
  predicate appliesToScope(Scope s) {
    this.isTop()
    or
    exists(
      PythonFunctionObjectInternal callee, TaintKind paramKind, AttributePath attrPath,
      int index, TaintTrackingImplementation impl
    |
      this = TParamContext(paramKind, attrPath, index) and
      impl.callWithTaintedArgument(_, _, _, callee, index, attrPath, paramKind) and
      callee.getScope() = s
    )
  }
}
/* Backwards compatibility with config-less taint-tracking */
private class LegacyConfiguration extends TaintTracking::Configuration {
  LegacyConfiguration() {
    /* A name that won't be accidentally chosen by users */
    this = "Semmle: Internal legacy configuration"
  }

  /** Holds for every `TaintSource`. */
  override predicate isSource(TaintSource src) { src = src }

  /** Holds for every `TaintSink`. */
  override predicate isSink(TaintSink sink) { sink = sink }

  /** Holds for every `Sanitizer`. */
  override predicate isSanitizer(Sanitizer sanitizer) { sanitizer = sanitizer }

  /** Holds if a legacy `DataFlowExtension::DataFlowNode` at `src` names `dest` as a successor. */
  override predicate isAdditionalFlowStep(DataFlow::Node src, DataFlow::Node dest) {
    exists(DataFlowExtension::DataFlowNode ext |
      ext = src.asCfgNode() and
      (
        ext.getACalleeSuccessorNode(_) = dest.asCfgNode()
        or
        ext.getAReturnSuccessorNode(_) = dest.asCfgNode()
        or
        ext.getASuccessorVariable() = dest.asVariable()
        or
        ext.getASuccessorNode() = dest.asCfgNode()
      )
    )
  }

  /**
   * Holds if a legacy `DataFlowExtension::DataFlowNode` at `src` names `dest` as a
   * successor that converts taint from `srckind` to `destkind`.
   */
  override predicate isAdditionalFlowStep(
    DataFlow::Node src, DataFlow::Node dest, TaintKind srckind, TaintKind destkind
  ) {
    exists(DataFlowExtension::DataFlowNode ext |
      ext = src.asCfgNode() and
      ext.getASuccessorNode(srckind, destkind) = dest.asCfgNode()
    )
  }

  /** Holds if a legacy extension (node or variable) at `src` prunes `dest` as a successor. */
  override predicate isBarrierEdge(DataFlow::Node src, DataFlow::Node dest) {
    exists(DataFlowExtension::DataFlowNode nodeExt |
      nodeExt = src.asCfgNode() and
      nodeExt.prunedSuccessor(dest.asCfgNode())
    )
    or
    exists(DataFlowExtension::DataFlowVariable varExt |
      varExt = src.asVariable() and
      varExt.prunedSuccessor(dest.asVariable())
    )
  }
}
import old.Legacy

View File

@@ -1,174 +1 @@
/**
* Provides classes and predicates for tracking global state across the control flow and call graphs.
*
* NOTE: State tracking tracks both whether a state may apply to a given node in a given context *and*
* whether it may not apply.
* That `state.appliesTo(f, ctx)` holds implies nothing about whether `state.mayNotApplyTo(f, ctx)` holds.
* Neither may hold which merely means that `f` with context `ctx` is not reached during the analysis.
* Conversely, both may hold, which means that `state` may or may not apply depending on how `f` was reached.
*/
import python
private import semmle.python.pointsto.Base
private import semmle.python.pointsto.PointsTo
private import semmle.python.pointsto.PointsToContext
private import semmle.python.objects.ObjectInternal
/**
 * A state that should be tracked.
 *
 * Subclasses say where the state starts and ends and, optionally, which edge
 * refinements test for it. `StateTracking::appliesToNode` then computes where the
 * state may, or may not, apply.
 */
abstract class TrackableState extends string {
  bindingset[this]
  TrackableState() { this = this }

  /** Holds if this state may apply to the control flow node `f`, regardless of the context. */
  final predicate appliesTo(ControlFlowNode f) { this.appliesTo(f, _) }

  /** Holds if this state may apply to the control flow node `f`, given the context `ctx`. */
  final predicate appliesTo(ControlFlowNode f, Context ctx) {
    StateTracking::appliesToNode(this, f, ctx, true)
  }

  /** Holds if this state may not apply to the control flow node `f`, given the context `ctx`. */
  final predicate mayNotApplyTo(ControlFlowNode f, Context ctx) {
    StateTracking::appliesToNode(this, f, ctx, false)
  }

  /** Holds if this state may not apply to the control flow node `f`, regardless of the context. */
  final predicate mayNotApplyTo(ControlFlowNode f) { this.mayNotApplyTo(f, _) }

  /**
   * Holds if the edge refinement `test` establishes that this state applies
   * (`sense` = true) or does not apply (`sense` = false), given the context `ctx`.
   *
   * By default this defers to `testsFor(test, sense)` for every context that applies
   * to the scope of `test`.
   */
  predicate testsFor(PyEdgeRefinement test, Context ctx, boolean sense) {
    ctx.appliesToScope(test.getScope()) and this.testsFor(test, sense)
  }

  /**
   * Holds if the edge refinement `test` establishes that this state applies
   * (`sense` = true) or does not apply (`sense` = false). Holds for nothing by default.
   */
  predicate testsFor(PyEdgeRefinement test, boolean sense) { none() }

  /**
   * Holds if state starts at `f`.
   * Either this predicate or `startsAt(ControlFlowNode f, Context ctx)`
   * should be overridden by sub-classes.
   */
  predicate startsAt(ControlFlowNode f) { none() }

  /**
   * Holds if state starts at `f` given context `ctx`.
   * Either this predicate or `startsAt(ControlFlowNode f)`
   * should be overridden by sub-classes.
   */
  pragma[noinline]
  predicate startsAt(ControlFlowNode f, Context ctx) { ctx.appliesTo(f) and this.startsAt(f) }

  /**
   * Holds if state ends at `f`.
   * Either this predicate or `endsAt(ControlFlowNode f, Context ctx)`
   * may be overridden by sub-classes.
   */
  predicate endsAt(ControlFlowNode f) { none() }

  /**
   * Holds if state ends at `f` given context `ctx`.
   * Either this predicate or `endsAt(ControlFlowNode f)`
   * may be overridden by sub-classes.
   */
  pragma[noinline]
  predicate endsAt(ControlFlowNode f, Context ctx) { ctx.appliesTo(f) and this.endsAt(f) }
}
/**
 * Provides the predicates that compute, per control flow node and context, whether a
 * `TrackableState` may apply or may not apply.
 */
module StateTracking {
/**
 * Holds if `sense` is ruled out for `state` at `f` in `ctx`: the state cannot apply
 * (`sense` = true) at a node where it ends, and cannot fail to apply (`sense` = false)
 * at a node where it starts.
 */
private predicate not_allowed(TrackableState state, ControlFlowNode f, Context ctx, boolean sense) {
state.endsAt(f, ctx) and sense = true
or
state.startsAt(f, ctx) and sense = false
}
/**
* Holds if `state` may apply (with `sense` = true) or may not apply (with `sense` = false) to
* control flow node `f` given the context `ctx`.
*/
predicate appliesToNode(TrackableState state, ControlFlowNode f, Context ctx, boolean sense) {
// Base cases: the state does not apply where it ends, and applies where it starts.
state.endsAt(f, ctx) and sense = false
or
state.startsAt(f, ctx) and sense = true
or
// Otherwise the value of `sense` is propagated to `f`, unless `f` itself rules it out.
not not_allowed(state, f, ctx, sense) and
(
exists(BasicBlock b |
/* First node in a block */
f = b.getNode(0) and appliesAtBlockStart(state, b, ctx, sense)
or
/* Other nodes in block, except trackable calls */
exists(int n |
f = b.getNode(n) and
appliesToNode(state, b.getNode(n - 1), ctx, sense) and
not exists(PythonFunctionObjectInternal func, Context callee |
callee.fromCall(f, func, ctx)
)
)
)
or
/* Function entry via call */
exists(PythonFunctionObjectInternal func, CallNode call, Context caller |
ctx.fromCall(call, func, caller) and
func.getScope().getEntryNode() = f and
appliesToNode(state, call.getAPredecessor(), caller, sense)
)
or
/* Function return */
exists(PythonFunctionObjectInternal func, Context callee |
callee.fromCall(f, func, ctx) and
appliesToNode(state, func.getScope().getANormalExit(), callee, sense)
)
or
/* Other scope entries */
exists(Scope s |
s.getEntryNode() = f and
ctx.appliesToScope(s)
|
// A scope that no other scope precedes is entered with the state not applying.
not exists(Scope pred | pred.precedes(s)) and
(ctx.isImport() or ctx.isRuntime()) and
sense = false
or
// Otherwise the value at a normal exit of a preceding scope carries over.
exists(Scope pred, Context pred_ctx |
appliesToNode(state, pred.getANormalExit(), pred_ctx, sense) and
pred.precedes(s) and
ctx.isRuntime()
|
pred_ctx.isRuntime() or pred_ctx.isImport()
)
)
)
}
/**
* Holds if `state` may apply (with `sense` = true) or may not apply (with `sense` = false) at the
* start of basic block `block` given the context `ctx`.
*/
private predicate appliesAtBlockStart(
TrackableState state, BasicBlock block, Context ctx, boolean sense
) {
// The edge into `block` is a test that establishes `sense` directly.
exists(PyEdgeRefinement test |
test.getSuccessor() = block and
state.testsFor(test, ctx, sense)
)
or
// Or `sense` holds at the end of a predecessor block and no test on that edge
// establishes the opposite.
// NOTE(review): the negated check uses the context-free `testsFor`, unlike the
// context-sensitive call above -- confirm this is intended.
exists(BasicBlock pred |
pred.getASuccessor() = block and
appliesAtBlockEnd(state, pred, ctx, sense) and
not exists(PyEdgeRefinement test |
test.getPredecessor() = pred and
test.getSuccessor() = block and
state.testsFor(test, sense.booleanNot())
)
)
}
/**
* Holds if `state` may apply (with `sense` = true) or may not apply (with `sense` = false) at the
* end of basic block `block` given the context `ctx`.
*/
private predicate appliesAtBlockEnd(
TrackableState state, BasicBlock block, Context ctx, boolean sense
) {
appliesToNode(state, block.getLastNode(), ctx, sense)
}
}
import old.StateTracking

752
python/ql/src/semmle/python/dataflow/TaintTracking.qll Executable file → Normal file
View File

@@ -1,751 +1 @@
/**
* # Python Taint Tracking Library
*
* The taint tracking library is described in three parts.
*
* 1. Specification of kinds, sources, sinks and flows.
* 2. The high level query API
* 3. The implementation.
*
*
* ## Specification
*
* There are four parts to the specification of a taint tracking query.
* These are:
*
* 1. Kinds
*
* The Python taint tracking library supports arbitrary kinds of taint.
* This is useful where you want to track something related to "taint", but that is in itself not dangerous.
* For example, we might want to track the flow of request objects.
* Request objects are not in themselves tainted, but they do contain tainted data.
* For example, the length or timestamp of a request may not pose a risk, but the GET or POST string probably does.
* So, we would want to track request objects distinctly from the request data in the GET or POST field.
*
* Kinds can also specify additional flow steps, but we recommend using the `DataFlowExtension` module,
* which is less likely to cause issues with unwanted recursion.
*
* 2. Sources
*
* Sources of taint can be added by importing a predefined sub-type of `TaintSource`, or by defining new ones.
*
* 3. Sinks (or vulnerabilities)
*
* Sinks can be added by importing a predefined sub-type of `TaintSink`, or by defining new ones.
*
* 4. Flow extensions
*
* Additional flow can be added by importing predefined sub-types of `DataFlowExtension::DataFlowNode`
* or `DataFlowExtension::DataFlowVariable` or by defining new ones.
*
*
* ## The high-level query API
*
* The `TaintedNode` fully describes the taint flow graph.
* The full graph can be expressed as:
*
* ```ql
* from TaintedNode n, TaintedNode s
* where s = n.getASuccessor()
* select n, s
* ```
*
* The source -> sink relation can be expressed either using `TaintedNode`:
* ```ql
* from TaintedNode src, TaintedNode sink
* where src.isSource() and sink.isSink() and src.getASuccessor*() = sink
* select src, sink
* ```
* or, using the specification API:
* ```ql
* from TaintSource src, TaintSink sink
* where src.flowsToSink(sink)
* select src, sink
* ```
*
* ## The implementation
*
* The data-flow graph used by the taint-tracking library is the one created by the points-to analysis,
* and consists of the base data-flow graph defined in `semmle/python/essa/Essa.qll`
* enhanced with precise variable flows, call graph and type information.
* This graph is then enhanced with additional flows as specified above.
* Since the call graph and points-to information is context sensitive, the taint graph must also be context sensitive.
*
* The taint graph is a directed graph where each node consists of a
* `(CFG node, context, taint)` triple although it could be thought of more naturally
* as a number of distinct graphs, one for each input taint-kind consisting of data flow nodes,
* `(CFG node, context)` pairs, labelled with their `taint`.
*
* The `TrackedValue` used in the implementation is not the taint kind specified by the user,
* but describes both the kind of taint and how that taint relates to any object referred to by a data-flow graph node or edge.
* Currently, only two types of `taint` are supported: simple taint, where the object is actually tainted;
* and attribute taint where a named attribute of the referred object is tainted.
*
* Support for tainted members (both specific members of tuples and the like,
* and generic members for mutable collections) is likely to be added in the near future, and other forms are possible.
* The types of taints are hard-wired with no user-visible extension method at the moment.
*/
import python
private import semmle.python.pointsto.Filters as Filters
private import semmle.python.objects.ObjectInternal
private import semmle.python.dataflow.Implementation
import semmle.python.dataflow.Configuration
/**
 * A 'kind' of taint. This may be almost anything,
 * but it is typically something like a "user-defined string".
 * Examples include data from an HTTP request object,
 * data from an SMS or other mobile data source,
 * or, for a super secure system, environment variables or
 * the local file system.
 */
abstract class TaintKind extends string {
  bindingset[this]
  TaintKind() { any() }

  /**
   * Gets the kind of taint that the named attribute will have if an object is tainted with this taint.
   * In other words, if `x` has this kind of taint then it implies that `x.name`
   * has `result` kind of taint.
   */
  TaintKind getTaintOfAttribute(string name) { none() }

  /**
   * Gets the kind of taint that results from calling the named method if an object is tainted with this taint.
   * In other words, if `x` has this kind of taint then it implies that `x.name()`
   * has `result` kind of taint.
   */
  TaintKind getTaintOfMethodResult(string name) { none() }

  /**
   * Gets the taint resulting from the flow step `fromnode` -> `tonode`.
   */
  TaintKind getTaintForFlowStep(ControlFlowNode fromnode, ControlFlowNode tonode) { none() }

  /**
   * Gets the taint resulting from the flow step `fromnode` -> `tonode`, with `edgeLabel`.
   *
   * By default this wraps the two-argument `getTaintForFlowStep` and labels the edge
   * with this kind's name.
   */
  TaintKind getTaintForFlowStep(ControlFlowNode fromnode, ControlFlowNode tonode, string edgeLabel) {
    edgeLabel = "custom taint flow step for " + this and
    result = this.getTaintForFlowStep(fromnode, tonode)
  }

  /**
   * Holds if this kind of taint "taints" `expr`.
   */
  final predicate taints(ControlFlowNode expr) {
    exists(TaintedNode tainted | tainted.getCfgNode() = expr | tainted.getTaintKind() = this)
  }

  /** DEPRECATED -- Use getType() instead */
  deprecated ClassObject getClass() { none() }

  /**
   * Gets the class of this kind of taint.
   * For example, if this were a kind of string taint
   * the `result` would be `theStrType()`.
   */
  ClassValue getType() { none() }

  /**
   * Gets the boolean values (may be one, neither, or both) that
   * may result from the Python expression `bool(this)`
   */
  boolean booleanValue() {
    /*
     * Default to true as the vast majority of taint is strings and
     * the empty string is almost always benign.
     */

    true = result
  }

  /** Gets a printable description of this kind of taint; by default the kind's own string value. */
  string repr() { this = result }

  /**
   * Gets the taint resulting from iterating over this kind of taint.
   * For example iterating over a text file produces lines. So iterating
   * over a tainted file would result in tainted strings
   */
  TaintKind getTaintForIteration() { none() }

  /**
   * Holds if `tonode` is the variable named as a successor variable by the
   * `DataFlowExtension::DataFlowVariable` at `fromnode`, with `edgeLabel` describing the step.
   */
  predicate flowStep(DataFlow::Node fromnode, DataFlow::Node tonode, string edgeLabel) {
    edgeLabel = "custom taint variable step" and
    exists(DataFlowExtension::DataFlowVariable origin |
      origin = fromnode.asVariable()
    |
      origin.getASuccessorVariable() = tonode.asVariable()
    )
  }
}
/**
* An alias for `TaintKind`: `FlowLabel` and `TaintKind` name the same type,
* so the two can be used interchangeably.
*/
class FlowLabel = TaintKind;
/**
 * Taint kinds representing collections of another taint kind.
 * We use `{kind}` to represent a mapping of string to `kind` and
 * `[kind]` to represent a flat collection of `kind`.
 * The use of `{` and `[` is chosen to reflect dict and list literals
 * in Python. We choose a single character prefix and suffix for simplicity
 * and ease of preventing infinite recursion.
 */
abstract class CollectionKind extends TaintKind {
  bindingset[this]
  CollectionKind() {
    /* Prevent any collection kinds more than 2 deep */
    not (this.charAt(2) = "{" or this.charAt(2) = "[") and
    (this.charAt(0) = "{" or this.charAt(0) = "[")
  }

  /** Gets the taint kind of the members of this collection. */
  abstract TaintKind getMember();

  /** Holds if there is a flow step `fromnode` -> `tonode` from a member into a collection. */
  abstract predicate flowFromMember(DataFlow::Node fromnode, DataFlow::Node tonode);

  /** Holds if there is a flow step `fromnode` -> `tonode` from a collection to one of its members. */
  abstract predicate flowToMember(DataFlow::Node fromnode, DataFlow::Node tonode);
}
/**
 * A taint kind representing a flat collection of kinds.
 * Typically a sequence, but can include sets.
 */
class SequenceKind extends CollectionKind {
  TaintKind itemKind;

  SequenceKind() { this = "[" + itemKind + "]" }

  /** Gets the taint kind of the items in this sequence. */
  TaintKind getItem() { itemKind = result }

  /**
   * Gets the item kind when `tonode` is a `%` operation that has `fromnode` as an operand,
   * provided the item kind's type is the builtin `str`.
   */
  override TaintKind getTaintForFlowStep(ControlFlowNode fromnode, ControlFlowNode tonode) {
    result = this.getItem() and
    result.getType() = ObjectInternal::builtin("str") and
    exists(BinaryExprNode modExpr | modExpr = tonode |
      modExpr.getAnOperand() = fromnode and
      modExpr.getOp() instanceof Mod
    )
  }

  /** Gets the item kind for the method named `pop`. */
  override TaintKind getTaintOfMethodResult(string name) {
    result = this.getItem() and name = "pop"
  }

  override string repr() { result = "sequence of " + itemKind }

  /** Gets the item kind: iterating over the sequence yields its items. */
  override TaintKind getTaintForIteration() { itemKind = result }

  override TaintKind getMember() { itemKind = result }

  override predicate flowFromMember(DataFlow::Node fromnode, DataFlow::Node tonode) {
    sequence_construct(fromnode.asCfgNode(), tonode.asCfgNode())
  }

  override predicate flowToMember(DataFlow::Node fromnode, DataFlow::Node tonode) {
    SequenceKind::itemFlowStep(fromnode.asCfgNode(), tonode.asCfgNode())
  }
}
/** Control-flow steps that carry sequence taint from one node to another. */
module SequenceKind {
  /**
   * Holds if a tainted sequence at `fromnode` taints `tonode`, where
   * `edgeLabel` names the kind of step taken: slicing, construction of a
   * sequence from another, a copy call, or a binary operation.
   */
  predicate flowStep(ControlFlowNode fromnode, ControlFlowNode tonode, string edgeLabel) {
    subscript_slice(fromnode, tonode) and edgeLabel = "slicing"
    or
    sequence_call(fromnode, tonode) and edgeLabel = "sequence construction"
    or
    // NOTE(review): the label says "dict" although this is a sequence step;
    // kept as-is because the label is observable by callers.
    edgeLabel = "dict copy" and
    Implementation::copyCall(fromnode, tonode)
    or
    edgeLabel = "binary operation" and
    fromnode = tonode.(BinaryExprNode).getAnOperand()
  }

  /** Holds if `tonode` is a non-slice subscript load whose object is `fromnode`. */
  predicate itemFlowStep(ControlFlowNode fromnode, ControlFlowNode tonode) {
    subscript_index(fromnode, tonode)
  }
}
/** Control-flow steps that carry dict taint from one node to another. */
module DictKind {
  /**
   * Holds if a tainted dict at `fromnode` taints `tonode`, either because
   * `tonode` is a `dict(...)` call with `fromnode` as its first argument or
   * because `tonode` is a copy call on `fromnode`. `edgeLabel` names the step.
   */
  predicate flowStep(ControlFlowNode fromnode, ControlFlowNode tonode, string edgeLabel) {
    exists(CallNode dictCall | dictCall = tonode |
      dictCall.getArg(0) = fromnode and
      dictCall.getFunction().pointsTo(ObjectInternal::builtin("dict")) and
      edgeLabel = "dict() call"
    )
    or
    edgeLabel = "dict copy" and
    Implementation::copyCall(fromnode, tonode)
  }
}
/*
 * Helper for sequence flow steps.
 * Holds if `sub` loads from `obj` via a subscript whose index is not a slice.
 */
pragma[noinline]
private predicate subscript_index(ControlFlowNode obj, SubscriptNode sub) {
  obj = sub.getObject() and
  not sub.getNode().getIndex() instanceof Slice and
  sub.isLoad()
}
/* Holds if `sub` loads from `obj` via a subscript whose index is a slice. */
pragma[noinline]
private predicate subscript_slice(ControlFlowNode obj, SubscriptNode sub) {
  obj = sub.getObject() and
  sub.getNode().getIndex() instanceof Slice and
  sub.isLoad()
}
/**
* A taint kind representing a mapping of objects to kinds.
* Typically a dict, but can include other mappings.
* The kind is spelled `{value}`, where `value` is the kind of the mapped values.
*/
class DictKind extends CollectionKind {
/* The taint kind of the values; fixed by the characteristic predicate. */
TaintKind valueKind;
DictKind() { this = "{" + valueKind + "}" }
/** Gets the taint kind of the values of this mapping. */
TaintKind getValue() { result = valueKind }
/**
* Gets the taint of the result of calling method `name`:
* `get` yields the value kind, while `values` and `itervalues` yield a
* sequence kind whose items have the value kind.
*/
override TaintKind getTaintOfMethodResult(string name) {
name = "get" and result = valueKind
or
name = "values" and result.(SequenceKind).getItem() = valueKind
or
name = "itervalues" and result.(SequenceKind).getItem() = valueKind
}
override string repr() { result = "dict of " + valueKind }
override TaintKind getMember() { result = valueKind }
/**
* Holds if `tonode` is a dict display with `fromnode` as a value, or a
* `dict(...)` call with `fromnode` as a keyword argument.
*/
override predicate flowFromMember(DataFlow::Node fromnode, DataFlow::Node tonode) {
dict_construct(fromnode.asCfgNode(), tonode.asCfgNode())
}
/** Holds if `tonode` is a non-slice subscript load on `fromnode`. */
override predicate flowToMember(DataFlow::Node fromnode, DataFlow::Node tonode) {
subscript_index(fromnode.asCfgNode(), tonode.asCfgNode())
}
}
/**
* A type of sanitizer of untrusted data.
* Examples include sanitizers for http responses, for DB access or for shell commands.
* Usually a sanitizer can only sanitize data for one particular use.
* For example, a sanitizer for DB commands would not be safe to use for http responses.
*
* Every predicate below defaults to `none()`; subclasses override only the
* ones that describe how they sanitize.
*/
abstract class Sanitizer extends string {
bindingset[this]
Sanitizer() { any() }
/** Holds if `taint` cannot flow through `node`. */
predicate sanitizingNode(TaintKind taint, ControlFlowNode node) { none() }
/** Holds if a call to `callee` removes the `taint`. */
predicate sanitizingCall(TaintKind taint, FunctionObject callee) { none() }
/** Holds if `test` shows value to be untainted with `taint`. */
predicate sanitizingEdge(TaintKind taint, PyEdgeRefinement test) { none() }
/** Holds if the single-successor guard `test` shows value to be untainted with `taint`. */
predicate sanitizingSingleEdge(TaintKind taint, SingleSuccessorGuard test) { none() }
/** Holds if `def` shows value to be untainted with `taint`. */
predicate sanitizingDefinition(TaintKind taint, EssaDefinition def) { none() }
}
/**
* A source of taintedness.
* Users of the taint tracking library should override this
* class to provide their own sources.
*/
abstract class TaintSource extends @py_flow_node {
/** Gets a textual representation of this element. */
string toString() { result = "Taint source" }
/**
* Holds if `this` is a source of taint kind `kind`
*
* This must be overridden by subclasses to specify sources of taint.
*
* The smaller this predicate is, the faster `Taint.flowsTo()` will converge.
*/
abstract predicate isSourceOf(TaintKind kind);
/**
* Holds if `this` is a source of taint kind `kind` for the given context.
* Generally, this should not need to be overridden; overriding `isSourceOf(kind)` should be sufficient.
* By default a source is only a source in the top context.
*
* The smaller this predicate is, the faster `Taint.flowsTo()` will converge.
*/
predicate isSourceOf(TaintKind kind, TaintTrackingContext context) {
context.isTop() and this.isSourceOf(kind)
}
/** Gets the location of this source, viewed as a control flow node. */
Location getLocation() { result = this.(ControlFlowNode).getLocation() }
/**
* Holds if this element is at the specified location.
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
this.getLocation().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
}
/**
* Gets a TaintedNode for this taint source: a node at this control flow
* node whose kind and context satisfy `isSourceOf`, and whose path has no
* attribute.
*/
TaintedNode getATaintNode() {
result.getCfgNode() = this and
this.isSourceOf(result.getTaintKind(), result.getContext()) and
result.getPath().noAttribute()
}
/** Holds if taint of kind `srckind` can flow from this source to sink `sink` */
final predicate flowsToSink(TaintKind srckind, TaintSink sink) {
exists(TaintedNode src, TaintedNode tsink |
src = this.getATaintNode() and
src.getTaintKind() = srckind and
src.flowsTo(tsink) and
/* Already implied by `getATaintNode()` together with the kind constraint above. */
this.isSourceOf(srckind, _) and
sink = tsink.getCfgNode() and
/* The kind arriving at the sink must be one the sink is vulnerable to. */
sink.sinks(tsink.getTaintKind()) and
tsink.getPath().noAttribute() and
tsink.isSink()
)
}
/** Holds if taint can flow from this source to taint sink `sink` */
final predicate flowsToSink(TaintSink sink) { this.flowsToSink(_, sink) }
}
/**
* Warning: Advanced feature. Users are strongly recommended to use `TaintSource` instead.
* A source of taintedness on the ESSA data-flow graph.
* Users of the taint tracking library can override this
* class to provide their own sources on the ESSA graph.
*/
abstract class TaintedDefinition extends EssaNodeDefinition {
/**
* Holds if `this` is a source of taint kind `kind`
*
* This must be overridden by subclasses to specify sources of taint.
*
* The smaller this predicate is, the faster `Taint.flowsTo()` will converge.
*/
abstract predicate isSourceOf(TaintKind kind);
/**
* Holds if `this` is a source of taint kind `kind` for the given context.
* Generally, this should not need to be overridden; overriding `isSourceOf(kind)` should be sufficient.
* By default a definition is only a source in the top context.
*
* The smaller this predicate is, the faster `Taint.flowsTo()` will converge.
*/
predicate isSourceOf(TaintKind kind, TaintTrackingContext context) {
context.isTop() and this.isSourceOf(kind)
}
}
/**
 * The first argument of a method call named `update`.
 * Data flows from this argument to the ESSA variable refined by the
 * method call-site (presumably the receiver, as in `d.update(arg)`).
 */
private class DictUpdate extends DataFlowExtension::DataFlowNode {
  MethodCallsiteRefinement call;

  DictUpdate() {
    exists(CallNode updateCall |
      updateCall = call.getCall() and
      updateCall.getArg(0) = this and
      updateCall.getFunction().(AttrNode).getName() = "update"
    )
  }

  override EssaVariable getASuccessorVariable() { result = call.getVariable() }
}
/**
 * The first argument of a method call named `extend`.
 * Data flows from this argument to the ESSA variable refined by the
 * method call-site (presumably the receiver, as in `seq.extend(arg)`).
 */
private class SequenceExtends extends DataFlowExtension::DataFlowNode {
  MethodCallsiteRefinement call;

  SequenceExtends() {
    exists(CallNode extendCall |
      extendCall = call.getCall() and
      extendCall.getArg(0) = this and
      extendCall.getFunction().(AttrNode).getName() = "extend"
    )
  }

  override EssaVariable getASuccessorVariable() { result = call.getVariable() }
}
/**
* A node that is vulnerable to one or more types of taint.
* These nodes provide the sinks when computing the taint flow graph.
* An example would be an argument to a write to a http response object,
* such an argument would be vulnerable to unsanitized user-input (XSS).
*
* Users of the taint tracking library should extend this
* class to provide their own sink nodes.
*/
abstract class TaintSink extends @py_flow_node {
/** Gets a textual representation of this element. */
string toString() { result = "Taint sink" }
/**
* Holds if `this` "sinks" taint kind `taint`.
* Typically this means that `this` is vulnerable to taint kind `taint`.
*
* This must be overridden by subclasses to specify vulnerabilities or other sinks of taint.
*/
abstract predicate sinks(TaintKind taint);
/** Gets the location of this sink, viewed as a control flow node. */
Location getLocation() { result = this.(ControlFlowNode).getLocation() }
/**
* Holds if this element is at the specified location.
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
this.getLocation().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
}
}
/**
* Extension for data-flow, to help express data-flow paths that are
* library or framework specific and cannot be inferred by the general
* data-flow machinery.
*
* Every predicate in the classes below defaults to `none()`, so an extension
* only needs to override the kinds of step it wants to add or prune.
*/
module DataFlowExtension {
/** A control flow node that modifies the basic data-flow. */
abstract class DataFlowNode extends @py_flow_node {
/** Gets a textual representation of this element. */
string toString() { result = "Dataflow extension node" }
/**
* Gets a successor node for data-flow.
* Data (all forms) is assumed to flow from `this` to `result`
*/
ControlFlowNode getASuccessorNode() { none() }
/**
* Gets a successor variable for data-flow.
* Data (all forms) is assumed to flow from `this` to `result`.
* Note: This is an unlikely form of flow. See `DataFlowVariable.getASuccessorVariable()`
*/
EssaVariable getASuccessorVariable() { none() }
/**
* Holds if data cannot flow from `this` to `succ`,
* even though it would normally do so.
*/
predicate prunedSuccessor(ControlFlowNode succ) { none() }
/**
* Gets a successor node, where the successor node will be tainted with `tokind`
* when `this` is tainted with `fromkind`.
* Extensions to `DataFlowNode` should override this to provide additional taint steps.
*/
ControlFlowNode getASuccessorNode(TaintKind fromkind, TaintKind tokind) { none() }
/**
* Gets a successor node for data-flow with a change of context from callee to caller
* (going *up* the call-stack) across call-site `call`.
* Data (all forms) is assumed to flow from `this` to `result`
* Extensions to `DataFlowNode` should override this to provide additional taint steps.
*/
ControlFlowNode getAReturnSuccessorNode(CallNode call) { none() }
/**
* Gets a successor node for data-flow with a change of context from caller to callee
* (going *down* the call-stack) across call-site `call`.
* Data (all forms) is assumed to flow from `this` to `result`
* Extensions to `DataFlowNode` should override this to provide additional taint steps.
*/
ControlFlowNode getACalleeSuccessorNode(CallNode call) { none() }
}
/** An ESSA variable that modifies the basic data-flow. */
class DataFlowVariable extends EssaVariable {
/**
* Gets a successor node for data-flow.
* Data (all forms) is assumed to flow from `this` to `result`
* Note: This is an unlikely form of flow. See `DataFlowNode.getASuccessorNode()`
*/
ControlFlowNode getASuccessorNode() { none() }
/**
* Gets a successor variable for data-flow.
* Data (all forms) is assumed to flow from `this` to `result`.
*/
EssaVariable getASuccessorVariable() { none() }
/**
* Holds if data cannot flow from `this` to `succ`,
* even though it would normally do so.
*/
predicate prunedSuccessor(EssaVariable succ) { none() }
}
}
/** A `TaintTrackingNode` for which `isSource()` holds. */
class TaintedPathSource extends TaintTrackingNode {
  TaintedPathSource() { this.isSource() }

  /** Gets the data-flow node underlying this path source. */
  DataFlow::Node getSource() { this.getNode() = result }
}
/** A `TaintTrackingNode` for which `isSink()` holds. */
class TaintedPathSink extends TaintTrackingNode {
  TaintedPathSink() { this.isSink() }

  /** Gets the data-flow node underlying this path sink. */
  DataFlow::Node getSink() { this.getNode() = result }
}
/* Backwards compatible name: `TaintedNode` is an alias of `TaintTrackingNode`. */
class TaintedNode = TaintTrackingNode;
/* Helpers for Validating classes */
private import semmle.python.pointsto.PointsTo
/**
* Data flow module providing an interface compatible with
* the other language implementations.
*/
module DataFlow {
/**
* Generic taint kind, source and sink classes for convenience and
* compatibility with other language libraries
*/
class Extension = DataFlowExtension::DataFlowNode;
/**
* A deprecated data-flow configuration, identified by a string and defined
* by its sources and sinks on the control flow graph.
*/
abstract deprecated class Configuration extends string {
bindingset[this]
Configuration() { this = this }
/** Holds if `source` is a source for this configuration. */
abstract predicate isSource(ControlFlowNode source);
/** Holds if `sink` is a sink for this configuration. */
abstract predicate isSink(ControlFlowNode sink);
/*
* Holds if the tainted node `source`, belonging to this configuration and
* located at one of its sources, flows to the tainted node `sink` located
* at one of its sinks.
*/
private predicate hasFlowPath(TaintedNode source, TaintedNode sink) {
source.getConfiguration() = this and
this.isSource(source.getCfgNode()) and
this.isSink(sink.getCfgNode()) and
source.flowsTo(sink)
}
/** Holds if data flows from `source` to `sink` under this configuration. */
predicate hasFlow(ControlFlowNode source, ControlFlowNode sink) {
exists(TaintedNode psource, TaintedNode psink |
psource.getCfgNode() = source and
psink.getCfgNode() = sink and
this.isSource(source) and
this.isSink(sink) and
this.hasFlowPath(psource, psink)
)
}
}
/*
* Presents each deprecated `Configuration` as a `TaintTracking::Configuration`
* whose sources and sinks carry the `DataFlowType` taint kind.
*/
deprecated private class ConfigurationAdapter extends TaintTracking::Configuration {
ConfigurationAdapter() { this instanceof Configuration }
override predicate isSource(DataFlow::Node node, TaintKind kind) {
this.(Configuration).isSource(node.asCfgNode()) and
kind instanceof DataFlowType
}
override predicate isSink(DataFlow::Node node, TaintKind kind) {
this.(Configuration).isSink(node.asCfgNode()) and
kind instanceof DataFlowType
}
}
/* A data-flow node wraps either an ESSA variable or a control flow node. */
private newtype TDataFlowNode =
TEssaNode(EssaVariable var) or
TCfgNode(ControlFlowNode node)
/** A data-flow node: either a control flow node or an ESSA variable. */
abstract class Node extends TDataFlowNode {
/** Gets the control flow node wrapped by this node, if any. */
abstract ControlFlowNode asCfgNode();
/** Gets the ESSA variable wrapped by this node, if any. */
abstract EssaVariable asVariable();
/** Gets a textual representation of this element. */
abstract string toString();
/** Gets the scope containing this node. */
abstract Scope getScope();
/** Gets the basic block containing this node. */
abstract BasicBlock getBasicBlock();
/** Gets the location of this node. */
abstract Location getLocation();
/** Gets the AST node of the control flow node wrapped by this node, if any. */
AstNode asAstNode() { result = this.asCfgNode().getNode() }
/** For backwards compatibility -- Use asAstNode() instead */
deprecated AstNode getNode() { result = this.asAstNode() }
}
/** A data-flow node that wraps a control flow node. */
class CfgNode extends Node, TCfgNode {
override ControlFlowNode asCfgNode() { this = TCfgNode(result) }
override EssaVariable asVariable() { none() }
/** Gets a textual representation of this element. */
override string toString() { result = this.asAstNode().toString() }
override Scope getScope() { result = this.asCfgNode().getScope() }
override BasicBlock getBasicBlock() { result = this.asCfgNode().getBasicBlock() }
override Location getLocation() { result = this.asCfgNode().getLocation() }
}
/** A data-flow node that wraps an ESSA variable. */
class EssaNode extends Node, TEssaNode {
override ControlFlowNode asCfgNode() { none() }
override EssaVariable asVariable() { this = TEssaNode(result) }
/** Gets a textual representation of this element. */
override string toString() { result = this.asVariable().toString() }
override Scope getScope() { result = this.asVariable().getScope() }
/* Basic block and location are those of the variable's definition. */
override BasicBlock getBasicBlock() {
result = this.asVariable().getDefinition().getBasicBlock()
}
override Location getLocation() { result = this.asVariable().getDefinition().getLocation() }
}
}
/*
 * The taint kind "Data flow", which only exists when at least one
 * `DataFlow::Configuration` is defined.
 */
deprecated private class DataFlowType extends TaintKind {
  DataFlowType() {
    exists(DataFlow::Configuration anyConfig) and
    this = "Data flow"
  }
}
/*
 * Holds if `dictnode` constructs a dict holding `itemnode`: either a call to
 * the builtin `dict` with `itemnode` as a keyword argument, or a dict display
 * with `itemnode` as one of its values.
 */
pragma[noinline]
private predicate dict_construct(ControlFlowNode itemnode, ControlFlowNode dictnode) {
  exists(CallNode dictCall | dictCall = dictnode |
    dictCall.getFunction().pointsTo(ObjectInternal::builtin("dict")) and
    itemnode = dictCall.getArgByName(_)
  )
  or
  itemnode = dictnode.(DictNode).getAValue()
}
/*
 * Holds if `seqnode` is a loaded list, tuple or set display that has
 * `itemnode` as one of its elements.
 */
pragma[noinline]
private predicate sequence_construct(ControlFlowNode itemnode, ControlFlowNode seqnode) {
  (
    itemnode = seqnode.(SetNode).getAnElement()
    or
    itemnode = seqnode.(TupleNode).getElement(_)
    or
    itemnode = seqnode.(ListNode).getElement(_)
  ) and
  seqnode.isLoad()
}
/*
 * A call to construct a sequence from a sequence or iterator.
 * Holds if `tonode` calls the builtin `list`, `tuple` or `set` with
 * `fromnode` as its first argument.
 */
pragma[noinline]
private predicate sequence_call(ControlFlowNode fromnode, CallNode tonode) {
  exists(string builtinName |
    builtinName = "list" or builtinName = "tuple" or builtinName = "set"
  |
    tonode.getFunction().pointsTo(ObjectInternal::builtin(builtinName))
  ) and
  fromnode = tonode.getArg(0)
}
import old.TaintTracking

View File

@@ -22,5 +22,5 @@ private import python
* global (inter-procedural) data flow analyses.
*/
module DataFlow {
import experimental.dataflow.internal.DataFlowImpl
import internal.DataFlowImpl
}

View File

@@ -7,7 +7,7 @@
* the source may reach the sink. We do not track flow across pointer
* dereferences or array indexing. To track these types of flow, where the
* exact value may not be preserved, import
* `experimental.dataflow.TaintTracking`.
* `semmle.python.dataflow.new.TaintTracking`.
*
* To use global (interprocedural) data flow, extend the class
* `DataFlow::Configuration` as documented on that class. To use local
@@ -22,5 +22,5 @@ private import python
* global (inter-procedural) data flow analyses.
*/
module DataFlow2 {
import experimental.dataflow.internal.DataFlowImpl2
import semmle.python.dataflow.new.internal.DataFlowImpl2
}

View File

@@ -7,7 +7,7 @@
* the source may reach the sink. We do not track flow across pointer
* dereferences or array indexing. To track these types of flow, where the
* exact value may not be preserved, import
* `experimental.dataflow.TaintTracking`.
* `semmle.python.dataflow.new.TaintTracking`.
*
* To use global (interprocedural) data flow, extend the class
* `DataFlow::Configuration` as documented on that class. To use local
@@ -22,5 +22,5 @@ private import python
* global (inter-procedural) data flow analyses.
*/
module DataFlow3 {
import experimental.dataflow.internal.DataFlowImpl3
import semmle.python.dataflow.new.internal.DataFlowImpl3
}

View File

@@ -7,7 +7,7 @@
* the source may reach the sink. We do not track flow across pointer
* dereferences or array indexing. To track these types of flow, where the
* exact value may not be preserved, import
* `experimental.dataflow.TaintTracking`.
* `semmle.python.dataflow.new.TaintTracking`.
*
* To use global (interprocedural) data flow, extend the class
* `DataFlow::Configuration` as documented on that class. To use local
@@ -22,5 +22,5 @@ private import python
* global (inter-procedural) data flow analyses.
*/
module DataFlow4 {
import experimental.dataflow.internal.DataFlowImpl4
import semmle.python.dataflow.new.internal.DataFlowImpl4
}

View File

@@ -1,8 +1,8 @@
private import python
private import experimental.dataflow.DataFlow
private import semmle.python.dataflow.new.DataFlow
// Need to import since frameworks can extend `RemoteFlowSource::Range`
private import experimental.semmle.python.Frameworks
private import experimental.semmle.python.Concepts
private import semmle.python.Frameworks
private import semmle.python.Concepts
/**
* A data flow source of remote user input.

View File

@@ -15,5 +15,5 @@ private import python
* global (inter-procedural) taint-tracking analyses.
*/
module TaintTracking {
import experimental.dataflow.internal.tainttracking1.TaintTrackingImpl
import internal.tainttracking1.TaintTrackingImpl
}

View File

@@ -15,5 +15,5 @@ private import python
* global (inter-procedural) taint-tracking analyses.
*/
module TaintTracking2 {
import experimental.dataflow.internal.tainttracking2.TaintTrackingImpl
import semmle.python.dataflow.new.internal.tainttracking2.TaintTrackingImpl
}

View File

@@ -15,5 +15,5 @@ private import python
* global (inter-procedural) taint-tracking analyses.
*/
module TaintTracking3 {
import experimental.dataflow.internal.tainttracking3.TaintTrackingImpl
import semmle.python.dataflow.new.internal.tainttracking3.TaintTrackingImpl
}

View File

@@ -15,5 +15,5 @@ private import python
* global (inter-procedural) taint-tracking analyses.
*/
module TaintTracking4 {
import experimental.dataflow.internal.tainttracking4.TaintTrackingImpl
import semmle.python.dataflow.new.internal.tainttracking4.TaintTrackingImpl
}

View File

@@ -4,7 +4,7 @@
private import python
private import DataFlowPrivate
import experimental.dataflow.TypeTracker
import semmle.python.dataflow.new.TypeTracker
import Attributes
private import semmle.python.essa.SsaCompute

View File

@@ -1,7 +1,7 @@
private import python
private import experimental.dataflow.DataFlow
private import experimental.dataflow.internal.DataFlowPrivate
private import experimental.dataflow.internal.TaintTrackingPublic
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.internal.DataFlowPrivate
private import semmle.python.dataflow.new.internal.TaintTrackingPublic
/**
* Holds if `node` should be a sanitizer in all global taint flow configurations

View File

@@ -5,9 +5,9 @@
private import python
private import TaintTrackingPrivate
private import experimental.dataflow.DataFlow
private import semmle.python.dataflow.new.DataFlow
// Need to import since frameworks can extend `AdditionalTaintStep`
private import experimental.semmle.python.Frameworks
private import semmle.python.Frameworks
// Local taint flow and helpers
/**

View File

@@ -0,0 +1,6 @@
import semmle.python.dataflow.new.internal.TaintTrackingPublic as Public
module Private {
import semmle.python.dataflow.new.DataFlow::DataFlow as DataFlow
import semmle.python.dataflow.new.internal.TaintTrackingPrivate
}

View File

@@ -0,0 +1,6 @@
import semmle.python.dataflow.new.internal.TaintTrackingPublic as Public
module Private {
import semmle.python.dataflow.new.DataFlow2::DataFlow2 as DataFlow
import semmle.python.dataflow.new.internal.TaintTrackingPrivate
}

View File

@@ -0,0 +1,6 @@
import semmle.python.dataflow.new.internal.TaintTrackingPublic as Public
module Private {
import semmle.python.dataflow.new.DataFlow3::DataFlow3 as DataFlow
import semmle.python.dataflow.new.internal.TaintTrackingPrivate
}

View File

@@ -0,0 +1,6 @@
import semmle.python.dataflow.new.internal.TaintTrackingPublic as Public
module Private {
import semmle.python.dataflow.new.DataFlow4::DataFlow4 as DataFlow
import semmle.python.dataflow.new.internal.TaintTrackingPrivate
}

View File

@@ -0,0 +1,141 @@
import python
import semmle.python.dataflow.TaintTracking
private import semmle.python.objects.ObjectInternal
private import semmle.python.dataflow.Implementation
/**
* Taint-tracking module: aliases for the source, sink and extension classes
* plus the `Configuration` class used to define and query an analysis.
*/
module TaintTracking {
class Source = TaintSource;
class Sink = TaintSink;
class Extension = DataFlowExtension::DataFlowNode;
/* Both path aliases name the same class, `TaintTrackingNode`. */
class PathSource = TaintTrackingNode;
class PathSink = TaintTrackingNode;
/**
* A taint-tracking configuration, identified by a string.
* Subclasses override the predicates below to define sources, sinks,
* additional flow steps and barriers. All defaults are either `none()` or
* derived from the old implementation API.
*/
abstract class Configuration extends string {
/* Required to prevent compiler warning */
bindingset[this]
Configuration() { this = this }
/* Old implementation API */
/** Holds if `src` is a source for this configuration (old API). */
predicate isSource(Source src) { none() }
/** Holds if `sink` is a sink for this configuration (old API). */
predicate isSink(Sink sink) { none() }
/** Holds if `sanitizer` is a sanitizer for this configuration (old API). */
predicate isSanitizer(Sanitizer sanitizer) { none() }
/** Holds if `extension` is an extension for this configuration (old API). */
predicate isExtension(Extension extension) { none() }
/* New implementation API */
/**
* Holds if `src` is a source of taint of `kind` that is relevant
* for this configuration.
* By default this is derived from the old-API `isSource(Source)`.
*/
predicate isSource(DataFlow::Node src, TaintKind kind) {
exists(TaintSource taintSrc |
this.isSource(taintSrc) and
src.asCfgNode() = taintSrc and
taintSrc.isSourceOf(kind)
)
}
/**
* Holds if `sink` is a sink of taint of `kind` that is relevant
* for this configuration.
* By default this is derived from the old-API `isSink(Sink)`.
*/
predicate isSink(DataFlow::Node sink, TaintKind kind) {
exists(TaintSink taintSink |
this.isSink(taintSink) and
sink.asCfgNode() = taintSink and
taintSink.sinks(kind)
)
}
/**
* Holds if `src -> dest` should be considered as a flow edge
* in addition to standard data flow edges.
*/
predicate isAdditionalFlowStep(DataFlow::Node src, DataFlow::Node dest) { none() }
/**
* Holds if `src -> dest` is a flow edge converting taint from `srckind` to `destkind`.
*/
predicate isAdditionalFlowStep(
DataFlow::Node src, DataFlow::Node dest, TaintKind srckind, TaintKind destkind
) {
none()
}
/**
* Holds if `node` should be considered as a barrier to flow of any kind.
*/
predicate isBarrier(DataFlow::Node node) { none() }
/**
* Holds if `node` should be considered as a barrier to flow of `kind`.
* By default this is derived from the sanitizers selected by `isSanitizer`:
* a sanitizing control flow node, a sanitizing edge, single edge or
* definition on the node's variable, or a method call-site refinement that
* calls a sanitizing function.
*/
predicate isBarrier(DataFlow::Node node, TaintKind kind) {
exists(Sanitizer sanitizer | this.isSanitizer(sanitizer) |
sanitizer.sanitizingNode(kind, node.asCfgNode())
or
sanitizer.sanitizingEdge(kind, node.asVariable())
or
sanitizer.sanitizingSingleEdge(kind, node.asVariable())
or
sanitizer.sanitizingDefinition(kind, node.asVariable())
or
exists(MethodCallsiteRefinement call, FunctionObject callee |
call = node.asVariable().getDefinition() and
callee.getACall() = call.getCall() and
sanitizer.sanitizingCall(kind, callee)
)
)
}
/**
* Holds if flow from `src` to `dest` is prohibited.
*/
predicate isBarrierEdge(DataFlow::Node src, DataFlow::Node dest) { none() }
/**
* Holds if control flow from `test` along the `isTrue` edge is prohibited.
*/
predicate isBarrierTest(ControlFlowNode test, boolean isTrue) { none() }
/**
* Holds if flow from `src` to `dest` is prohibited when the incoming taint is `srckind` and the outgoing taint is `destkind`.
* Note that `srckind` and `destkind` can be the same.
*/
predicate isBarrierEdge(
DataFlow::Node src, DataFlow::Node dest, TaintKind srckind, TaintKind destkind
) {
none()
}
/* Common query API */
/** Holds if there is a flow path from `src` to `sink` under this configuration. */
predicate hasFlowPath(PathSource src, PathSink sink) {
this.(TaintTrackingImplementation).hasFlowPath(src, sink)
}
/* Old query API */
/* deprecated */
/** Holds if there is a flow path between the control flow nodes `src` and `sink`. */
deprecated predicate hasFlow(Source src, Sink sink) {
exists(PathSource psrc, PathSink psink |
this.hasFlowPath(psrc, psink) and
src = psrc.getNode().asCfgNode() and
sink = psink.getNode().asCfgNode()
)
}
/* New query API */
/** Holds if there is a flow path between the data-flow nodes `src` and `sink`. */
predicate hasSimpleFlow(DataFlow::Node src, DataFlow::Node sink) {
exists(PathSource psrc, PathSink psink |
this.hasFlowPath(psrc, psink) and
src = psrc.getNode() and
sink = psink.getNode()
)
}
}
}

View File

@@ -0,0 +1 @@
import semmle.python.dataflow.TaintTracking

View File

@@ -0,0 +1,19 @@
import python
import semmle.python.dataflow.TaintTracking
/** The taint kind `file.open`, described by `repr()` as "an open file". */
class OpenFile extends TaintKind {
  OpenFile() { this = "file.open" }

  override string repr() { "an open file" = result }
}
/**
 * A taint-tracking configuration whose sources are calls to the value named
 * `open`, tainted with the `OpenFile` kind. It defines no sinks.
 */
class OpenFileConfiguration extends TaintTracking::Configuration {
  OpenFileConfiguration() { this = "Open file configuration" }

  override predicate isSource(DataFlow::Node src, TaintKind kind) {
    kind instanceof OpenFile and
    Value::named("open").getACall() = src.asCfgNode()
  }

  override predicate isSink(DataFlow::Node sink, TaintKind kind) { none() }
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,68 @@
import semmle.python.dataflow.TaintTracking
private import semmle.python.objects.ObjectInternal
import semmle.python.dataflow.Implementation
/* For backwards compatibility -- Use `TaintTrackingContext` instead. */
deprecated class CallContext extends TaintTrackingContext {
  /** Gets a context whose caller across call-site `call` is this context. */
  TaintTrackingContext getCallee(CallNode call) { this = result.getCaller(call) }

  /**
   * Holds if this context applies to scope `s`: the top context applies to
   * any scope, and a parameter context applies to the scope of a function
   * that some implementation calls with a matching tainted argument.
   */
  predicate appliesToScope(Scope s) {
    this.isTop()
    or
    exists(
      PythonFunctionObjectInternal func, TaintKind param, AttributePath path, int n,
      TaintTrackingImplementation impl
    |
      this = TParamContext(param, path, n) and
      impl.callWithTaintedArgument(_, _, _, func, n, path, param) and
      s = func.getScope()
    )
  }
}
/*
* Backwards compatibility with config-less taint-tracking.
* This configuration accepts every source, sink and sanitizer, and turns the
* steps and pruned successors of the legacy `DataFlowExtension` classes into
* additional flow steps and barrier edges.
*/
private class LegacyConfiguration extends TaintTracking::Configuration {
LegacyConfiguration() {
/* A name that won't be accidentally chosen by users */
this = "Semmle: Internal legacy configuration"
}
/* `x = x` holds for every value, so all sources, sinks and sanitizers are included. */
override predicate isSource(TaintSource src) { src = src }
override predicate isSink(TaintSink sink) { sink = sink }
override predicate isSanitizer(Sanitizer sanitizer) { sanitizer = sanitizer }
/*
* Kind-independent steps: any successor node, successor variable, return
* successor or callee successor declared by a legacy extension node.
*/
override predicate isAdditionalFlowStep(DataFlow::Node src, DataFlow::Node dest) {
exists(DataFlowExtension::DataFlowNode legacyExtension | src.asCfgNode() = legacyExtension |
dest.asCfgNode() = legacyExtension.getASuccessorNode()
or
dest.asVariable() = legacyExtension.getASuccessorVariable()
or
dest.asCfgNode() = legacyExtension.getAReturnSuccessorNode(_)
or
dest.asCfgNode() = legacyExtension.getACalleeSuccessorNode(_)
)
}
/* Kind-converting steps declared by a legacy extension node. */
override predicate isAdditionalFlowStep(
DataFlow::Node src, DataFlow::Node dest, TaintKind srckind, TaintKind destkind
) {
exists(DataFlowExtension::DataFlowNode legacyExtension | src.asCfgNode() = legacyExtension |
dest.asCfgNode() = legacyExtension.getASuccessorNode(srckind, destkind)
)
}
/* Edges pruned by a legacy extension variable or a legacy extension node. */
override predicate isBarrierEdge(DataFlow::Node src, DataFlow::Node dest) {
(
exists(DataFlowExtension::DataFlowVariable legacyExtension |
src.asVariable() = legacyExtension and
legacyExtension.prunedSuccessor(dest.asVariable())
)
or
exists(DataFlowExtension::DataFlowNode legacyExtension |
src.asCfgNode() = legacyExtension and
legacyExtension.prunedSuccessor(dest.asCfgNode())
)
)
}
}

View File

@@ -0,0 +1,174 @@
/**
* Provides classes and predicates for tracking global state across the control flow and call graphs.
*
* NOTE: State tracking tracks both whether a state may apply to a given node in a given context *and*
* whether it may not apply.
* That `state.appliesTo(f, ctx)` holds implies nothing about whether `state.mayNotApplyTo(f, ctx)` holds.
* Neither may hold which merely means that `f` with context `ctx` is not reached during the analysis.
* Conversely, both may hold, which means that `state` may or may not apply depending on how `f` was reached.
*/
import python
private import semmle.python.pointsto.Base
private import semmle.python.pointsto.PointsTo
private import semmle.python.pointsto.PointsToContext
private import semmle.python.objects.ObjectInternal
/** A state that should be tracked. */
abstract class TrackableState extends string {
bindingset[this]
TrackableState() { this = this }
/** Holds if this state may apply to the control flow node `f`, regardless of the context. */
final predicate appliesTo(ControlFlowNode f) { this.appliesTo(f, _) }
/** Holds if this state may apply to the control flow node `f`, given the context `ctx`. */
final predicate appliesTo(ControlFlowNode f, Context ctx) {
StateTracking::appliesToNode(this, f, ctx, true)
}
/** Holds if this state may not apply to the control flow node `f`, given the context `ctx`. */
final predicate mayNotApplyTo(ControlFlowNode f, Context ctx) {
StateTracking::appliesToNode(this, f, ctx, false)
}
/** Holds if this state may not apply to the control flow node `f`, regardless of the context. */
final predicate mayNotApplyTo(ControlFlowNode f) { this.mayNotApplyTo(f, _) }
/**
* Holds if `test` tests for this state with the given `sense`, in a context
* `ctx` that applies to the scope of `test`.
* NOTE(review): the exact meaning of `sense` is defined by the consumers of
* this predicate, which are not visible here -- confirm.
*/
predicate testsFor(PyEdgeRefinement test, Context ctx, boolean sense) {
ctx.appliesToScope(test.getScope()) and this.testsFor(test, sense)
}
/**
* Holds if `test` tests for this state with the given `sense`, regardless of
* the context. Defaults to `none()`.
*/
predicate testsFor(PyEdgeRefinement test, boolean sense) { none() }
/**
* Holds if state starts at `f`.
* Either this predicate or `startsAt(ControlFlowNode f, Context ctx)`
* should be overridden by sub-classes.
*/
predicate startsAt(ControlFlowNode f) { none() }
/**
* Holds if state starts at `f` given context `ctx`.
* Either this predicate or `startsAt(ControlFlowNode f)`
* should be overridden by sub-classes.
*/
pragma[noinline]
predicate startsAt(ControlFlowNode f, Context ctx) { ctx.appliesTo(f) and this.startsAt(f) }
/**
* Holds if state ends at `f`.
* Either this predicate or `endsAt(ControlFlowNode f, Context ctx)`
* may be overridden by sub-classes.
*/
predicate endsAt(ControlFlowNode f) { none() }
/**
* Holds if state ends at `f` given context `ctx`.
* Either this predicate or `endsAt(ControlFlowNode f)`
* may be overridden by sub-classes.
*/
pragma[noinline]
predicate endsAt(ControlFlowNode f, Context ctx) { ctx.appliesTo(f) and this.endsAt(f) }
}
/**
 * Provides the propagation of `TrackableState`s over control flow nodes,
 * basic blocks, calls and scopes. Each fact is tracked in two senses:
 * `sense` = true ("may apply") and `sense` = false ("may not apply").
 */
module StateTracking {
/**
 * Holds if `state` with the given `sense` must not be propagated to `f` in `ctx`:
 * a state cannot "apply" at a node where it ends, and cannot "not apply"
 * at a node where it starts.
 */
private predicate not_allowed(TrackableState state, ControlFlowNode f, Context ctx, boolean sense) {
state.endsAt(f, ctx) and sense = true
or
state.startsAt(f, ctx) and sense = false
}
/**
 * Holds if `state` may apply (with `sense` = true) or may not apply (with `sense` = false) to
 * control flow node `f` given the context `ctx`.
 */
predicate appliesToNode(TrackableState state, ControlFlowNode f, Context ctx, boolean sense) {
/* An end point establishes that the state may not apply */
state.endsAt(f, ctx) and sense = false
or
/* A start point establishes that the state may apply */
state.startsAt(f, ctx) and sense = true
or
/* Otherwise the fact is inherited, unless `f` itself contradicts it */
not not_allowed(state, f, ctx, sense) and
(
exists(BasicBlock b |
/* First node in a block */
f = b.getNode(0) and appliesAtBlockStart(state, b, ctx, sense)
or
/* Other nodes in block, except trackable calls */
exists(int n |
f = b.getNode(n) and
appliesToNode(state, b.getNode(n - 1), ctx, sense) and
not exists(PythonFunctionObjectInternal func, Context callee |
callee.fromCall(f, func, ctx)
)
)
)
or
/* Function entry via call */
exists(PythonFunctionObjectInternal func, CallNode call, Context caller |
ctx.fromCall(call, func, caller) and
func.getScope().getEntryNode() = f and
appliesToNode(state, call.getAPredecessor(), caller, sense)
)
or
/* Function return */
exists(PythonFunctionObjectInternal func, Context callee |
callee.fromCall(f, func, ctx) and
appliesToNode(state, func.getScope().getANormalExit(), callee, sense)
)
or
/* Other scope entries */
exists(Scope s |
s.getEntryNode() = f and
ctx.appliesToScope(s)
|
/* A scope with no preceding scope starts out with the state not applying */
not exists(Scope pred | pred.precedes(s)) and
(ctx.isImport() or ctx.isRuntime()) and
sense = false
or
/* Otherwise the fact is taken from a normal exit of a preceding scope */
exists(Scope pred, Context pred_ctx |
appliesToNode(state, pred.getANormalExit(), pred_ctx, sense) and
pred.precedes(s) and
ctx.isRuntime()
|
pred_ctx.isRuntime() or pred_ctx.isImport()
)
)
)
}
/**
 * Holds if `state` may apply (with `sense` = true) or may not apply (with `sense` = false) at the
 * start of basic block `block` given the context `ctx`.
 */
private predicate appliesAtBlockStart(
TrackableState state, BasicBlock block, Context ctx, boolean sense
) {
/* An edge refinement leading into `block` establishes the fact directly */
exists(PyEdgeRefinement test |
test.getSuccessor() = block and
state.testsFor(test, ctx, sense)
)
or
/* Otherwise inherit from the end of a predecessor block, unless a test on that edge shows the opposite */
exists(BasicBlock pred |
pred.getASuccessor() = block and
appliesAtBlockEnd(state, pred, ctx, sense) and
/* NOTE(review): this uses the context-free `testsFor` overload, unlike the disjunct above; confirm that is intended */
not exists(PyEdgeRefinement test |
test.getPredecessor() = pred and
test.getSuccessor() = block and
state.testsFor(test, sense.booleanNot())
)
)
}
/**
 * Holds if `state` may apply (with `sense` = true) or may not apply (with `sense` = false) at the
 * end of basic block `block` given the context `ctx`.
 * This is the fact for the last node of `block`.
 */
private predicate appliesAtBlockEnd(
TrackableState state, BasicBlock block, Context ctx, boolean sense
) {
appliesToNode(state, block.getLastNode(), ctx, sense)
}
}

View File

@@ -0,0 +1,751 @@
/**
* # Python Taint Tracking Library
*
* The taint tracking library is described in three parts.
*
* 1. Specification of kinds, sources, sinks and flows.
* 2. The high level query API
* 3. The implementation.
*
*
* ## Specification
*
* There are four parts to the specification of a taint tracking query.
* These are:
*
* 1. Kinds
*
* The Python taint tracking library supports arbitrary kinds of taint.
* This is useful where you want to track something related to "taint", but that is in itself not dangerous.
* For example, we might want to track the flow of request objects.
* Request objects are not in themselves tainted, but they do contain tainted data.
* For example, the length or timestamp of a request may not pose a risk, but the GET or POST string probably does.
* So, we would want to track request objects distinctly from the request data in the GET or POST field.
*
* Kinds can also specify additional flow steps, but we recommend using the `DataFlowExtension` module,
* which is less likely to cause issues with unwanted recursion.
*
* 2. Sources
*
* Sources of taint can be added by importing a predefined sub-type of `TaintSource`, or by defining new ones.
*
* 3. Sinks (or vulnerabilities)
*
* Sinks can be added by importing a predefined sub-type of `TaintSink`, or by defining new ones.
*
* 4. Flow extensions
*
* Additional flow can be added by importing predefined sub-types of `DataFlowExtension::DataFlowNode`
* or `DataFlowExtension::DataFlowVariable` or by defining new ones.
*
*
* ## The high-level query API
*
* The `TaintedNode` fully describes the taint flow graph.
* The full graph can be expressed as:
*
* ```ql
* from TaintedNode n, TaintedNode s
* where s = n.getASuccessor()
* select n, s
* ```
*
* The source -> sink relation can be expressed either using `TaintedNode`:
* ```ql
* from TaintedNode src, TaintedNode sink
* where src.isSource() and sink.isSink() and src.getASuccessor*() = sink
* select src, sink
* ```
* or, using the specification API:
* ```ql
* from TaintSource src, TaintSink sink
* where src.flowsToSink(sink)
* select src, sink
* ```
*
* ## The implementation
*
* The data-flow graph used by the taint-tracking library is the one created by the points-to analysis,
* and consists of the base data-flow graph defined in `semmle/python/essa/Essa.qll`
* enhanced with precise variable flows, call graph and type information.
* This graph is then enhanced with additional flows as specified above.
* Since the call graph and points-to information is context sensitive, the taint graph must also be context sensitive.
*
* The taint graph is a directed graph where each node consists of a
* `(CFG node, context, taint)` triple although it could be thought of more naturally
* as a number of distinct graphs, one for each input taint-kind consisting of data flow nodes,
* `(CFG node, context)` pairs, labelled with their `taint`.
*
* The `TrackedValue` used in the implementation is not the taint kind specified by the user,
* but describes both the kind of taint and how that taint relates to any object referred to by a data-flow graph node or edge.
* Currently, only two types of `taint` are supported: simple taint, where the object is actually tainted;
* and attribute taint where a named attribute of the referred object is tainted.
*
* Support for tainted members (both specific members of tuples and the like,
* and generic members for mutable collections) is likely to be added in the near future, and other forms are possible.
* The types of taints are hard-wired with no user-visible extension method at the moment.
*/
import python
private import semmle.python.pointsto.Filters as Filters
private import semmle.python.objects.ObjectInternal
private import semmle.python.dataflow.Implementation
import semmle.python.dataflow.Configuration
/**
 * A 'kind' of taint. This may be almost anything,
 * but it is typically something like a "user-defined string".
 * Examples include data from an HTTP request object,
 * data from an SMS or other mobile data source,
 * or, for a super secure system, environment variables or
 * the local file system.
 */
abstract class TaintKind extends string {
  bindingset[this]
  TaintKind() { any() }

  /**
   * Gets the kind of taint that the named attribute has when an object carries this taint.
   * That is, if `x` has this kind of taint then `x.name` has `result` kind of taint.
   * Has no result by default.
   */
  TaintKind getTaintOfAttribute(string name) { none() }

  /**
   * Gets the kind of taint that results from calling the named method on an object
   * carrying this taint.
   * That is, if `x` has this kind of taint then `x.name()` has `result` kind of taint.
   * Has no result by default.
   */
  TaintKind getTaintOfMethodResult(string name) { none() }

  /**
   * Gets the taint resulting from the flow step `fromnode` -> `tonode`.
   * Has no result by default.
   */
  TaintKind getTaintForFlowStep(ControlFlowNode fromnode, ControlFlowNode tonode) { none() }

  /**
   * Gets the taint resulting from the flow step `fromnode` -> `tonode`, with `edgeLabel`.
   * By default this wraps the two-argument `getTaintForFlowStep` and labels the edge
   * with this kind.
   */
  TaintKind getTaintForFlowStep(ControlFlowNode fromnode, ControlFlowNode tonode, string edgeLabel) {
    edgeLabel = "custom taint flow step for " + this and
    result = this.getTaintForFlowStep(fromnode, tonode)
  }

  /**
   * Holds if this kind of taint "taints" `expr`, that is, some tainted node
   * with this kind has `expr` as its control flow node.
   */
  final predicate taints(ControlFlowNode expr) {
    expr = any(TaintedNode tainted | tainted.getTaintKind() = this).getCfgNode()
  }

  /** DEPRECATED -- Use getType() instead */
  deprecated ClassObject getClass() { none() }

  /**
   * Gets the class of this kind of taint.
   * For example, if this were a kind of string taint
   * the `result` would be `theStrType()`.
   * Has no result by default.
   */
  ClassValue getType() { none() }

  /**
   * Gets the boolean values (may be one, neither, or both) that
   * may result from the Python expression `bool(this)`
   */
  boolean booleanValue() {
    /*
     * Default to true as the vast majority of taint is strings and
     * the empty string is almost always benign.
     */

    result = true
  }

  /** Gets a representation of this kind; by default the kind string itself. */
  string repr() { result = this }

  /**
   * Gets the taint resulting from iterating over this kind of taint.
   * For example iterating over a text file produces lines. So iterating
   * over a tainted file would result in tainted strings.
   * Has no result by default.
   */
  TaintKind getTaintForIteration() { none() }

  /**
   * Holds if there is a flow step from `fromnode` to `tonode`, labelled `edgeLabel`.
   * By default this covers ESSA variable steps declared through
   * `DataFlowExtension::DataFlowVariable.getASuccessorVariable()`.
   */
  predicate flowStep(DataFlow::Node fromnode, DataFlow::Node tonode, string edgeLabel) {
    edgeLabel = "custom taint variable step" and
    tonode.asVariable() =
      fromnode.asVariable().(DataFlowExtension::DataFlowVariable).getASuccessorVariable()
  }
}
/**
 * An alias of `TaintKind`, so that the two names can be used interchangeably.
 */
class FlowLabel = TaintKind;
/**
 * A taint kind representing a collection of another taint kind.
 * We use `{kind}` to represent a mapping of string to `kind` and
 * `[kind]` to represent a flat collection of `kind`.
 * The use of `{` and `[` is chosen to reflect dict and list literals
 * in Python. We choose a single character prefix and suffix for simplicity
 * and ease of preventing infinite recursion.
 */
abstract class CollectionKind extends TaintKind {
  bindingset[this]
  CollectionKind() {
    exists(string opener | opener = this.charAt(0) | opener = "[" or opener = "{") and
    /* Prevent any collection kinds more than 2 deep */
    not (this.charAt(2) = "[" or this.charAt(2) = "{")
  }

  /** Gets the taint kind of the members held by this collection kind. */
  abstract TaintKind getMember();

  /** Holds if taint flows from the member `fromnode` into the collection `tonode`. */
  abstract predicate flowFromMember(DataFlow::Node fromnode, DataFlow::Node tonode);

  /** Holds if taint flows from the collection `fromnode` to its member `tonode`. */
  abstract predicate flowToMember(DataFlow::Node fromnode, DataFlow::Node tonode);
}
/**
 * A taint kind representing a flat collection of kinds.
 * Typically a sequence, but can include sets.
 */
class SequenceKind extends CollectionKind {
  TaintKind itemKind;

  SequenceKind() { this = "[" + itemKind + "]" }

  /** Gets the taint kind of the items of this sequence kind. */
  TaintKind getItem() { result = itemKind }

  /**
   * Gets the item kind for a step from an operand `fromnode` of a `%` binary
   * expression `tonode`, provided the item kind has the builtin `str` type.
   */
  override TaintKind getTaintForFlowStep(ControlFlowNode fromnode, ControlFlowNode tonode) {
    result = this.getItem() and
    result.getType() = ObjectInternal::builtin("str") and
    exists(BinaryExprNode modExpr | modExpr = tonode |
      fromnode = modExpr.getAnOperand() and
      modExpr.getOp() instanceof Mod
    )
  }

  /** Gets the item kind as the result of calling `pop`. */
  override TaintKind getTaintOfMethodResult(string name) {
    result = this.getItem() and
    name = "pop"
  }

  override string repr() { result = "sequence of " + itemKind }

  override TaintKind getTaintForIteration() { result = itemKind }

  override TaintKind getMember() { result = itemKind }

  /** Holds if `fromnode` is an element of the list, tuple or set display `tonode`. */
  override predicate flowFromMember(DataFlow::Node fromnode, DataFlow::Node tonode) {
    exists(ControlFlowNode item, ControlFlowNode seq |
      item = fromnode.asCfgNode() and seq = tonode.asCfgNode()
    |
      sequence_construct(item, seq)
    )
  }

  /** Holds if `tonode` is a non-slice subscript load on the sequence `fromnode`. */
  override predicate flowToMember(DataFlow::Node fromnode, DataFlow::Node tonode) {
    exists(ControlFlowNode seq, ControlFlowNode item |
      seq = fromnode.asCfgNode() and item = tonode.asCfgNode()
    |
      SequenceKind::itemFlowStep(seq, item)
    )
  }
}
/** Provides the flow steps that preserve or unpack sequence taint. */
module SequenceKind {
  /**
   * Holds if a sequence flows from `fromnode` to `tonode` via the step
   * described by `edgeLabel`.
   */
  predicate flowStep(ControlFlowNode fromnode, ControlFlowNode tonode, string edgeLabel) {
    edgeLabel = "slicing" and subscript_slice(fromnode, tonode)
    or
    edgeLabel = "sequence construction" and sequence_call(fromnode, tonode)
    or
    /* NOTE(review): the label reads "dict copy" although this is the sequence module -- confirm */
    edgeLabel = "dict copy" and
    Implementation::copyCall(fromnode, tonode)
    or
    edgeLabel = "binary operation" and fromnode = tonode.(BinaryExprNode).getAnOperand()
  }

  /** Holds if `tonode` is a non-slice subscript load on the sequence `fromnode`. */
  predicate itemFlowStep(ControlFlowNode fromnode, ControlFlowNode tonode) {
    subscript_index(fromnode, tonode)
  }
}
/** Provides the flow steps that preserve dict taint. */
module DictKind {
  /**
   * Holds if a dict flows from `fromnode` to `tonode` via the step
   * described by `edgeLabel`: either a copy call or a `dict(...)` call
   * taking `fromnode` as its first positional argument.
   */
  predicate flowStep(ControlFlowNode fromnode, ControlFlowNode tonode, string edgeLabel) {
    exists(CallNode dictCall | dictCall = tonode |
      dictCall.getArg(0) = fromnode and
      dictCall.getFunction().pointsTo(ObjectInternal::builtin("dict"))
    ) and
    edgeLabel = "dict() call"
    or
    edgeLabel = "dict copy" and
    Implementation::copyCall(fromnode, tonode)
  }
}
/*
 * Helper for sequence flow steps.
 * Holds if `sub` is a load of the form `obj[index]` where the index is not a slice.
 */

pragma[noinline]
private predicate subscript_index(ControlFlowNode obj, SubscriptNode sub) {
  obj = sub.getObject() and
  not sub.getNode().getIndex() instanceof Slice and
  sub.isLoad()
}
/* Holds if `sub` is a load of the form `obj[a:b]`, that is, the index is a slice. */
pragma[noinline]
private predicate subscript_slice(ControlFlowNode obj, SubscriptNode sub) {
  obj = sub.getObject() and
  sub.getNode().getIndex() instanceof Slice and
  sub.isLoad()
}
/**
 * A taint kind representing a mapping of objects to kinds.
 * Typically a dict, but can include other mappings.
 */
class DictKind extends CollectionKind {
  TaintKind valueKind;

  DictKind() { this = "{" + valueKind + "}" }

  /** Gets the taint kind of the values of this mapping kind. */
  TaintKind getValue() { result = valueKind }

  /**
   * Gets the value kind for `get`, and a sequence of the value kind
   * for `values` and `itervalues`.
   */
  override TaintKind getTaintOfMethodResult(string name) {
    (name = "values" or name = "itervalues") and
    result.(SequenceKind).getItem() = valueKind
    or
    name = "get" and
    result = valueKind
  }

  override string repr() { result = "dict of " + valueKind }

  override TaintKind getMember() { result = valueKind }

  /** Holds if `fromnode` is a value used to construct the dict `tonode`. */
  override predicate flowFromMember(DataFlow::Node fromnode, DataFlow::Node tonode) {
    exists(ControlFlowNode item, ControlFlowNode dict |
      item = fromnode.asCfgNode() and dict = tonode.asCfgNode()
    |
      dict_construct(item, dict)
    )
  }

  /** Holds if `tonode` is a non-slice subscript load on the dict `fromnode`. */
  override predicate flowToMember(DataFlow::Node fromnode, DataFlow::Node tonode) {
    exists(ControlFlowNode dict, ControlFlowNode item |
      dict = fromnode.asCfgNode() and item = tonode.asCfgNode()
    |
      subscript_index(dict, item)
    )
  }
}
/**
 * A type of sanitizer of untrusted data.
 * Examples include sanitizers for http responses, for DB access or for shell commands.
 * Usually a sanitizer can only sanitize data for one particular use.
 * For example, a sanitizer for DB commands would not be safe to use for http responses.
 *
 * Every predicate below holds for nothing by default; sub-classes override
 * the ones that describe how they sanitize.
 */
abstract class Sanitizer extends string {
bindingset[this]
Sanitizer() { any() }
/** Holds if `taint` cannot flow through `node`. */
predicate sanitizingNode(TaintKind taint, ControlFlowNode node) { none() }
/** Holds if a call to `callee` removes the `taint`. */
predicate sanitizingCall(TaintKind taint, FunctionObject callee) { none() }
/** Holds if the edge refinement `test` shows the value to be untainted with `taint`. */
predicate sanitizingEdge(TaintKind taint, PyEdgeRefinement test) { none() }
/** Holds if the single-successor guard `test` shows the value to be untainted with `taint`. */
predicate sanitizingSingleEdge(TaintKind taint, SingleSuccessorGuard test) { none() }
/** Holds if the definition `def` shows the value to be untainted with `taint`. */
predicate sanitizingDefinition(TaintKind taint, EssaDefinition def) { none() }
}
/**
 * A source of taintedness.
 * Users of the taint tracking library should override this
 * class to provide their own sources.
 */
abstract class TaintSource extends @py_flow_node {
  /** Gets a textual representation of this element. */
  string toString() { result = "Taint source" }

  /**
   * Holds if `this` is a source of taint kind `kind`
   *
   * This must be overridden by subclasses to specify sources of taint.
   *
   * The smaller this predicate is, the faster `Taint.flowsTo()` will converge.
   */
  abstract predicate isSourceOf(TaintKind kind);

  /**
   * Holds if `this` is a source of taint kind `kind` for the given context.
   * By default this holds only for the top context.
   * Generally, this should not need to be overridden; overriding `isSourceOf(kind)` should be sufficient.
   *
   * The smaller this predicate is, the faster `Taint.flowsTo()` will converge.
   */
  predicate isSourceOf(TaintKind kind, TaintTrackingContext context) {
    this.isSourceOf(kind) and
    context.isTop()
  }

  /** Gets the location of this source, taken from the underlying control flow node. */
  Location getLocation() {
    exists(ControlFlowNode node | node = this | result = node.getLocation())
  }

  /**
   * Holds if this element is at the specified location.
   * The location spans column `startcolumn` of line `startline` to
   * column `endcolumn` of line `endline` in file `filepath`.
   * For more information, see
   * [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
   */
  predicate hasLocationInfo(
    string filepath, int startline, int startcolumn, int endline, int endcolumn
  ) {
    exists(Location loc | loc = this.getLocation() |
      loc.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
    )
  }

  /**
   * Gets a TaintedNode for this taint source: a node at this control flow node
   * whose kind and context make this a source and whose path has no attribute.
   */
  TaintedNode getATaintNode() {
    this = result.getCfgNode() and
    result.getPath().noAttribute() and
    this.isSourceOf(result.getTaintKind(), result.getContext())
  }

  /** Holds if taint of kind `srckind` can flow from this source to sink `sink` */
  final predicate flowsToSink(TaintKind srckind, TaintSink sink) {
    this.isSourceOf(srckind, _) and
    exists(TaintedNode origin, TaintedNode target |
      origin = this.getATaintNode() and
      srckind = origin.getTaintKind() and
      origin.flowsTo(target)
    |
      target.isSink() and
      target.getPath().noAttribute() and
      target.getCfgNode() = sink and
      sink.sinks(target.getTaintKind())
    )
  }

  /** Holds if taint can flow from this source to taint sink `sink` */
  final predicate flowsToSink(TaintSink sink) {
    exists(TaintKind anyKind | this.flowsToSink(anyKind, sink))
  }
}
/**
 * Warning: Advanced feature. Users are strongly recommended to use `TaintSource` instead.
 * A source of taintedness on the ESSA data-flow graph.
 * Users of the taint tracking library can override this
 * class to provide their own sources on the ESSA graph.
 */
abstract class TaintedDefinition extends EssaNodeDefinition {
  /**
   * Holds if `this` is a source of taint kind `kind`
   *
   * This should be overridden by subclasses to specify sources of taint.
   *
   * The smaller this predicate is, the faster `Taint.flowsTo()` will converge.
   */
  abstract predicate isSourceOf(TaintKind kind);

  /**
   * Holds if `this` is a source of taint kind `kind` for the given context.
   * By default this holds only for the top context.
   * Generally, this should not need to be overridden; overriding `isSourceOf(kind)` should be sufficient.
   *
   * The smaller this predicate is, the faster `Taint.flowsTo()` will converge.
   */
  predicate isSourceOf(TaintKind kind, TaintTrackingContext context) {
    this.isSourceOf(kind) and
    context.isTop()
  }
}
/**
 * The first positional argument of a method call named `update`; data flows
 * from it to the variable refined by that call.
 */
private class DictUpdate extends DataFlowExtension::DataFlowNode {
  MethodCallsiteRefinement call;

  DictUpdate() {
    exists(CallNode updateCall |
      updateCall = call.getCall() and
      updateCall.getArg(0) = this and
      updateCall.getFunction().(AttrNode).getName() = "update"
    )
  }

  override EssaVariable getASuccessorVariable() { result = call.getVariable() }
}
/**
 * The first positional argument of a method call named `extend`; data flows
 * from it to the variable refined by that call.
 */
private class SequenceExtends extends DataFlowExtension::DataFlowNode {
  MethodCallsiteRefinement call;

  SequenceExtends() {
    exists(CallNode extendCall |
      extendCall = call.getCall() and
      extendCall.getArg(0) = this and
      extendCall.getFunction().(AttrNode).getName() = "extend"
    )
  }

  override EssaVariable getASuccessorVariable() { result = call.getVariable() }
}
/**
 * A node that is vulnerable to one or more types of taint.
 * These nodes provide the sinks when computing the taint flow graph.
 * An example would be an argument to a write to a http response object,
 * such an argument would be vulnerable to unsanitized user-input (XSS).
 *
 * Users of the taint tracking library should extend this
 * class to provide their own sink nodes.
 */
abstract class TaintSink extends @py_flow_node {
  /** Gets a textual representation of this element. */
  string toString() { result = "Taint sink" }

  /**
   * Holds if `this` "sinks" taint kind `taint`
   * Typically this means that `this` is vulnerable to taint kind `taint`.
   *
   * This must be overridden by subclasses to specify vulnerabilities or other sinks of taint.
   */
  abstract predicate sinks(TaintKind taint);

  /** Gets the location of this sink, taken from the underlying control flow node. */
  Location getLocation() {
    exists(ControlFlowNode node | node = this | result = node.getLocation())
  }

  /**
   * Holds if this element is at the specified location.
   * The location spans column `startcolumn` of line `startline` to
   * column `endcolumn` of line `endline` in file `filepath`.
   * For more information, see
   * [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
   */
  predicate hasLocationInfo(
    string filepath, int startline, int startcolumn, int endline, int endcolumn
  ) {
    exists(Location loc | loc = this.getLocation() |
      loc.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
    )
  }
}
/**
 * Extension for data-flow, to help express data-flow paths that are
 * library or framework specific and cannot be inferred by the general
 * data-flow machinery.
 *
 * Every member predicate below has no result (or holds for nothing) by
 * default; extensions override the ones they need.
 */
module DataFlowExtension {
/** A control flow node that modifies the basic data-flow. */
abstract class DataFlowNode extends @py_flow_node {
/** Gets a textual representation of this element. */
string toString() { result = "Dataflow extension node" }
/**
 * Gets a successor node for data-flow.
 * Data (all forms) is assumed to flow from `this` to `result`.
 */
ControlFlowNode getASuccessorNode() { none() }
/**
 * Gets a successor variable for data-flow.
 * Data (all forms) is assumed to flow from `this` to `result`.
 * Note: This is an unlikely form of flow. See `DataFlowVariable.getASuccessorVariable()`.
 */
EssaVariable getASuccessorVariable() { none() }
/**
 * Holds if data cannot flow from `this` to `succ`,
 * even though it would normally do so.
 */
predicate prunedSuccessor(ControlFlowNode succ) { none() }
/**
 * Gets a successor node, where the successor node will be tainted with `tokind`
 * when `this` is tainted with `fromkind`.
 * Extensions to `DataFlowNode` should override this to provide additional taint steps.
 */
ControlFlowNode getASuccessorNode(TaintKind fromkind, TaintKind tokind) { none() }
/**
 * Gets a successor node for data-flow with a change of context from callee to caller
 * (going *up* the call-stack) across call-site `call`.
 * Data (all forms) is assumed to flow from `this` to `result`.
 * Extensions to `DataFlowNode` should override this to provide additional taint steps.
 */
ControlFlowNode getAReturnSuccessorNode(CallNode call) { none() }
/**
 * Gets a successor node for data-flow with a change of context from caller to callee
 * (going *down* the call-stack) across call-site `call`.
 * Data (all forms) is assumed to flow from `this` to `result`.
 * Extensions to `DataFlowNode` should override this to provide additional taint steps.
 */
ControlFlowNode getACalleeSuccessorNode(CallNode call) { none() }
}
/** Data flow variable that modifies the basic data-flow. */
class DataFlowVariable extends EssaVariable {
/**
 * Gets a successor node for data-flow.
 * Data (all forms) is assumed to flow from `this` to `result`.
 * Note: This is an unlikely form of flow. See `DataFlowNode.getASuccessorNode()`.
 */
ControlFlowNode getASuccessorNode() { none() }
/**
 * Gets a successor variable for data-flow.
 * Data (all forms) is assumed to flow from `this` to `result`.
 */
EssaVariable getASuccessorVariable() { none() }
/**
 * Holds if data cannot flow from `this` to `succ`,
 * even though it would normally do so.
 */
predicate prunedSuccessor(EssaVariable succ) { none() }
}
}
/** A taint tracking node for which `isSource()` holds. */
class TaintedPathSource extends TaintTrackingNode {
TaintedPathSource() { this.isSource() }
/** Gets the data-flow node of this source. */
DataFlow::Node getSource() { result = this.getNode() }
}
/** A taint tracking node for which `isSink()` holds. */
class TaintedPathSink extends TaintTrackingNode {
TaintedPathSink() { this.isSink() }
/** Gets the data-flow node of this sink. */
DataFlow::Node getSink() { result = this.getNode() }
}
/* Backwards compatible name: `TaintedNode` is an alias of `TaintTrackingNode`. */
class TaintedNode = TaintTrackingNode;
/* Helpers for Validating classes */
private import semmle.python.pointsto.PointsTo
/**
 * Data flow module providing an interface compatible with
 * the other language implementations.
 */
module DataFlow {
  /**
   * Generic taint kind, source and sink classes for convenience and
   * compatibility with other language libraries
   */
  class Extension = DataFlowExtension::DataFlowNode;

  /**
   * DEPRECATED. A data-flow configuration expressed on control flow nodes.
   * It is adapted to a `TaintTracking::Configuration` using a single generic taint kind.
   */
  abstract deprecated class Configuration extends string {
    bindingset[this]
    Configuration() { this = this }

    /** Holds if `source` is a source for this configuration. */
    abstract predicate isSource(ControlFlowNode source);

    /** Holds if `sink` is a sink for this configuration. */
    abstract predicate isSink(ControlFlowNode sink);

    /** Holds if the tainted node `source` of this configuration flows to the tainted node `sink`. */
    private predicate hasFlowPath(TaintedNode source, TaintedNode sink) {
      this = source.getConfiguration() and
      source.flowsTo(sink) and
      this.isSink(sink.getCfgNode()) and
      this.isSource(source.getCfgNode())
    }

    /** Holds if data flows from `source` to `sink` under this configuration. */
    predicate hasFlow(ControlFlowNode source, ControlFlowNode sink) {
      this.isSource(source) and
      this.isSink(sink) and
      exists(TaintedNode pathStart, TaintedNode pathEnd |
        this.hasFlowPath(pathStart, pathEnd) and
        source = pathStart.getCfgNode() and
        sink = pathEnd.getCfgNode()
      )
    }
  }

  /** Adapts a deprecated `Configuration` to the taint tracking configuration API. */
  deprecated private class ConfigurationAdapter extends TaintTracking::Configuration {
    ConfigurationAdapter() { this instanceof Configuration }

    override predicate isSource(DataFlow::Node node, TaintKind kind) {
      kind instanceof DataFlowType and
      this.(Configuration).isSource(node.asCfgNode())
    }

    override predicate isSink(DataFlow::Node node, TaintKind kind) {
      kind instanceof DataFlowType and
      this.(Configuration).isSink(node.asCfgNode())
    }
  }

  private newtype TDataFlowNode =
    TEssaNode(EssaVariable var) or
    TCfgNode(ControlFlowNode node)

  /** A data-flow node: either an ESSA variable or a control flow node. */
  abstract class Node extends TDataFlowNode {
    /** Gets the control flow node wrapped by this node, if any. */
    abstract ControlFlowNode asCfgNode();

    /** Gets the ESSA variable wrapped by this node, if any. */
    abstract EssaVariable asVariable();

    /** Gets a textual representation of this element. */
    abstract string toString();

    /** Gets the scope of this node. */
    abstract Scope getScope();

    /** Gets the basic block of this node. */
    abstract BasicBlock getBasicBlock();

    /** Gets the location of this node. */
    abstract Location getLocation();

    /** Gets the AST node of the control flow node wrapped by this node, if any. */
    AstNode asAstNode() { result = this.asCfgNode().getNode() }

    /** For backwards compatibility -- Use asAstNode() instead */
    deprecated AstNode getNode() { result = this.asAstNode() }
  }

  /** A data-flow node that wraps a control flow node. */
  class CfgNode extends Node, TCfgNode {
    override ControlFlowNode asCfgNode() { TCfgNode(result) = this }

    override EssaVariable asVariable() { none() }

    /** Gets a textual representation of this element. */
    override string toString() { result = this.asAstNode().toString() }

    override Scope getScope() { result = this.asCfgNode().getScope() }

    override BasicBlock getBasicBlock() { result = this.asCfgNode().getBasicBlock() }

    override Location getLocation() { result = this.asCfgNode().getLocation() }
  }

  /** A data-flow node that wraps an ESSA variable. */
  class EssaNode extends Node, TEssaNode {
    override ControlFlowNode asCfgNode() { none() }

    override EssaVariable asVariable() { TEssaNode(result) = this }

    /** Gets a textual representation of this element. */
    override string toString() { result = this.asVariable().toString() }

    override Scope getScope() { result = this.asVariable().getScope() }

    /** Gets the basic block of the definition of the wrapped variable. */
    override BasicBlock getBasicBlock() {
      exists(EssaDefinition def | def = this.asVariable().getDefinition() |
        result = def.getBasicBlock()
      )
    }

    /** Gets the location of the definition of the wrapped variable. */
    override Location getLocation() {
      exists(EssaDefinition def | def = this.asVariable().getDefinition() |
        result = def.getLocation()
      )
    }
  }
}
/**
 * The generic "Data flow" taint kind used to adapt `DataFlow::Configuration`.
 * It exists only when at least one such configuration exists.
 */
deprecated private class DataFlowType extends TaintKind {
  DataFlowType() {
    exists(DataFlow::Configuration config) and
    this = "Data flow"
  }
}
/*
 * Holds if `itemnode` is a value of the dict display `dictnode`, or a
 * keyword argument of the `dict(...)` call `dictnode`.
 */

pragma[noinline]
private predicate dict_construct(ControlFlowNode itemnode, ControlFlowNode dictnode) {
  exists(CallNode dictCall | dictCall = dictnode |
    itemnode = dictCall.getArgByName(_) and
    dictCall.getFunction().pointsTo(ObjectInternal::builtin("dict"))
  )
  or
  itemnode = dictnode.(DictNode).getAValue()
}
/* Holds if `itemnode` is an element of the loaded list, tuple or set display `seqnode`. */
pragma[noinline]
private predicate sequence_construct(ControlFlowNode itemnode, ControlFlowNode seqnode) {
  (
    itemnode = seqnode.(SetNode).getAnElement()
    or
    itemnode = seqnode.(TupleNode).getElement(_)
    or
    itemnode = seqnode.(ListNode).getElement(_)
  ) and
  seqnode.isLoad()
}
/*
 * A call to construct a sequence from a sequence or iterator.
 * Holds if `tonode` is a call to the builtin `list`, `tuple` or `set`
 * with `fromnode` as its first positional argument.
 */

pragma[noinline]
private predicate sequence_call(ControlFlowNode fromnode, CallNode tonode) {
  exists(string builtinName |
    builtinName = "list" or builtinName = "tuple" or builtinName = "set"
  |
    tonode.getFunction().pointsTo(ObjectInternal::builtin(builtinName))
  ) and
  fromnode = tonode.getArg(0)
}

View File

@@ -4,9 +4,9 @@
*/
private import python
private import experimental.dataflow.DataFlow
private import experimental.dataflow.RemoteFlowSources
private import experimental.semmle.python.Concepts
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.Concepts
private module Dill {
/** Gets a reference to the `dill` module. */

View File

@@ -4,10 +4,11 @@
*/
private import python
private import experimental.dataflow.DataFlow
private import experimental.dataflow.RemoteFlowSources
private import experimental.dataflow.TaintTracking
private import experimental.semmle.python.Concepts
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.Concepts
private import semmle.python.frameworks.PEP249
private import semmle.python.regex
/**
@@ -76,6 +77,10 @@ private module Django {
/** Gets a reference to the `django.db` module. */
DataFlow::Node db() { result = django_attr("db") }
class DjangoDb extends PEP249Module {
DjangoDb() { this = db() }
}
/** Provides models for the `django.db` module. */
module db {
/** Gets a reference to the `django.db.connection` object. */
@@ -92,45 +97,10 @@ private module Django {
/** Gets a reference to the `django.db.connection` object. */
DataFlow::Node connection() { result = connection(DataFlow::TypeTracker::end()) }
/** Provides models for the `django.db.connection.cursor` method. */
module cursor {
/** Gets a reference to the `django.db.connection.cursor` metod. */
private DataFlow::Node methodRef(DataFlow::TypeTracker t) {
t.start() and
result = DataFlow::importNode("django.db.connection.cursor")
or
t.startInAttr("cursor") and
result = connection()
or
exists(DataFlow::TypeTracker t2 | result = methodRef(t2).track(t2, t))
}
/** Gets a reference to the `django.db.connection.cursor` metod. */
DataFlow::Node methodRef() { result = methodRef(DataFlow::TypeTracker::end()) }
/** Gets a reference to a result of calling `django.db.connection.cursor`. */
private DataFlow::Node methodResult(DataFlow::TypeTracker t) {
t.start() and
result.asCfgNode().(CallNode).getFunction() = methodRef().asCfgNode()
or
exists(DataFlow::TypeTracker t2 | result = methodResult(t2).track(t2, t))
}
/** Gets a reference to a result of calling `django.db.connection.cursor`. */
DataFlow::Node methodResult() { result = methodResult(DataFlow::TypeTracker::end()) }
class DjangoDbConnection extends Connection::InstanceSource {
DjangoDbConnection() { this = connection() }
}
/** Gets a reference to the `django.db.connection.cursor.execute` function. */
private DataFlow::Node execute(DataFlow::TypeTracker t) {
t.startInAttr("execute") and
result = cursor::methodResult()
or
exists(DataFlow::TypeTracker t2 | result = execute(t2).track(t2, t))
}
/** Gets a reference to the `django.db.connection.cursor.execute` function. */
DataFlow::Node execute() { result = execute(DataFlow::TypeTracker::end()) }
// -------------------------------------------------------------------------
// django.db.models
// -------------------------------------------------------------------------
@@ -276,23 +246,6 @@ private module Django {
}
}
/**
* A call to the `django.db.connection.cursor.execute` function.
*
* See
* - https://docs.djangoproject.com/en/3.1/topics/db/sql/#executing-custom-sql-directly
* - https://docs.djangoproject.com/en/3.1/topics/db/sql/#connections-and-cursors
*/
private class DbConnectionExecute extends SqlExecution::Range, DataFlow::CfgNode {
override CallNode node;
DbConnectionExecute() { node.getFunction() = django::db::execute().asCfgNode() }
override DataFlow::Node getSql() {
result.asCfgNode() in [node.getArg(0), node.getArgByName("sql")]
}
}
/**
* A call to the `annotate` function on a model using a `RawSQL` argument.
*

View File

@@ -8,9 +8,9 @@
*/
private import python
private import experimental.dataflow.DataFlow
private import experimental.dataflow.RemoteFlowSources
private import experimental.semmle.python.Concepts
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.Concepts
/**
* Provides classes modeling security-relevant aspects of the `fabric` PyPI package, for

View File

@@ -4,11 +4,11 @@
*/
private import python
private import experimental.dataflow.DataFlow
private import experimental.dataflow.RemoteFlowSources
private import experimental.dataflow.TaintTracking
private import experimental.semmle.python.Concepts
private import experimental.semmle.python.frameworks.Werkzeug
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.Concepts
private import semmle.python.frameworks.Werkzeug
/**
* Provides models for the `flask` PyPI package.

View File

@@ -4,8 +4,8 @@
*/
private import python
private import experimental.dataflow.DataFlow
private import experimental.semmle.python.Concepts
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
/**
* Provides models for the `invoke` PyPI package.

View File

@@ -0,0 +1,38 @@
/**
* Provides classes modeling security-relevant aspects of the `MySQLdb` PyPI package.
* See
* - https://mysqlclient.readthedocs.io/index.html
* - https://pypi.org/project/MySQL-python/
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.Concepts
private import PEP249
/**
 * Models the `MySQLdb` PyPI package by registering its module as a `PEP249Module`.
 * See
 * - https://mysqlclient.readthedocs.io/index.html
 * - https://pypi.org/project/MySQL-python/
 */
module MySQLdb {
  // ---------------------------------------------------------------------------
  // MySQLdb
  // ---------------------------------------------------------------------------
  /** Gets a reference to the `MySQLdb` module, as seen by type tracker `t`. */
  private DataFlow::Node moduleMySQLdb(DataFlow::TypeTracker t) {
    // Recursive case: extend an already tracked reference by one tracking step.
    exists(DataFlow::TypeTracker prev | result = moduleMySQLdb(prev).track(prev, t))
    or
    // Base case: the import of the module itself.
    result = DataFlow::importNode("MySQLdb") and
    t.start()
  }

  /** Gets a reference to the `MySQLdb` module. */
  DataFlow::Node moduleMySQLdb() { result = moduleMySQLdb(DataFlow::TypeTracker::end()) }

  /** A reference to the `MySQLdb` module, treated as a module implementing PEP 249. */
  class MySQLdb extends PEP249Module {
    MySQLdb() { moduleMySQLdb() = this }
  }
}

View File

@@ -0,0 +1,84 @@
/**
* Provides classes modeling security-relevant aspects of the `mysql-connector-python` package.
* See
* - https://dev.mysql.com/doc/connector-python/en/
* - https://dev.mysql.com/doc/connector-python/en/connector-python-example-connecting.html
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.Concepts
private import PEP249
/**
* Provides models for the `mysql-connector-python` package.
*
* The only model defined here registers references to `mysql.connector` as a
* `PEP249Module`.
* See
* - https://dev.mysql.com/doc/connector-python/en/
* - https://dev.mysql.com/doc/connector-python/en/connector-python-example-connecting.html
*/
module MysqlConnectorPython {
// ---------------------------------------------------------------------------
// mysql
// ---------------------------------------------------------------------------
/** Gets a reference to the `mysql` module, as seen by type tracker `t`. */
private DataFlow::Node mysql(DataFlow::TypeTracker t) {
// Base case: the import of `mysql` itself.
t.start() and
result = DataFlow::importNode("mysql")
or
// Recursive case: extend an already tracked reference by one tracking step.
exists(DataFlow::TypeTracker t2 | result = mysql(t2).track(t2, t))
}
/** Gets a reference to the `mysql` module. */
DataFlow::Node mysql() { result = mysql(DataFlow::TypeTracker::end()) }
/**
* Gets a reference to the attribute `attr_name` of the `mysql` module,
* as seen by type tracker `t`.
* WARNING: Only holds for a few predefined attributes (currently only `connector`).
*/
private DataFlow::Node mysql_attr(DataFlow::TypeTracker t, string attr_name) {
attr_name in ["connector"] and
(
// Either the attribute is imported directly (import of `mysql.<attr_name>`) ...
t.start() and
result = DataFlow::importNode("mysql" + "." + attr_name)
or
// ... or tracking starts in attribute `attr_name` at a reference to `mysql`.
t.startInAttr(attr_name) and
result = mysql()
)
or
// Due to bad performance when using normal setup with `mysql_attr(t2, attr_name).track(t2, t)`
// we have inlined that code and forced a join
exists(DataFlow::TypeTracker t2 |
exists(DataFlow::StepSummary summary |
mysql_attr_first_join(t2, attr_name, result, summary) and
t = t2.append(summary)
)
)
}
/**
* Holds if `res` is one type-tracking step, described by `summary`, away from a
* node given by `mysql_attr(t2, attr_name)`.
*
* Helper for the recursive case of `mysql_attr`; it is kept as a separate
* `pragma[nomagic]` predicate as part of the performance workaround described there.
*/
pragma[nomagic]
private predicate mysql_attr_first_join(
DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, DataFlow::StepSummary summary
) {
DataFlow::StepSummary::step(mysql_attr(t2, attr_name), res, summary)
}
/**
* Gets a reference to the attribute `attr_name` of the `mysql` module.
* WARNING: Only holds for a few predefined attributes.
*/
private DataFlow::Node mysql_attr(string attr_name) {
result = mysql_attr(DataFlow::TypeTracker::end(), attr_name)
}
/** Provides models for the `mysql` module. */
module mysql {
/**
* The `mysql.connector` module, registered as a module implementing PEP 249.
* See https://dev.mysql.com/doc/connector-python/en/connector-python-example-connecting.html
*/
class MysqlConnector extends PEP249Module {
MysqlConnector() { this = mysql_attr("connector") }
}
}
}

View File

@@ -0,0 +1,117 @@
/**
* Provides classes modeling PEP 249.
* See https://www.python.org/dev/peps/pep-0249/.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.Concepts
/**
* A module implementing PEP 249. Extend this class for implementations.
*
* Instances are data-flow nodes that refer to such a module; the `connect`
* predicate in this file starts its tracking from them.
*/
abstract class PEP249Module extends DataFlow::Node { }
/**
 * Gets a reference to the `connect` attribute of a module implementing PEP 249,
 * as seen by type tracker `t`.
 */
private DataFlow::Node connect(DataFlow::TypeTracker t) {
  // Recursive case: extend an already tracked reference by one tracking step.
  exists(DataFlow::TypeTracker prev | result = connect(prev).track(prev, t))
  or
  // Base case: start in attribute `connect` at any PEP 249 module reference.
  result instanceof PEP249Module and
  t.startInAttr("connect")
}
/**
* Gets a reference to the `connect` attribute of a module implementing PEP 249.
*
* Note that this is the function reference, not a call to it.
*/
DataFlow::Node connect() { result = connect(DataFlow::TypeTracker::end()) }
/**
 * Provides models for the `db.Connection` class
 *
 * See https://www.python.org/dev/peps/pep-0249/#connection-objects.
 */
module Connection {
  /**
   * A source of an instance of `db.Connection`.
   *
   * Such a source may be an instantiation of the class, the return value of a
   * function call, or a special parameter that external libraries set when they
   * call a function.
   *
   * Use the `Connection::instance()` predicate to get references to instances of
   * `db.Connection`.
   *
   * Extend this class if the module implementing PEP 249 offers more direct ways
   * to obtain a connection than going through `connect`.
   */
  abstract class InstanceSource extends DataFlow::Node { }

  /** A call to the `connect` function of a PEP 249 module, treated as a connection source. */
  private class ClassInstantiation extends InstanceSource, DataFlow::CfgNode {
    override CallNode node;

    ClassInstantiation() { connect().asCfgNode() = node.getFunction() }
  }

  /** Gets a reference to an instance of `db.Connection`, as seen by type tracker `t`. */
  private DataFlow::Node instance(DataFlow::TypeTracker t) {
    // Recursive case: extend an already tracked reference by one tracking step.
    exists(DataFlow::TypeTracker prev | result = instance(prev).track(prev, t))
    or
    // Base case: any declared instance source.
    result instanceof InstanceSource and
    t.start()
  }

  /** Gets a reference to an instance of `db.Connection`. */
  DataFlow::Node instance() { result = instance(DataFlow::TypeTracker::end()) }
}
/**
 * Provides models for the `db.Connection.cursor` method.
 * See https://www.python.org/dev/peps/pep-0249/#cursor.
 */
module cursor {
  /** Gets a reference to the `db.connection.cursor` method, as seen by type tracker `t`. */
  private DataFlow::Node methodRef(DataFlow::TypeTracker t) {
    // Recursive case: extend an already tracked reference by one tracking step.
    exists(DataFlow::TypeTracker prev | result = methodRef(prev).track(prev, t))
    or
    // Base case: start in attribute `cursor` at a connection instance.
    result = Connection::instance() and
    t.startInAttr("cursor")
  }

  /** Gets a reference to the `db.connection.cursor` method. */
  DataFlow::Node methodRef() { result = methodRef(DataFlow::TypeTracker::end()) }

  /**
   * Gets a reference to a result of calling `db.connection.cursor`,
   * as seen by type tracker `t`.
   */
  private DataFlow::Node methodResult(DataFlow::TypeTracker t) {
    // Recursive case: extend an already tracked reference by one tracking step.
    exists(DataFlow::TypeTracker prev | result = methodResult(prev).track(prev, t))
    or
    // Base case: a call whose callee is a reference to the `cursor` method.
    exists(CallNode call |
      call.getFunction() = methodRef().asCfgNode() and
      result.asCfgNode() = call
    ) and
    t.start()
  }

  /** Gets a reference to a result of calling `db.connection.cursor`. */
  DataFlow::Node methodResult() { result = methodResult(DataFlow::TypeTracker::end()) }
}
/**
 * Gets a reference to the `db.Connection.Cursor.execute` function, as seen by
 * type tracker `t`.
 * See https://www.python.org/dev/peps/pep-0249/#id15.
 */
private DataFlow::Node execute(DataFlow::TypeTracker t) {
  // Recursive case: extend an already tracked reference by one tracking step.
  exists(DataFlow::TypeTracker prev | result = execute(prev).track(prev, t))
  or
  // Base case: start in attribute `execute` at a cursor returned by `cursor()`.
  result = cursor::methodResult() and
  t.startInAttr("execute")
}
/**
* Gets a reference to the `db.Connection.Cursor.execute` function, that is, the
* `execute` attribute of a cursor given by `cursor::methodResult()`.
* See https://www.python.org/dev/peps/pep-0249/#id15.
*/
DataFlow::Node execute() { result = execute(DataFlow::TypeTracker::end()) }
/**
 * A call to the `execute` method of a cursor obtained from a PEP 249 connection.
 *
 * See https://www.python.org/dev/peps/pep-0249/#id15.
 */
private class DbConnectionExecute extends SqlExecution::Range, DataFlow::CfgNode {
  override CallNode node;

  DbConnectionExecute() { node.getFunction() = execute().asCfgNode() }

  /**
   * Gets the argument holding the SQL to execute: the first positional argument,
   * or the keyword argument that names the same parameter.
   *
   * The keyword differs between implementations: PEP 249 and `mysql.connector`
   * call it `operation`, `MySQLdb` calls it `query`. `sql` is kept so that
   * anything matched before is still matched.
   */
  override DataFlow::Node getSql() {
    result.asCfgNode() = node.getArg(0)
    or
    exists(string keyword | keyword in ["sql", "operation", "query"] |
      result.asCfgNode() = node.getArgByName(keyword)
    )
  }
}

View File

@@ -4,10 +4,10 @@
*/
private import python
private import experimental.dataflow.DataFlow
private import experimental.dataflow.TaintTracking
private import experimental.dataflow.RemoteFlowSources
private import experimental.semmle.python.Concepts
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.Concepts
/** Provides models for the Python standard library. */
private module Stdlib {
@@ -753,6 +753,131 @@ private class OpenCall extends FileSystemAccess::Range, DataFlow::CfgNode {
}
}
// ---------------------------------------------------------------------------
// base64
// ---------------------------------------------------------------------------
/** Gets a reference to the `base64` module, as seen by type tracker `t`. */
private DataFlow::Node base64(DataFlow::TypeTracker t) {
  // Recursive case: extend an already tracked reference by one tracking step.
  exists(DataFlow::TypeTracker prev | result = base64(prev).track(prev, t))
  or
  // Base case: the import of the module itself.
  result = DataFlow::importNode("base64") and
  t.start()
}
/**
* Gets a reference to the `base64` module.
*
* This is the type-tracker variant `base64(t)` evaluated at the tracker's end state.
*/
DataFlow::Node base64() { result = base64(DataFlow::TypeTracker::end()) }
/**
* Gets a reference to the attribute `attr_name` of the `base64` module,
* as seen by type tracker `t`.
* WARNING: Only holds for a few predefined attributes (the encode/decode
* functions listed in the body).
*/
private DataFlow::Node base64_attr(DataFlow::TypeTracker t, string attr_name) {
attr_name in ["b64encode", "b64decode", "standard_b64encode", "standard_b64decode",
"urlsafe_b64encode", "urlsafe_b64decode", "b32encode", "b32decode", "b16encode",
"b16decode", "encodestring", "decodestring", "a85encode", "a85decode", "b85encode",
"b85decode", "encodebytes", "decodebytes"] and
(
// Either the attribute is imported directly (import of `base64.<attr_name>`) ...
t.start() and
result = DataFlow::importNode("base64" + "." + attr_name)
or
// ... or tracking starts in attribute `attr_name` at a reference to `base64`.
t.startInAttr(attr_name) and
result = base64()
)
or
// Due to bad performance when using normal setup with `base64_attr(t2, attr_name).track(t2, t)`
// we have inlined that code and forced a join
exists(DataFlow::TypeTracker t2 |
exists(DataFlow::StepSummary summary |
base64_attr_first_join(t2, attr_name, result, summary) and
t = t2.append(summary)
)
)
}
/**
* Holds if `res` is one type-tracking step, described by `summary`, away from a
* node given by `base64_attr(t2, attr_name)`.
*
* Helper for the recursive case of `base64_attr`; it is kept as a separate
* `pragma[nomagic]` predicate as part of the performance workaround described there.
*/
pragma[nomagic]
private predicate base64_attr_first_join(
DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, DataFlow::StepSummary summary
) {
DataFlow::StepSummary::step(base64_attr(t2, attr_name), res, summary)
}
/**
* Gets a reference to the attribute `attr_name` of the `base64` module.
* WARNING: Only holds for a few predefined attributes.
*
* This is the type-tracker variant `base64_attr(t, attr_name)` evaluated at the
* tracker's end state.
*/
private DataFlow::Node base64_attr(string attr_name) {
result = base64_attr(DataFlow::TypeTracker::end(), attr_name)
}
/**
 * A call to one of the encoding functions of the `base64` module.
 *
 * The first positional argument is the input, the call itself is the output, and
 * the reported format depends on which function is called.
 */
private class Base64EncodeCall extends Encoding::Range, DataFlow::CfgNode {
  override CallNode node;

  Base64EncodeCall() {
    exists(string fn |
      fn in ["b64encode", "standard_b64encode", "urlsafe_b64encode", "b32encode", "b16encode",
            "encodestring", "a85encode", "b85encode", "encodebytes"]
    |
      base64_attr(fn).asCfgNode() = node.getFunction()
    )
  }

  override DataFlow::Node getAnInput() { node.getArg(0) = result.asCfgNode() }

  override DataFlow::Node getOutput() { this = result }

  override string getFormat() {
    exists(string fn | base64_attr(fn).asCfgNode() = node.getFunction() |
      fn = "b85encode" and result = "Base85"
      or
      fn = "a85encode" and result = "Ascii85"
      or
      fn = "b16encode" and result = "Base16"
      or
      fn = "b32encode" and result = "Base32"
      or
      result = "Base64" and
      fn in ["b64encode", "standard_b64encode", "urlsafe_b64encode", "encodestring", "encodebytes"]
    )
  }
}
/**
 * A call to one of the decoding functions of the `base64` module.
 *
 * The first positional argument is the input, the call itself is the output, and
 * the reported format depends on which function is called. Decoding never
 * executes its input.
 */
private class Base64DecodeCall extends Decoding::Range, DataFlow::CfgNode {
  override CallNode node;

  Base64DecodeCall() {
    exists(string fn |
      fn in ["b64decode", "standard_b64decode", "urlsafe_b64decode", "b32decode", "b16decode",
            "decodestring", "a85decode", "b85decode", "decodebytes"]
    |
      base64_attr(fn).asCfgNode() = node.getFunction()
    )
  }

  override predicate mayExecuteInput() { none() }

  override DataFlow::Node getAnInput() { node.getArg(0) = result.asCfgNode() }

  override DataFlow::Node getOutput() { this = result }

  override string getFormat() {
    exists(string fn | base64_attr(fn).asCfgNode() = node.getFunction() |
      fn = "b85decode" and result = "Base85"
      or
      fn = "a85decode" and result = "Ascii85"
      or
      fn = "b16decode" and result = "Base16"
      or
      fn = "b32decode" and result = "Base32"
      or
      result = "Base64" and
      fn in ["b64decode", "standard_b64decode", "urlsafe_b64decode", "decodestring", "decodebytes"]
    )
  }
}
// ---------------------------------------------------------------------------
// OTHER
// ---------------------------------------------------------------------------
/**
* A call to the `startswith` method on a string.
* See https://docs.python.org/3.9/library/stdtypes.html#str.startswith

View File

@@ -3,8 +3,8 @@
*/
private import python
private import experimental.dataflow.DataFlow
private import experimental.dataflow.TaintTracking
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.TaintTracking
module Werkzeug {
/** Provides models for the `werkzeug` module. */

View File

@@ -4,9 +4,9 @@
*/
private import python
private import experimental.dataflow.DataFlow
private import experimental.dataflow.RemoteFlowSources
private import experimental.semmle.python.Concepts
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.Concepts
private module Yaml {
/** Gets a reference to the `yaml` module. */

View File

@@ -1,4 +1,4 @@
import experimental.dataflow.DataFlow
import semmle.python.dataflow.new.DataFlow
/**
* A configuration to find all flows.

View File

@@ -1,4 +1,4 @@
import experimental.dataflow.DataFlow
import semmle.python.dataflow.new.DataFlow
from DataFlow::Node fromNode, DataFlow::Node toNode
where DataFlow::localFlow(fromNode, toNode)

View File

@@ -1,4 +1,4 @@
import experimental.dataflow.DataFlow
import semmle.python.dataflow.new.DataFlow
from DataFlow::Node fromNode, DataFlow::Node toNode
where DataFlow::localFlowStep(fromNode, toNode)

View File

@@ -1,5 +1,5 @@
import experimental.dataflow.DataFlow
private import experimental.dataflow.internal.DataFlowPrivate as DataFlowPrivate
import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.internal.DataFlowPrivate as DataFlowPrivate
/**
* A configuration to find all "maximal" flows.

View File

@@ -1,6 +1,6 @@
private import python
import experimental.dataflow.DataFlow
private import experimental.dataflow.internal.DataFlowPrivate as DataFlowPrivate
import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.internal.DataFlowPrivate as DataFlowPrivate
/**
* A configuration to find the call graph edges.

View File

@@ -1 +1 @@
import experimental.dataflow.internal.DataFlowImplConsistency::Consistency
import semmle.python.dataflow.new.internal.DataFlowImplConsistency::Consistency

View File

@@ -3,9 +3,9 @@
*/
import python
import experimental.dataflow.DataFlow
import semmle.python.dataflow.new.DataFlow
import DataFlow::PathGraph
private import experimental.dataflow.internal.DataFlowPrivate as DataFlowPrivate
private import semmle.python.dataflow.new.internal.DataFlowPrivate as DataFlowPrivate
/**
* A configuration to check routing of arguments through magic methods.

View File

@@ -3,7 +3,7 @@
*/
import python
import experimental.dataflow.DataFlow
import semmle.python.dataflow.new.DataFlow
import DataFlow::PathGraph
/**

View File

@@ -3,7 +3,7 @@
*/
import python
import experimental.dataflow.DataFlow
import semmle.python.dataflow.new.DataFlow
import DataFlow::PathGraph
/**

View File

@@ -3,7 +3,7 @@
*/
import python
import experimental.dataflow.DataFlow
import semmle.python.dataflow.new.DataFlow
import DataFlow::PathGraph
/**

View File

@@ -3,7 +3,7 @@
*/
import python
import experimental.dataflow.DataFlow
import semmle.python.dataflow.new.DataFlow
import DataFlow::PathGraph
/**

View File

@@ -3,7 +3,7 @@
*/
import python
import experimental.dataflow.DataFlow
import semmle.python.dataflow.new.DataFlow
import DataFlow::PathGraph
/**

View File

@@ -3,7 +3,7 @@
*/
import python
import experimental.dataflow.DataFlow
import semmle.python.dataflow.new.DataFlow
import DataFlow::PathGraph
/**

View File

@@ -1,5 +1,5 @@
import experimental.dataflow.DataFlow
private import experimental.dataflow.internal.DataFlowPrivate as DataFlowPrivate
import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.internal.DataFlowPrivate as DataFlowPrivate
/**
* A configuration to find the call graph edges.

View File

@@ -1,5 +1,5 @@
import python
import experimental.dataflow.DataFlow
import semmle.python.dataflow.new.DataFlow
from DataFlow::Node nodeFrom, DataFlow::Node nodeTo
where

Some files were not shown because too many files have changed in this diff Show More