Merge pull request #4435 from RasmusWL/python-port-code-injection

Python: port code injection query
This commit is contained in:
Taus
2020-10-14 16:41:42 +02:00
committed by GitHub
21 changed files with 319 additions and 3 deletions

View File

@@ -0,0 +1,35 @@
/**
* @name Code injection
* @description Interpreting unsanitized user input as code allows a malicious user to perform arbitrary
* code execution.
* @kind path-problem
* @problem.severity error
* @sub-severity high
* @precision high
* @id py/code-injection
* @tags security
* external/owasp/owasp-a1
* external/cwe/cwe-094
* external/cwe/cwe-095
* external/cwe/cwe-116
*/
import python
import experimental.dataflow.DataFlow
import experimental.dataflow.TaintTracking
import experimental.semmle.python.Concepts
import experimental.dataflow.RemoteFlowSources
import DataFlow::PathGraph
/**
 * A taint-tracking configuration for code injection: flow from a remote flow
 * source to the code argument of a `CodeExecution`.
 */
class CodeInjectionConfiguration extends TaintTracking::Configuration {
  CodeInjectionConfiguration() { this = "CodeInjectionConfiguration" }

  /** Holds if `source` is a `RemoteFlowSource`. */
  override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }

  /** Holds if `sink` is the code argument of some `CodeExecution`. */
  override predicate isSink(DataFlow::Node sink) {
    exists(CodeExecution execution | sink = execution.getCode())
  }
}
// Report every flow path found by the configuration, anchored at the sink node.
from CodeInjectionConfiguration cfg, DataFlow::PathNode src, DataFlow::PathNode snk
where cfg.hasFlowPath(src, snk)
select snk.getNode(), src, snk, "$@ flows to here and is interpreted as code.", src.getNode(),
  "A user-provided value"

View File

@@ -17,12 +17,12 @@ private import experimental.dataflow.RemoteFlowSources
* extend `SystemCommandExecution::Range` instead.
*/
class SystemCommandExecution extends DataFlow::Node {
  // The `SystemCommandExecution::Range` value this node is equal to; all member
  // predicates delegate to it.
  SystemCommandExecution::Range range;

  SystemCommandExecution() { this = range }

  /** Gets the argument that specifies the command to be executed. */
  DataFlow::Node getCommand() { result = range.getCommand() }
}
/** Provides a class for modeling new system-command execution APIs. */
@@ -40,6 +40,35 @@ module SystemCommandExecution {
}
}
/**
 * A data-flow node at which Python code is executed dynamically.
 *
 * To refine existing API models, extend this class. To model new APIs,
 * extend `CodeExecution::Range` instead.
 */
class CodeExecution extends DataFlow::Node {
  CodeExecution::Range range;

  CodeExecution() { range = this }

  /** Gets the argument holding the code that is executed. */
  DataFlow::Node getCode() { range.getCode() = result }
}
/** Provides a class for modeling new dynamic code execution APIs. */
module CodeExecution {
  /**
   * A data-flow node that dynamically executes Python code.
   *
   * Extend this class to model new APIs. If you want to refine existing API models,
   * extend `CodeExecution` instead.
   */
  abstract class Range extends DataFlow::Node {
    /**
     * Gets the argument that specifies the code to be executed.
     *
     * This predicate is abstract, so each concrete subclass has to provide it.
     */
    abstract DataFlow::Node getCode();
  }
}
/** Provides classes for modeling HTTP-related APIs. */
module HTTP {
/** Provides classes for modeling HTTP servers. */

View File

@@ -327,4 +327,115 @@ private module Stdlib {
)
}
}
// ---------------------------------------------------------------------------
// builtins
// ---------------------------------------------------------------------------
/**
 * Gets a reference to the `builtins` module (named `__builtin__` in Python 2),
 * tracked with the type tracker `t`.
 */
private DataFlow::Node builtins(DataFlow::TypeTracker t) {
  // base case: the import of the module itself
  result = DataFlow::importNode(["builtins", "__builtin__"]) and
  t.start()
  or
  // recursive case: one further tracking step from an already-known reference
  exists(DataFlow::TypeTracker prev | result = builtins(prev).track(prev, t))
}

/** Gets a reference to the `builtins` module (tracking has reached its end state). */
DataFlow::Node builtins() { builtins(DataFlow::TypeTracker::end()) = result }
/**
 * Gets a reference to the attribute `attr_name` of the `builtins` module.
 * WARNING: Only holds for a few predefined attributes.
 *
 * The supported attribute names are `exec`, `eval` and `compile`. `t` is the
 * type tracker describing how the returned node relates to the starting points
 * listed in the body.
 */
private DataFlow::Node builtins_attr(DataFlow::TypeTracker t, string attr_name) {
  attr_name in ["exec", "eval", "compile"] and
  (
    // start at an import node for the qualified name `builtins.<attr_name>`
    // (or `__builtin__.<attr_name>`)
    t.start() and
    result = DataFlow::importNode(["builtins", "__builtin__"] + "." + attr_name)
    or
    // start at an import node for the module itself, with the tracker started
    // "in attribute" `attr_name`
    // NOTE(review): presumably this records that the tracked value is stored in
    // that attribute of the module -- confirm against the TypeTracker docs
    t.startInAttr(attr_name) and
    result = DataFlow::importNode(["builtins", "__builtin__"])
    or
    // special handling of builtins, that are in scope without any imports
    // TODO: Take care of overrides, either `def eval: ...`, `eval = ...`, or `builtins.eval = ...`
    t.start() and
    exists(NameNode ref | result.asCfgNode() = ref |
      ref.isGlobal() and
      ref.getId() = attr_name and
      ref.isLoad()
    )
  )
  or
  // Due to bad performance when using normal setup with `builtins_attr(t2, attr_name).track(t2, t)`
  // we have inlined that code and forced a join
  exists(DataFlow::TypeTracker t2 |
    exists(DataFlow::StepSummary summary |
      // one step from a node already known for tracker `t2` ...
      builtins_attr_first_join(t2, attr_name, result, summary) and
      // ... whose summary is appended to `t2` to give the tracker for `result`
      t = t2.append(summary)
    )
  )
}
/**
 * Holds if `res` is reachable in one step, described by `summary`, from a node
 * returned by `builtins_attr(t2, attr_name)`.
 *
 * Helper for the recursive case of `builtins_attr`, split out so that this join
 * is evaluated as its own predicate.
 * NOTE(review): `pragma[nomagic]` is presumably what forces that evaluation
 * order -- confirm against the QL annotations reference before changing it.
 */
pragma[nomagic]
private predicate builtins_attr_first_join(
  DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, DataFlow::StepSummary summary
) {
  DataFlow::StepSummary::step(builtins_attr(t2, attr_name), res, summary)
}
/**
 * Gets a reference to the attribute `attr_name` of the `builtins` module, i.e. a
 * node for which type tracking has reached its end state.
 * WARNING: Only holds for a few predefined attributes.
 */
private DataFlow::Node builtins_attr(string attr_name) {
  builtins_attr(DataFlow::TypeTracker::end(), attr_name) = result
}
/**
 * A call to the builtin function `exec`.
 * See https://docs.python.org/3/library/functions.html#exec
 */
private class BuiltinsExecCall extends CodeExecution::Range, DataFlow::CfgNode {
  override CallNode node;

  BuiltinsExecCall() {
    exists(DataFlow::Node execRef | execRef = builtins_attr("exec") |
      node.getFunction() = execRef.asCfgNode()
    )
  }

  /** Gets the first positional argument of the call, i.e. the code being executed. */
  override DataFlow::Node getCode() { node.getArg(0) = result.asCfgNode() }
}
/**
 * A call to the builtin function `eval`.
 * See https://docs.python.org/3/library/functions.html#eval
 */
private class BuiltinsEvalCall extends CodeExecution::Range, DataFlow::CfgNode {
  override CallNode node;

  BuiltinsEvalCall() {
    exists(DataFlow::Node evalRef | evalRef = builtins_attr("eval") |
      node.getFunction() = evalRef.asCfgNode()
    )
  }

  /** Gets the first positional argument of the call, i.e. the code being evaluated. */
  override DataFlow::Node getCode() { node.getArg(0) = result.asCfgNode() }
}
/**
 * An additional taint step for calls to the builtin function `compile`: taint
 * flows from the source argument (first positional argument, or the keyword
 * argument `source`) to the result of the call.
 */
private class BuiltinsCompileCallAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
  override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
    exists(CallNode compileCall |
      compileCall.getFunction() = builtins_attr("compile").asCfgNode() and
      (
        nodeFrom.asCfgNode() = compileCall.getArg(0)
        or
        nodeFrom.asCfgNode() = compileCall.getArgByName("source")
      ) and
      nodeTo.asCfgNode() = compileCall
    )
  }
}
}
/**
 * An exec statement (only Python 2).
 * See https://docs.python.org/2/reference/simple_stmts.html#the-exec-statement.
 */
private class ExecStatement extends CodeExecution::Range {
  ExecStatement() {
    // There are no `DataFlow::Node`s for statements, so `this = any(Exec exec)` is
    // not possible; the node for the body expression of the statement is used instead.
    exists(Exec execStmt | this.asExpr() = execStmt.getBody())
  }

  /** Gets the executed code, which is this node itself (the body of the exec statement). */
  override DataFlow::Node getCode() { result = this }
}

View File

@@ -0,0 +1,2 @@
# exec statement is Python 2 specific
exec "print(42)" # $getCode="print(42)"

View File

@@ -0,0 +1,2 @@
import python
import experimental.meta.ConceptsTest

View File

@@ -0,0 +1 @@
semmle-extractor-options: --max-import-depth=1 --lang=2

View File

@@ -0,0 +1,4 @@
import builtins
# exec being part of builtins is Python 3 only
builtins.exec("print(42)") # $getCode="print(42)"

View File

@@ -0,0 +1,2 @@
import python
import experimental.meta.ConceptsTest

View File

@@ -0,0 +1 @@
semmle-extractor-options: --max-import-depth=1 --lang=3

View File

@@ -0,0 +1,39 @@
# without this, `eval("print(42)")` becomes invalid syntax in Python 2, since print is a
# statement
from __future__ import print_function
import sys
if sys.version_info[0] == 3:
import builtins
if sys.version_info[0] == 2:
import __builtin__ as builtins
exec("print(42)") # $getCode="print(42)"
eval("print(42)") # $getCode="print(42)"
builtins.eval("print(42)") # $getCode="print(42)"
cmd = compile("print(42)", "<filename>", "exec")
exec(cmd) # $getCode=cmd
cmd = builtins.compile("print(42)", "<filename>", "exec")
exec(cmd) # $getCode=cmd
# ------------------------------------------------------------------------------
# taint related
def test_additional_taint():
src = TAINTED_STRING
cmd1 = compile(src, "<filename>", "exec")
cmd2 = compile(source=src, filename="<filename>", mode="exec")
cmd3 = builtins.compile(src, "<filename>", "exec")
ensure_tainted(
src,
cmd1,
cmd2,
cmd3,
)

View File

@@ -0,0 +1,11 @@
# without this, `eval("print(42)")` becomes invalid syntax in Python 2, since print is a
# statement
from __future__ import print_function
def eval(*args, **kwargs):
    raise Exception("no eval")
# This function call might be marked as a code execution, but it actually isn't.
eval("print(42)") # $f+:getCode="print(42)"

View File

@@ -0,0 +1,13 @@
# without this, `eval("print(42)")` becomes invalid syntax in Python 2, since print is a
# statement
from __future__ import print_function
def foo(*args, **kwargs):
    raise Exception("no eval")
eval = foo
# This function call might be marked as a code execution, but it actually isn't.
eval("print(42)") # $f+:getCode="print(42)"

View File

@@ -0,0 +1,19 @@
# without this, `eval("print(42)")` becomes invalid syntax in Python 2, since print is a
# statement
from __future__ import print_function
import sys
if sys.version_info[0] == 3:
    import builtins
if sys.version_info[0] == 2:
    import __builtin__ as builtins
def foo(*args, **kwargs):
    raise Exception("no eval")
builtins.eval = foo
# This function call might be marked as a code execution, but it actually isn't.
eval("print(42)") # $f+:getCode="print(42)"

View File

@@ -0,0 +1,4 @@
| CodeExecution.py:35 | ok | test_additional_taint | src |
| CodeExecution.py:36 | ok | test_additional_taint | cmd1 |
| CodeExecution.py:37 | ok | test_additional_taint | cmd2 |
| CodeExecution.py:38 | ok | test_additional_taint | cmd3 |

View File

@@ -0,0 +1,2 @@
import experimental.dataflow.tainttracking.TestTaintLib
import experimental.dataflow.RemoteFlowSources

View File

@@ -33,6 +33,23 @@ class SystemCommandExecutionTest extends InlineExpectationsTest {
}
}
/**
 * An inline-expectations test for `CodeExecution`: reports the value of every
 * `getCode()` result under the tag `getCode`.
 */
class CodeExecutionTest extends InlineExpectationsTest {
  CodeExecutionTest() { this = "CodeExecutionTest" }

  override string getARelevantTag() { result = "getCode" }

  override predicate hasActualResult(Location location, string element, string tag, string value) {
    tag = "getCode" and
    exists(DataFlow::Node codeArg |
      codeArg = any(CodeExecution exec).getCode() and
      // only report results located in a file that has a relative path
      exists(location.getFile().getRelativePath()) and
      location = codeArg.getLocation() and
      element = codeArg.toString() and
      value = value_from_expr(codeArg.asExpr())
    )
  }
}
class HttpServerRouteSetupTest extends InlineExpectationsTest {
HttpServerRouteSetupTest() { this = "HttpServerRouteSetupTest" }

View File

@@ -0,0 +1,13 @@
edges
| code_injection.py:6:12:6:23 | ControlFlowNode for Attribute | code_injection.py:7:10:7:13 | ControlFlowNode for code |
| code_injection.py:6:12:6:23 | ControlFlowNode for Attribute | code_injection.py:8:10:8:13 | ControlFlowNode for code |
| code_injection.py:6:12:6:23 | ControlFlowNode for Attribute | code_injection.py:10:10:10:12 | ControlFlowNode for cmd |
nodes
| code_injection.py:6:12:6:23 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| code_injection.py:7:10:7:13 | ControlFlowNode for code | semmle.label | ControlFlowNode for code |
| code_injection.py:8:10:8:13 | ControlFlowNode for code | semmle.label | ControlFlowNode for code |
| code_injection.py:10:10:10:12 | ControlFlowNode for cmd | semmle.label | ControlFlowNode for cmd |
#select
| code_injection.py:7:10:7:13 | ControlFlowNode for code | code_injection.py:6:12:6:23 | ControlFlowNode for Attribute | code_injection.py:7:10:7:13 | ControlFlowNode for code | $@ flows to here and is interpreted as code. | code_injection.py:6:12:6:23 | ControlFlowNode for Attribute | A user-provided value |
| code_injection.py:8:10:8:13 | ControlFlowNode for code | code_injection.py:6:12:6:23 | ControlFlowNode for Attribute | code_injection.py:8:10:8:13 | ControlFlowNode for code | $@ flows to here and is interpreted as code. | code_injection.py:6:12:6:23 | ControlFlowNode for Attribute | A user-provided value |
| code_injection.py:10:10:10:12 | ControlFlowNode for cmd | code_injection.py:6:12:6:23 | ControlFlowNode for Attribute | code_injection.py:10:10:10:12 | ControlFlowNode for cmd | $@ flows to here and is interpreted as code. | code_injection.py:6:12:6:23 | ControlFlowNode for Attribute | A user-provided value |

View File

@@ -0,0 +1 @@
experimental/Security-new-dataflow/CWE-094/CodeInjection.ql

View File

@@ -0,0 +1,10 @@
from flask import Flask, request
app = Flask(__name__)
@app.route("/code-execution")
def code_execution():
    code = request.args.get("code")
    exec(code)
    eval(code)
    cmd = compile(code, "<filename>", "exec")
    exec(cmd)