mirror of
https://github.com/github/codeql.git
synced 2025-12-16 16:53:25 +01:00
Merge pull request #4435 from RasmusWL/python-port-code-injection
Python: port code injection query
This commit is contained in:
@@ -0,0 +1,35 @@
|
||||
/**
|
||||
* @name Code injection
|
||||
* @description Interpreting unsanitized user input as code allows a malicious user to perform arbitrary
|
||||
* code execution.
|
||||
* @kind path-problem
|
||||
* @problem.severity error
|
||||
* @sub-severity high
|
||||
* @precision high
|
||||
* @id py/code-injection
|
||||
* @tags security
|
||||
* external/owasp/owasp-a1
|
||||
* external/cwe/cwe-094
|
||||
* external/cwe/cwe-095
|
||||
* external/cwe/cwe-116
|
||||
*/
|
||||
|
||||
import python
|
||||
import experimental.dataflow.DataFlow
|
||||
import experimental.dataflow.TaintTracking
|
||||
import experimental.semmle.python.Concepts
|
||||
import experimental.dataflow.RemoteFlowSources
|
||||
import DataFlow::PathGraph
|
||||
|
||||
class CodeInjectionConfiguration extends TaintTracking::Configuration {
|
||||
CodeInjectionConfiguration() { this = "CodeInjectionConfiguration" }
|
||||
|
||||
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
|
||||
|
||||
override predicate isSink(DataFlow::Node sink) { sink = any(CodeExecution e).getCode() }
|
||||
}
|
||||
|
||||
from CodeInjectionConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink
|
||||
where config.hasFlowPath(source, sink)
|
||||
select sink.getNode(), source, sink, "$@ flows to here and is interpreted as code.",
|
||||
source.getNode(), "A user-provided value"
|
||||
@@ -17,12 +17,12 @@ private import experimental.dataflow.RemoteFlowSources
|
||||
* extend `SystemCommandExecution::Range` instead.
|
||||
*/
|
||||
class SystemCommandExecution extends DataFlow::Node {
|
||||
SystemCommandExecution::Range self;
|
||||
SystemCommandExecution::Range range;
|
||||
|
||||
SystemCommandExecution() { this = self }
|
||||
SystemCommandExecution() { this = range }
|
||||
|
||||
/** Gets the argument that specifies the command to be executed. */
|
||||
DataFlow::Node getCommand() { result = self.getCommand() }
|
||||
DataFlow::Node getCommand() { result = range.getCommand() }
|
||||
}
|
||||
|
||||
/** Provides a class for modeling new system-command execution APIs. */
|
||||
@@ -40,6 +40,35 @@ module SystemCommandExecution {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A data-flow node that dynamically executes Python code.
|
||||
*
|
||||
* Extend this class to refine existing API models. If you want to model new APIs,
|
||||
* extend `CodeExecution::Range` instead.
|
||||
*/
|
||||
class CodeExecution extends DataFlow::Node {
|
||||
CodeExecution::Range range;
|
||||
|
||||
CodeExecution() { this = range }
|
||||
|
||||
/** Gets the argument that specifies the code to be executed. */
|
||||
DataFlow::Node getCode() { result = range.getCode() }
|
||||
}
|
||||
|
||||
/** Provides a class for modeling new dynamic code execution APIs. */
|
||||
module CodeExecution {
|
||||
/**
|
||||
* A data-flow node that dynamically executes Python code.
|
||||
*
|
||||
* Extend this class to model new APIs. If you want to refine existing API models,
|
||||
* extend `CodeExecution` instead.
|
||||
*/
|
||||
abstract class Range extends DataFlow::Node {
|
||||
/** Gets the argument that specifies the code to be executed. */
|
||||
abstract DataFlow::Node getCode();
|
||||
}
|
||||
}
|
||||
|
||||
/** Provides classes for modeling HTTP-related APIs. */
|
||||
module HTTP {
|
||||
/** Provides classes for modeling HTTP servers. */
|
||||
|
||||
@@ -327,4 +327,115 @@ private module Stdlib {
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// builtins
|
||||
// ---------------------------------------------------------------------------
|
||||
/** Gets a reference to the `builtins` module (called `__builtin__` in Python 2). */
|
||||
private DataFlow::Node builtins(DataFlow::TypeTracker t) {
|
||||
t.start() and
|
||||
result = DataFlow::importNode(["builtins", "__builtin__"])
|
||||
or
|
||||
exists(DataFlow::TypeTracker t2 | result = builtins(t2).track(t2, t))
|
||||
}
|
||||
|
||||
/** Gets a reference to the `builtins` module. */
|
||||
DataFlow::Node builtins() { result = builtins(DataFlow::TypeTracker::end()) }
|
||||
|
||||
/**
|
||||
* Gets a reference to the attribute `attr_name` of the `builtins` module.
|
||||
* WARNING: Only holds for a few predefined attributes.
|
||||
*/
|
||||
private DataFlow::Node builtins_attr(DataFlow::TypeTracker t, string attr_name) {
|
||||
attr_name in ["exec", "eval", "compile"] and
|
||||
(
|
||||
t.start() and
|
||||
result = DataFlow::importNode(["builtins", "__builtin__"] + "." + attr_name)
|
||||
or
|
||||
t.startInAttr(attr_name) and
|
||||
result = DataFlow::importNode(["builtins", "__builtin__"])
|
||||
or
|
||||
// special handling of builtins, that are in scope without any imports
|
||||
// TODO: Take care of overrides, either `def eval: ...`, `eval = ...`, or `builtins.eval = ...`
|
||||
t.start() and
|
||||
exists(NameNode ref | result.asCfgNode() = ref |
|
||||
ref.isGlobal() and
|
||||
ref.getId() = attr_name and
|
||||
ref.isLoad()
|
||||
)
|
||||
)
|
||||
or
|
||||
// Due to bad performance when using normal setup with `builtins_attr(t2, attr_name).track(t2, t)`
|
||||
// we have inlined that code and forced a join
|
||||
exists(DataFlow::TypeTracker t2 |
|
||||
exists(DataFlow::StepSummary summary |
|
||||
builtins_attr_first_join(t2, attr_name, result, summary) and
|
||||
t = t2.append(summary)
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
pragma[nomagic]
|
||||
private predicate builtins_attr_first_join(
|
||||
DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, DataFlow::StepSummary summary
|
||||
) {
|
||||
DataFlow::StepSummary::step(builtins_attr(t2, attr_name), res, summary)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a reference to the attribute `attr_name` of the `builtins` module.
|
||||
* WARNING: Only holds for a few predefined attributes.
|
||||
*/
|
||||
private DataFlow::Node builtins_attr(string attr_name) {
|
||||
result = builtins_attr(DataFlow::TypeTracker::end(), attr_name)
|
||||
}
|
||||
|
||||
/**
|
||||
* A call to the builtin `exec` function.
|
||||
* See https://docs.python.org/3/library/functions.html#exec
|
||||
*/
|
||||
private class BuiltinsExecCall extends CodeExecution::Range, DataFlow::CfgNode {
|
||||
override CallNode node;
|
||||
|
||||
BuiltinsExecCall() { node.getFunction() = builtins_attr("exec").asCfgNode() }
|
||||
|
||||
override DataFlow::Node getCode() { result.asCfgNode() = node.getArg(0) }
|
||||
}
|
||||
|
||||
/**
|
||||
* A call to the builtin `eval` function.
|
||||
* See https://docs.python.org/3/library/functions.html#eval
|
||||
*/
|
||||
private class BuiltinsEvalCall extends CodeExecution::Range, DataFlow::CfgNode {
|
||||
override CallNode node;
|
||||
|
||||
BuiltinsEvalCall() { node.getFunction() = builtins_attr("eval").asCfgNode() }
|
||||
|
||||
override DataFlow::Node getCode() { result.asCfgNode() = node.getArg(0) }
|
||||
}
|
||||
|
||||
/** An additional taint step for calls to the builtin function `compile`. */
|
||||
private class BuiltinsCompileCallAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
|
||||
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
|
||||
exists(CallNode call |
|
||||
nodeTo.asCfgNode() = call and
|
||||
call.getFunction() = builtins_attr("compile").asCfgNode() and
|
||||
nodeFrom.asCfgNode() in [call.getArg(0), call.getArgByName("source")]
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* An exec statement (only Python 2).
|
||||
* See https://docs.python.org/2/reference/simple_stmts.html#the-exec-statement.
|
||||
*/
|
||||
private class ExecStatement extends CodeExecution::Range {
|
||||
ExecStatement() {
|
||||
// since there are no DataFlow::Nodes for a Statement, we can't do anything like
|
||||
// `this = any(Exec exec)`
|
||||
this.asExpr() = any(Exec exec).getBody()
|
||||
}
|
||||
|
||||
override DataFlow::Node getCode() { result = this }
|
||||
}
|
||||
|
||||
@@ -0,0 +1,2 @@
|
||||
# exec statement is Python 2 specific
|
||||
exec "print(42)" # $getCode="print(42)"
|
||||
@@ -0,0 +1,2 @@
|
||||
import python
|
||||
import experimental.meta.ConceptsTest
|
||||
@@ -0,0 +1 @@
|
||||
semmle-extractor-options: --max-import-depth=1 --lang=2
|
||||
@@ -0,0 +1,4 @@
|
||||
import builtins
|
||||
|
||||
# exec being part of builtins is Python 3 only
|
||||
builtins.exec("print(42)") # $getCode="print(42)"
|
||||
@@ -0,0 +1,2 @@
|
||||
import python
|
||||
import experimental.meta.ConceptsTest
|
||||
@@ -0,0 +1 @@
|
||||
semmle-extractor-options: --max-import-depth=1 --lang=3
|
||||
@@ -0,0 +1,39 @@
|
||||
# without this, `eval("print(42)")` becomes invalid syntax in Python 2, since print is a
|
||||
# statement
|
||||
from __future__ import print_function
|
||||
|
||||
import sys
|
||||
|
||||
if sys.version_info[0] == 3:
|
||||
import builtins
|
||||
if sys.version_info[0] == 2:
|
||||
import __builtin__ as builtins
|
||||
|
||||
exec("print(42)") # $getCode="print(42)"
|
||||
eval("print(42)") # $getCode="print(42)"
|
||||
|
||||
builtins.eval("print(42)") # $getCode="print(42)"
|
||||
|
||||
cmd = compile("print(42)", "<filename>", "exec")
|
||||
exec(cmd) # $getCode=cmd
|
||||
|
||||
cmd = builtins.compile("print(42)", "<filename>", "exec")
|
||||
exec(cmd) # $getCode=cmd
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
# taint related
|
||||
|
||||
|
||||
def test_additional_taint():
|
||||
src = TAINTED_STRING
|
||||
|
||||
cmd1 = compile(src, "<filename>", "exec")
|
||||
cmd2 = compile(source=src, filename="<filename>", mode="exec")
|
||||
cmd3 = builtins.compile(src, "<filename>", "exec")
|
||||
|
||||
ensure_tainted(
|
||||
src,
|
||||
cmd1,
|
||||
cmd2,
|
||||
cmd3,
|
||||
)
|
||||
@@ -0,0 +1,11 @@
|
||||
# without this, `eval("print(42)")` becomes invalid syntax in Python 2, since print is a
|
||||
# statement
|
||||
from __future__ import print_function
|
||||
|
||||
|
||||
def eval(*args, **kwargs):
|
||||
raise Exception("no eval")
|
||||
|
||||
|
||||
# This function call might be marked as a code execution, but it actually isn't.
|
||||
eval("print(42)") # $f+:getCode="print(42)"
|
||||
@@ -0,0 +1,13 @@
|
||||
# without this, `eval("print(42)")` becomes invalid syntax in Python 2, since print is a
|
||||
# statement
|
||||
from __future__ import print_function
|
||||
|
||||
|
||||
def foo(*args, **kwargs):
|
||||
raise Exception("no eval")
|
||||
|
||||
|
||||
eval = foo
|
||||
|
||||
# This function call might be marked as a code execution, but it actually isn't.
|
||||
eval("print(42)") # $f+:getCode="print(42)"
|
||||
@@ -0,0 +1,19 @@
|
||||
# without this, `eval("print(42)")` becomes invalid syntax in Python 2, since print is a
|
||||
# statement
|
||||
from __future__ import print_function
|
||||
import sys
|
||||
|
||||
if sys.version_info[0] == 3:
|
||||
import builtins
|
||||
if sys.version_info[0] == 2:
|
||||
import __builtin__ as builtins
|
||||
|
||||
|
||||
def foo(*args, **kwargs):
|
||||
raise Exception("no eval")
|
||||
|
||||
|
||||
builtins.eval = foo
|
||||
|
||||
# This function call might be marked as a code execution, but it actually isn't.
|
||||
eval("print(42)") # $f+:getCode="print(42)"
|
||||
@@ -0,0 +1,4 @@
|
||||
| CodeExecution.py:35 | ok | test_additional_taint | src |
|
||||
| CodeExecution.py:36 | ok | test_additional_taint | cmd1 |
|
||||
| CodeExecution.py:37 | ok | test_additional_taint | cmd2 |
|
||||
| CodeExecution.py:38 | ok | test_additional_taint | cmd3 |
|
||||
@@ -0,0 +1,2 @@
|
||||
import experimental.dataflow.tainttracking.TestTaintLib
|
||||
import experimental.dataflow.RemoteFlowSources
|
||||
@@ -33,6 +33,23 @@ class SystemCommandExecutionTest extends InlineExpectationsTest {
|
||||
}
|
||||
}
|
||||
|
||||
class CodeExecutionTest extends InlineExpectationsTest {
|
||||
CodeExecutionTest() { this = "CodeExecutionTest" }
|
||||
|
||||
override string getARelevantTag() { result = "getCode" }
|
||||
|
||||
override predicate hasActualResult(Location location, string element, string tag, string value) {
|
||||
exists(CodeExecution ce, DataFlow::Node code |
|
||||
exists(location.getFile().getRelativePath()) and
|
||||
code = ce.getCode() and
|
||||
location = code.getLocation() and
|
||||
element = code.toString() and
|
||||
value = value_from_expr(code.asExpr()) and
|
||||
tag = "getCode"
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
class HttpServerRouteSetupTest extends InlineExpectationsTest {
|
||||
HttpServerRouteSetupTest() { this = "HttpServerRouteSetupTest" }
|
||||
|
||||
|
||||
@@ -0,0 +1,13 @@
|
||||
edges
|
||||
| code_injection.py:6:12:6:23 | ControlFlowNode for Attribute | code_injection.py:7:10:7:13 | ControlFlowNode for code |
|
||||
| code_injection.py:6:12:6:23 | ControlFlowNode for Attribute | code_injection.py:8:10:8:13 | ControlFlowNode for code |
|
||||
| code_injection.py:6:12:6:23 | ControlFlowNode for Attribute | code_injection.py:10:10:10:12 | ControlFlowNode for cmd |
|
||||
nodes
|
||||
| code_injection.py:6:12:6:23 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
|
||||
| code_injection.py:7:10:7:13 | ControlFlowNode for code | semmle.label | ControlFlowNode for code |
|
||||
| code_injection.py:8:10:8:13 | ControlFlowNode for code | semmle.label | ControlFlowNode for code |
|
||||
| code_injection.py:10:10:10:12 | ControlFlowNode for cmd | semmle.label | ControlFlowNode for cmd |
|
||||
#select
|
||||
| code_injection.py:7:10:7:13 | ControlFlowNode for code | code_injection.py:6:12:6:23 | ControlFlowNode for Attribute | code_injection.py:7:10:7:13 | ControlFlowNode for code | $@ flows to here and is interpreted as code. | code_injection.py:6:12:6:23 | ControlFlowNode for Attribute | A user-provided value |
|
||||
| code_injection.py:8:10:8:13 | ControlFlowNode for code | code_injection.py:6:12:6:23 | ControlFlowNode for Attribute | code_injection.py:8:10:8:13 | ControlFlowNode for code | $@ flows to here and is interpreted as code. | code_injection.py:6:12:6:23 | ControlFlowNode for Attribute | A user-provided value |
|
||||
| code_injection.py:10:10:10:12 | ControlFlowNode for cmd | code_injection.py:6:12:6:23 | ControlFlowNode for Attribute | code_injection.py:10:10:10:12 | ControlFlowNode for cmd | $@ flows to here and is interpreted as code. | code_injection.py:6:12:6:23 | ControlFlowNode for Attribute | A user-provided value |
|
||||
@@ -0,0 +1 @@
|
||||
experimental/Security-new-dataflow/CWE-094/CodeInjection.ql
|
||||
@@ -0,0 +1,10 @@
|
||||
from flask import Flask, request
|
||||
app = Flask(__name__)
|
||||
|
||||
@app.route("/code-execution")
|
||||
def code_execution():
|
||||
code = request.args.get("code")
|
||||
exec(code)
|
||||
eval(code)
|
||||
cmd = compile(code, "<filename>", "exec")
|
||||
exec(cmd)
|
||||
Reference in New Issue
Block a user