mirror of
https://github.com/github/codeql.git
synced 2025-12-20 10:46:30 +01:00
Merge branch 'main' of github.com:github/codeql into python-port-unsafe-deserialization
This commit is contained in:
@@ -46,15 +46,16 @@ class CommandInjectionConfiguration extends TaintTracking::Configuration {
|
||||
// os.system(cmd)
|
||||
// ```
|
||||
//
|
||||
// Best solution I could come up with is to exclude all sinks inside the `os` and
|
||||
// `subprocess` modules. This does have a downside: If we have overlooked a function
|
||||
// in any of these, that internally runs a command, we no longer give an alert :|
|
||||
// Best solution I could come up with is to exclude all sinks inside the modules of
|
||||
// known sinks. This does have a downside: If we have overlooked a function in any
|
||||
// of these, that internally runs a command, we no longer give an alert :| -- and we
|
||||
// need to keep them updated (which is hard to remember)
|
||||
//
|
||||
// This does not only affect `os.popen`, but also the helper functions in
|
||||
// `subprocess`. See:
|
||||
// https://github.com/python/cpython/blob/fa7ce080175f65d678a7d5756c94f82887fc9803/Lib/os.py#L974
|
||||
// https://github.com/python/cpython/blob/fa7ce080175f65d678a7d5756c94f82887fc9803/Lib/subprocess.py#L341
|
||||
not sink.getScope().getEnclosingModule().getName() in ["os", "subprocess"]
|
||||
not sink.getScope().getEnclosingModule().getName() in ["os", "subprocess", "platform", "popen2"]
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,35 @@
|
||||
/**
|
||||
* @name Code injection
|
||||
* @description Interpreting unsanitized user input as code allows a malicious user to perform arbitrary
|
||||
* code execution.
|
||||
* @kind path-problem
|
||||
* @problem.severity error
|
||||
* @sub-severity high
|
||||
* @precision high
|
||||
* @id py/code-injection
|
||||
* @tags security
|
||||
* external/owasp/owasp-a1
|
||||
* external/cwe/cwe-094
|
||||
* external/cwe/cwe-095
|
||||
* external/cwe/cwe-116
|
||||
*/
|
||||
|
||||
import python
|
||||
import experimental.dataflow.DataFlow
|
||||
import experimental.dataflow.TaintTracking
|
||||
import experimental.semmle.python.Concepts
|
||||
import experimental.dataflow.RemoteFlowSources
|
||||
import DataFlow::PathGraph
|
||||
|
||||
class CodeInjectionConfiguration extends TaintTracking::Configuration {
|
||||
CodeInjectionConfiguration() { this = "CodeInjectionConfiguration" }
|
||||
|
||||
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
|
||||
|
||||
override predicate isSink(DataFlow::Node sink) { sink = any(CodeExecution e).getCode() }
|
||||
}
|
||||
|
||||
from CodeInjectionConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink
|
||||
where config.hasFlowPath(source, sink)
|
||||
select sink.getNode(), source, sink, "$@ flows to here and is interpreted as code.",
|
||||
source.getNode(), "A user-provided value"
|
||||
13
python/ql/src/experimental/Security-new-dataflow/promote.sh
Executable file
13
python/ql/src/experimental/Security-new-dataflow/promote.sh
Executable file
@@ -0,0 +1,13 @@
|
||||
#!/bin/bash
set -Eeuo pipefail # see https://vaneyckt.io/posts/safer_bash_scripts_with_set_euxo_pipefail/

# Promotes new dataflow queries to be the real ones.
#
# Every non-directory entry found at least two levels below this script's
# directory (i.e. inside one of its sub-directories) is moved to the same
# relative path under ../../Security/, creating target directories as needed.

SCRIPTDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"

# Quoted so that a checkout path containing spaces or glob characters works.
cd "$SCRIPTDIR"

# Snapshot the list before moving anything, so we never move entries out from
# under a running `find`. NUL-delimited output keeps paths containing
# whitespace or glob characters intact. Directories are skipped: moving a
# directory would make the later `mv` of its contents fail and abort the run.
files=()
while IFS= read -r -d '' file; do
    files+=("$file")
done < <(find . -mindepth 2 ! -type d -print0)

# The `${files[@]+...}` form avoids an "unbound variable" error under `set -u`
# on older bash versions when nothing was found.
for file in ${files[@]+"${files[@]}"}; do
    echo "Promoting $file"
    mkdir -p "../../Security/$(dirname "$file")"
    mv "$file" "../../Security/${file}"
done
|
||||
@@ -71,7 +71,8 @@ module StepSummary {
|
||||
/** Holds if it's reasonable to expect the data flow step from `nodeFrom` to `nodeTo` to preserve types. */
|
||||
private predicate typePreservingStep(Node nodeFrom, Node nodeTo) {
|
||||
EssaFlow::essaFlowStep(nodeFrom, nodeTo) or
|
||||
jumpStep(nodeFrom, nodeTo)
|
||||
jumpStep(nodeFrom, nodeTo) or
|
||||
nodeFrom = nodeTo.(PostUpdateNode).getPreUpdateNode()
|
||||
}
|
||||
|
||||
/** Holds if `nodeFrom` steps to `nodeTo` by being passed as a parameter in a call. */
|
||||
|
||||
@@ -150,22 +150,6 @@ module EssaFlow {
|
||||
// nodeTo is `y` on second line, cfg node
|
||||
useToNextUse(nodeFrom.asCfgNode(), nodeTo.asCfgNode())
|
||||
or
|
||||
// Refinements
|
||||
exists(EssaEdgeRefinement r |
|
||||
nodeTo.(EssaNode).getVar() = r.getVariable() and
|
||||
nodeFrom.(EssaNode).getVar() = r.getInput()
|
||||
)
|
||||
or
|
||||
exists(EssaNodeRefinement r |
|
||||
nodeTo.(EssaNode).getVar() = r.getVariable() and
|
||||
nodeFrom.(EssaNode).getVar() = r.getInput()
|
||||
)
|
||||
or
|
||||
exists(PhiFunction p |
|
||||
nodeTo.(EssaNode).getVar() = p.getVariable() and
|
||||
nodeFrom.(EssaNode).getVar() = p.getAnInput()
|
||||
)
|
||||
or
|
||||
// If expressions
|
||||
nodeFrom.asCfgNode() = nodeTo.asCfgNode().(IfExprNode).getAnOperand()
|
||||
}
|
||||
|
||||
@@ -97,6 +97,35 @@ module Decoding {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A data-flow node that dynamically executes Python code.
|
||||
*
|
||||
* Extend this class to refine existing API models. If you want to model new APIs,
|
||||
* extend `CodeExecution::Range` instead.
|
||||
*/
|
||||
class CodeExecution extends DataFlow::Node {
|
||||
CodeExecution::Range range;
|
||||
|
||||
CodeExecution() { this = range }
|
||||
|
||||
/** Gets the argument that specifies the code to be executed. */
|
||||
DataFlow::Node getCode() { result = range.getCode() }
|
||||
}
|
||||
|
||||
/** Provides a class for modeling new dynamic code execution APIs. */
|
||||
module CodeExecution {
|
||||
/**
|
||||
* A data-flow node that dynamically executes Python code.
|
||||
*
|
||||
* Extend this class to model new APIs. If you want to refine existing API models,
|
||||
* extend `CodeExecution` instead.
|
||||
*/
|
||||
abstract class Range extends DataFlow::Node {
|
||||
/** Gets the argument that specifies the code to be executed. */
|
||||
abstract DataFlow::Node getCode();
|
||||
}
|
||||
}
|
||||
|
||||
/** Provides classes for modeling HTTP-related APIs. */
|
||||
module HTTP {
|
||||
/** Provides classes for modeling HTTP servers. */
|
||||
|
||||
@@ -32,7 +32,7 @@ private module Stdlib {
|
||||
* For example, using `attr_name = "system"` will get all uses of `os.system`.
|
||||
*/
|
||||
private DataFlow::Node os_attr(DataFlow::TypeTracker t, string attr_name) {
|
||||
attr_name in ["system", "popen",
|
||||
attr_name in ["system", "popen", "popen2", "popen3", "popen4",
|
||||
// exec
|
||||
"execl", "execle", "execlp", "execlpe", "execv", "execve", "execvp", "execvpe",
|
||||
// spawn
|
||||
@@ -111,15 +111,28 @@ private module Stdlib {
|
||||
}
|
||||
|
||||
/**
|
||||
* A call to `os.popen`
|
||||
* A call to any of the `os.popen*` functions
|
||||
* See https://docs.python.org/3/library/os.html#os.popen
|
||||
*
|
||||
* Note that in Python 2, there are also `popen2`, `popen3`, and `popen4` functions.
|
||||
* Although deprecated since version 2.6, they still work in 2.7.
|
||||
* See https://docs.python.org/2.7/library/os.html#os.popen2
|
||||
*/
|
||||
private class OsPopenCall extends SystemCommandExecution::Range, DataFlow::CfgNode {
|
||||
override CallNode node;
|
||||
string name;
|
||||
|
||||
OsPopenCall() { node.getFunction() = os_attr("popen").asCfgNode() }
|
||||
OsPopenCall() {
|
||||
name in ["popen", "popen2", "popen3", "popen4"] and
|
||||
node.getFunction() = os_attr(name).asCfgNode()
|
||||
}
|
||||
|
||||
override DataFlow::Node getCommand() { result.asCfgNode() = node.getArg(0) }
|
||||
override DataFlow::Node getCommand() {
|
||||
result.asCfgNode() = node.getArg(0)
|
||||
or
|
||||
not name = "popen" and
|
||||
result.asCfgNode() = node.getArgByName("cmd")
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -242,29 +255,22 @@ private module Stdlib {
|
||||
* A call to `subprocess.Popen` or helper functions (call, check_call, check_output, run)
|
||||
* See https://docs.python.org/3.8/library/subprocess.html#subprocess.Popen
|
||||
*/
|
||||
private class SubprocessPopenCall extends SystemCommandExecution::Range {
|
||||
CallNode call;
|
||||
private class SubprocessPopenCall extends SystemCommandExecution::Range, DataFlow::CfgNode {
|
||||
override CallNode node;
|
||||
|
||||
SubprocessPopenCall() {
|
||||
call = this.asCfgNode() and
|
||||
exists(string name |
|
||||
name in ["Popen", "call", "check_call", "check_output", "run"] and
|
||||
call.getFunction() = subprocess_attr(name).asCfgNode()
|
||||
node.getFunction() = subprocess_attr(name).asCfgNode()
|
||||
)
|
||||
}
|
||||
|
||||
/** Gets the ControlFlowNode for the `args` argument, if any. */
|
||||
private ControlFlowNode get_args_arg() {
|
||||
result = call.getArg(0)
|
||||
or
|
||||
result = call.getArgByName("args")
|
||||
}
|
||||
private ControlFlowNode get_args_arg() { result in [node.getArg(0), node.getArgByName("args")] }
|
||||
|
||||
/** Gets the ControlFlowNode for the `shell` argument, if any. */
|
||||
private ControlFlowNode get_shell_arg() {
|
||||
result = call.getArg(8)
|
||||
or
|
||||
result = call.getArgByName("shell")
|
||||
result in [node.getArg(8), node.getArgByName("shell")]
|
||||
}
|
||||
|
||||
private boolean get_shell_arg_value() {
|
||||
@@ -286,9 +292,7 @@ private module Stdlib {
|
||||
|
||||
/** Gets the ControlFlowNode for the `executable` argument, if any. */
|
||||
private ControlFlowNode get_executable_arg() {
|
||||
result = call.getArg(2)
|
||||
or
|
||||
result = call.getArgByName("executable")
|
||||
result in [node.getArg(2), node.getArgByName("executable")]
|
||||
}
|
||||
|
||||
override DataFlow::Node getCommand() {
|
||||
@@ -424,4 +428,256 @@ private module Stdlib {
|
||||
|
||||
override string getFormat() { result = "pickle" }
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// popen2
|
||||
// ---------------------------------------------------------------------------
|
||||
/** Gets a reference to the `popen2` module (only available in Python 2). */
|
||||
private DataFlow::Node popen2(DataFlow::TypeTracker t) {
|
||||
t.start() and
|
||||
result = DataFlow::importNode("popen2")
|
||||
or
|
||||
exists(DataFlow::TypeTracker t2 | result = popen2(t2).track(t2, t))
|
||||
}
|
||||
|
||||
/** Gets a reference to the `popen2` module (only available in Python 2). */
|
||||
DataFlow::Node popen2() { result = popen2(DataFlow::TypeTracker::end()) }
|
||||
|
||||
/**
|
||||
* Gets a reference to the attribute `attr_name` of the `popen2` module.
|
||||
* WARNING: Only holds for a few predefined attributes.
|
||||
*/
|
||||
private DataFlow::Node popen2_attr(DataFlow::TypeTracker t, string attr_name) {
|
||||
attr_name in ["popen2", "popen3", "popen4",
|
||||
// classes
|
||||
"Popen3", "Popen4"] and
|
||||
(
|
||||
t.start() and
|
||||
result = DataFlow::importNode("popen2." + attr_name)
|
||||
or
|
||||
t.startInAttr(attr_name) and
|
||||
result = DataFlow::importNode("popen2")
|
||||
)
|
||||
or
|
||||
// Due to bad performance when using normal setup with `popen2_attr(t2, attr_name).track(t2, t)`
|
||||
// we have inlined that code and forced a join
|
||||
exists(DataFlow::TypeTracker t2 |
|
||||
exists(DataFlow::StepSummary summary |
|
||||
popen2_attr_first_join(t2, attr_name, result, summary) and
|
||||
t = t2.append(summary)
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
pragma[nomagic]
|
||||
private predicate popen2_attr_first_join(
|
||||
DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, DataFlow::StepSummary summary
|
||||
) {
|
||||
DataFlow::StepSummary::step(popen2_attr(t2, attr_name), res, summary)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a reference to the attribute `attr_name` of the `popen2` module.
|
||||
* WARNING: Only holds for a few predefined attributes.
|
||||
*/
|
||||
private DataFlow::Node popen2_attr(string attr_name) {
|
||||
result = popen2_attr(DataFlow::TypeTracker::end(), attr_name)
|
||||
}
|
||||
|
||||
/**
|
||||
* A call to any of the `popen2.popen*` functions, or instantiation of a `popen2.Popen*` class.
|
||||
* See https://docs.python.org/2.7/library/popen2.html
|
||||
*/
|
||||
private class Popen2PopenCall extends SystemCommandExecution::Range, DataFlow::CfgNode {
|
||||
override CallNode node;
|
||||
|
||||
Popen2PopenCall() {
|
||||
exists(string name |
|
||||
name in ["popen2", "popen3", "popen4", "Popen3", "Popen4"] and
|
||||
node.getFunction() = popen2_attr(name).asCfgNode()
|
||||
)
|
||||
}
|
||||
|
||||
override DataFlow::Node getCommand() {
|
||||
result.asCfgNode() in [node.getArg(0), node.getArgByName("cmd")]
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// platform
|
||||
// ---------------------------------------------------------------------------
|
||||
/** Gets a reference to the `platform` module. */
|
||||
private DataFlow::Node platform(DataFlow::TypeTracker t) {
|
||||
t.start() and
|
||||
result = DataFlow::importNode("platform")
|
||||
or
|
||||
exists(DataFlow::TypeTracker t2 | result = platform(t2).track(t2, t))
|
||||
}
|
||||
|
||||
/** Gets a reference to the `platform` module. */
|
||||
DataFlow::Node platform() { result = platform(DataFlow::TypeTracker::end()) }
|
||||
|
||||
/**
|
||||
* Gets a reference to the attribute `attr_name` of the `platform` module.
|
||||
* WARNING: Only holds for a few predefined attributes.
|
||||
*/
|
||||
private DataFlow::Node platform_attr(DataFlow::TypeTracker t, string attr_name) {
|
||||
attr_name in ["popen"] and
|
||||
(
|
||||
t.start() and
|
||||
result = DataFlow::importNode("platform." + attr_name)
|
||||
or
|
||||
t.startInAttr(attr_name) and
|
||||
result = DataFlow::importNode("platform")
|
||||
)
|
||||
or
|
||||
// Due to bad performance when using normal setup with `platform_attr(t2, attr_name).track(t2, t)`
|
||||
// we have inlined that code and forced a join
|
||||
exists(DataFlow::TypeTracker t2 |
|
||||
exists(DataFlow::StepSummary summary |
|
||||
platform_attr_first_join(t2, attr_name, result, summary) and
|
||||
t = t2.append(summary)
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
pragma[nomagic]
|
||||
private predicate platform_attr_first_join(
|
||||
DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, DataFlow::StepSummary summary
|
||||
) {
|
||||
DataFlow::StepSummary::step(platform_attr(t2, attr_name), res, summary)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a reference to the attribute `attr_name` of the `platform` module.
|
||||
* WARNING: Only holds for a few predefined attributes.
|
||||
*/
|
||||
private DataFlow::Node platform_attr(string attr_name) {
|
||||
result = platform_attr(DataFlow::TypeTracker::end(), attr_name)
|
||||
}
|
||||
|
||||
/**
|
||||
* A call to the `platform.popen` function.
|
||||
* See https://docs.python.org/2.7/library/platform.html#platform.popen
|
||||
*/
|
||||
private class PlatformPopenCall extends SystemCommandExecution::Range, DataFlow::CfgNode {
|
||||
override CallNode node;
|
||||
|
||||
PlatformPopenCall() { node.getFunction() = platform_attr("popen").asCfgNode() }
|
||||
|
||||
override DataFlow::Node getCommand() {
|
||||
result.asCfgNode() in [node.getArg(0), node.getArgByName("cmd")]
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// builtins
|
||||
// ---------------------------------------------------------------------------
|
||||
/** Gets a reference to the `builtins` module (called `__builtin__` in Python 2). */
|
||||
private DataFlow::Node builtins(DataFlow::TypeTracker t) {
|
||||
t.start() and
|
||||
result = DataFlow::importNode(["builtins", "__builtin__"])
|
||||
or
|
||||
exists(DataFlow::TypeTracker t2 | result = builtins(t2).track(t2, t))
|
||||
}
|
||||
|
||||
/** Gets a reference to the `builtins` module. */
|
||||
DataFlow::Node builtins() { result = builtins(DataFlow::TypeTracker::end()) }
|
||||
|
||||
/**
|
||||
* Gets a reference to the attribute `attr_name` of the `builtins` module.
|
||||
* WARNING: Only holds for a few predefined attributes.
|
||||
*/
|
||||
private DataFlow::Node builtins_attr(DataFlow::TypeTracker t, string attr_name) {
|
||||
attr_name in ["exec", "eval", "compile"] and
|
||||
(
|
||||
t.start() and
|
||||
result = DataFlow::importNode(["builtins", "__builtin__"] + "." + attr_name)
|
||||
or
|
||||
t.startInAttr(attr_name) and
|
||||
result = DataFlow::importNode(["builtins", "__builtin__"])
|
||||
or
|
||||
// special handling of builtins, that are in scope without any imports
|
||||
// TODO: Take care of overrides, either `def eval: ...`, `eval = ...`, or `builtins.eval = ...`
|
||||
t.start() and
|
||||
exists(NameNode ref | result.asCfgNode() = ref |
|
||||
ref.isGlobal() and
|
||||
ref.getId() = attr_name and
|
||||
ref.isLoad()
|
||||
)
|
||||
)
|
||||
or
|
||||
// Due to bad performance when using normal setup with `builtins_attr(t2, attr_name).track(t2, t)`
|
||||
// we have inlined that code and forced a join
|
||||
exists(DataFlow::TypeTracker t2 |
|
||||
exists(DataFlow::StepSummary summary |
|
||||
builtins_attr_first_join(t2, attr_name, result, summary) and
|
||||
t = t2.append(summary)
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
pragma[nomagic]
|
||||
private predicate builtins_attr_first_join(
|
||||
DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, DataFlow::StepSummary summary
|
||||
) {
|
||||
DataFlow::StepSummary::step(builtins_attr(t2, attr_name), res, summary)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a reference to the attribute `attr_name` of the `builtins` module.
|
||||
* WARNING: Only holds for a few predefined attributes.
|
||||
*/
|
||||
private DataFlow::Node builtins_attr(string attr_name) {
|
||||
result = builtins_attr(DataFlow::TypeTracker::end(), attr_name)
|
||||
}
|
||||
|
||||
/**
|
||||
* A call to the builtin `exec` function.
|
||||
* See https://docs.python.org/3/library/functions.html#exec
|
||||
*/
|
||||
private class BuiltinsExecCall extends CodeExecution::Range, DataFlow::CfgNode {
|
||||
override CallNode node;
|
||||
|
||||
BuiltinsExecCall() { node.getFunction() = builtins_attr("exec").asCfgNode() }
|
||||
|
||||
override DataFlow::Node getCode() { result.asCfgNode() = node.getArg(0) }
|
||||
}
|
||||
|
||||
/**
|
||||
* A call to the builtin `eval` function.
|
||||
* See https://docs.python.org/3/library/functions.html#eval
|
||||
*/
|
||||
private class BuiltinsEvalCall extends CodeExecution::Range, DataFlow::CfgNode {
|
||||
override CallNode node;
|
||||
|
||||
BuiltinsEvalCall() { node.getFunction() = builtins_attr("eval").asCfgNode() }
|
||||
|
||||
override DataFlow::Node getCode() { result.asCfgNode() = node.getArg(0) }
|
||||
}
|
||||
|
||||
/** An additional taint step for calls to the builtin function `compile` */
|
||||
private class BuiltinsCompileCallAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
|
||||
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
|
||||
exists(CallNode call |
|
||||
nodeTo.asCfgNode() = call and
|
||||
call.getFunction() = builtins_attr("compile").asCfgNode() and
|
||||
nodeFrom.asCfgNode() in [call.getArg(0), call.getArgByName("source")]
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* An exec statement (only Python 2).
|
||||
* See https://docs.python.org/2/reference/simple_stmts.html#the-exec-statement.
|
||||
*/
|
||||
private class ExecStatement extends CodeExecution::Range {
|
||||
ExecStatement() {
|
||||
// since there are no DataFlow::Nodes for a Statement, we can't do anything like
|
||||
// `this = any(Exec exec)`
|
||||
this.asExpr() = any(Exec exec).getBody()
|
||||
}
|
||||
|
||||
override DataFlow::Node getCode() { result = this }
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user