mirror of
https://github.com/github/codeql.git
synced 2025-12-20 10:46:30 +01:00
Python: Untrusted data used in external APIs
A port of the one for Java that was added in https://github.com/github/codeql/pull/3938
This commit is contained in:
@@ -0,0 +1,7 @@
|
||||
from flask import Flask, request, make_response
|
||||
app = Flask(__name__)
|
||||
|
||||
@app.route("/xss")
|
||||
def xss():
|
||||
username = request.args.get("username")
|
||||
return make_response("Hello {}".format(username))
|
||||
@@ -0,0 +1,12 @@
|
||||
import base64
|
||||
import pickle
|
||||
|
||||
from flask import Flask, request, make_response
|
||||
app = Flask(__name__)
|
||||
|
||||
@app.route("/example")
|
||||
def profile():
|
||||
raw_data = request.args.get("data").encode('utf-8')
|
||||
data = base64.decodebytes(raw_data)
|
||||
obj = pickle.loads(data)
|
||||
...
|
||||
196
python/ql/src/Security/CWE-020-ExternalAPIs/ExternalAPIs.qll
Normal file
196
python/ql/src/Security/CWE-020-ExternalAPIs/ExternalAPIs.qll
Normal file
@@ -0,0 +1,196 @@
|
||||
/**
|
||||
* Definitions for reasoning about untrusted data used in APIs defined outside the
|
||||
* database.
|
||||
*/
|
||||
|
||||
import python
|
||||
import semmle.python.dataflow.new.DataFlow
|
||||
import semmle.python.dataflow.new.TaintTracking
|
||||
import semmle.python.Concepts
|
||||
import semmle.python.dataflow.new.RemoteFlowSources
|
||||
private import semmle.python.dataflow.new.internal.DataFlowPrivate as DataFlowPrivate
|
||||
private import semmle.python.dataflow.new.internal.TaintTrackingPrivate as TaintTrackingPrivate
|
||||
private import semmle.python.types.Builtins
|
||||
private import semmle.python.objects.ObjectInternal
|
||||
|
||||
// IMPLEMENTATION NOTES:
|
||||
//
|
||||
// This query uses *both* the new data-flow library, and points-to. Why? To get this
|
||||
// finished quickly, so it can provide value for our field team and ourselves.
|
||||
//
|
||||
// In the long run, it should not need to use points-to for anything. Possibly this can
|
||||
// even be helpful in figuring out what we need from TypeTrackers and the new data-flow
|
||||
// library to be fully operational.
|
||||
//
|
||||
// At least it will allow us to provide a baseline comparison against a solution that
|
||||
// doesn't use points-to at all
|
||||
//
|
||||
// There is a few dirty things we do here:
|
||||
// 1. DataFlowPrivate: since `DataFlowCall` and `DataFlowCallable` are not exposed
|
||||
// publicly, but we really want access to them.
|
||||
// 2. points-to: we kinda need to do this since this is what powers `DataFlowCall` and
|
||||
// `DataFlowCallable`
|
||||
// 3. ObjectInternal: to provide better names for built-in functions and methods. If we
|
||||
// really wanted to polish our points-to implementation, we could move this
|
||||
// functionality into `BuiltinFunctionValue` and `BuiltinMethodValue`, but will
|
||||
// probably require some more work: for this query, it's totally ok to use
|
||||
// `builtins.open` for the code `open(f)`, but well, it requires a bit of thinking to
|
||||
// figure out if that is desireable in general. I simply skipped a corner here!
|
||||
// 4. TaintTrackingPrivate: Nothing else gives us access to `defaultAdditionalTaintStep` :(
|
||||
/**
|
||||
* A callable that is considered a "safe" external API from a security perspective.
|
||||
*/
|
||||
class SafeExternalAPI extends Unit {
|
||||
abstract DataFlowPrivate::DataFlowCallable getSafeCallable();
|
||||
}
|
||||
|
||||
/** The default set of "safe" external APIs. */
|
||||
private class DefaultSafeExternalAPI extends SafeExternalAPI {
|
||||
override DataFlowPrivate::DataFlowCallable getSafeCallable() {
|
||||
exists(CallableValue cv | cv = result.getCallableValue() |
|
||||
cv = Value::named(["len", "isinstance", "getattr", "hasattr"])
|
||||
or
|
||||
exists(ClassValue cls, string attr |
|
||||
cls = Value::named("dict") and attr in ["__getitem__", "__setitem__"]
|
||||
|
|
||||
cls.lookup(attr) = cv
|
||||
)
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/** A node representing data being passed to an external API through a call. */
|
||||
class ExternalAPIDataNode extends DataFlow::Node {
|
||||
DataFlowPrivate::DataFlowCall call;
|
||||
DataFlowPrivate::DataFlowCallable callable;
|
||||
int i;
|
||||
|
||||
ExternalAPIDataNode() {
|
||||
exists(call.getLocation().getFile().getRelativePath()) and
|
||||
callable = call.getCallable() and
|
||||
not any(SafeExternalAPI safe).getSafeCallable() = callable and
|
||||
exists(Value cv | cv = callable.getCallableValue() |
|
||||
cv.isAbsent()
|
||||
or
|
||||
cv.isBuiltin()
|
||||
or
|
||||
cv.(CallableValue).getScope().getLocation().getFile().inStdlib()
|
||||
or
|
||||
not exists(cv.(CallableValue).getScope().getLocation().getFile().getRelativePath())
|
||||
) and
|
||||
// TODO: this ignores some complexity of keyword arguments (especially keyword-only args)
|
||||
this = call.getArg(i) and
|
||||
// Not already modeled as a taint step
|
||||
not exists(DataFlow::Node next | TaintTrackingPrivate::defaultAdditionalTaintStep(this, next)) and
|
||||
// for `list.append(x)`, we have a additional taint step from x -> [post] list.
|
||||
// Since we have modeled this explicitly, I don't see any cases where we would want to report this.
|
||||
not exists(DataFlow::Node prev, DataFlow::PostUpdateNode post |
|
||||
post.getPreUpdateNode() = this and
|
||||
TaintTrackingPrivate::defaultAdditionalTaintStep(prev, post)
|
||||
)
|
||||
}
|
||||
|
||||
/** Gets the index for the parameter that will receive this untrusted data */
|
||||
int getIndex() { result = i }
|
||||
|
||||
/** Gets the callable to which this argument is passed. */
|
||||
DataFlowPrivate::DataFlowCallable getCallable() { result = callable }
|
||||
}
|
||||
|
||||
/** A configuration for tracking flow from `RemoteFlowSource`s to `ExternalAPIDataNode`s. */
|
||||
class UntrustedDataToExternalAPIConfig extends TaintTracking::Configuration {
|
||||
UntrustedDataToExternalAPIConfig() { this = "UntrustedDataToExternalAPIConfig" }
|
||||
|
||||
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
|
||||
|
||||
override predicate isSink(DataFlow::Node sink) { sink instanceof ExternalAPIDataNode }
|
||||
}
|
||||
|
||||
/** A node representing untrusted data being passed to an external API. */
|
||||
class UntrustedExternalAPIDataNode extends ExternalAPIDataNode {
|
||||
UntrustedExternalAPIDataNode() { any(UntrustedDataToExternalAPIConfig c).hasFlow(_, this) }
|
||||
|
||||
/** Gets a source of untrusted data which is passed to this external API data node. */
|
||||
DataFlow::Node getAnUntrustedSource() {
|
||||
any(UntrustedDataToExternalAPIConfig c).hasFlow(result, this)
|
||||
}
|
||||
}
|
||||
|
||||
private newtype TExternalAPI =
|
||||
TExternalAPIParameter(DataFlowPrivate::DataFlowCallable callable, int index) {
|
||||
exists(UntrustedExternalAPIDataNode n |
|
||||
callable = n.getCallable() and
|
||||
index = n.getIndex()
|
||||
)
|
||||
}
|
||||
|
||||
/** An external API which is used with untrusted data. */
|
||||
class ExternalAPIUsedWithUntrustedData extends TExternalAPI {
|
||||
/** Gets a possibly untrusted use of this external API. */
|
||||
UntrustedExternalAPIDataNode getUntrustedDataNode() {
|
||||
this = TExternalAPIParameter(result.getCallable(), result.getIndex())
|
||||
}
|
||||
|
||||
/** Gets the number of untrusted sources used with this external API. */
|
||||
int getNumberOfUntrustedSources() {
|
||||
result = count(getUntrustedDataNode().getAnUntrustedSource())
|
||||
}
|
||||
|
||||
/** Gets a textual representation of this element. */
|
||||
string toString() {
|
||||
exists(
|
||||
DataFlowPrivate::DataFlowCallable callable, int index, string callableString,
|
||||
string indexString
|
||||
|
|
||||
this = TExternalAPIParameter(callable, index) and
|
||||
indexString = "param " + index and
|
||||
exists(CallableValue cv | cv = callable.getCallableValue() |
|
||||
callableString =
|
||||
cv.getScope().getEnclosingModule().getName() + "." + cv.getScope().getQualifiedName()
|
||||
or
|
||||
not exists(cv.getScope()) and
|
||||
(
|
||||
cv instanceof BuiltinFunctionValue and
|
||||
callableString = pretty_builtin_function_value(cv)
|
||||
or
|
||||
cv instanceof BuiltinMethodValue and
|
||||
callableString = pretty_builtin_method_value(cv)
|
||||
or
|
||||
not cv instanceof BuiltinFunctionValue and
|
||||
not cv instanceof BuiltinMethodValue and
|
||||
callableString = cv.toString()
|
||||
)
|
||||
) and
|
||||
result = callableString + " [" + indexString + "]"
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/** Gets the fully qualified name for the `BuiltinFunctionValue` bfv. */
|
||||
private string pretty_builtin_function_value(BuiltinFunctionValue bfv) {
|
||||
exists(Builtin b | b = bfv.(BuiltinFunctionObjectInternal).getBuiltin() |
|
||||
result = prefix_with_module_if_found(b)
|
||||
)
|
||||
}
|
||||
|
||||
/** Gets the fully qualified name for the `BuiltinMethodValue` bmv. */
|
||||
private string pretty_builtin_method_value(BuiltinMethodValue bmv) {
|
||||
exists(Builtin b | b = bmv.(BuiltinMethodObjectInternal).getBuiltin() |
|
||||
exists(Builtin cls | cls.isClass() and cls.getMember(b.getName()) = b |
|
||||
result = prefix_with_module_if_found(cls) + "." + b.getName()
|
||||
)
|
||||
or
|
||||
not exists(Builtin cls | cls.isClass() and cls.getMember(b.getName()) = b) and
|
||||
result = b.getName()
|
||||
)
|
||||
}
|
||||
|
||||
/** Helper predicate that tries to adds module qualifier to `b`. Will succeed even if module not found. */
|
||||
private string prefix_with_module_if_found(Builtin b) {
|
||||
exists(Builtin mod | mod.isModule() and mod.getMember(b.getName()) = b |
|
||||
result = mod.getName() + "." + b.getName()
|
||||
)
|
||||
or
|
||||
not exists(Builtin mod | mod.isModule() and mod.getMember(b.getName()) = b) and
|
||||
result = b.getName()
|
||||
}
|
||||
@@ -0,0 +1,61 @@
|
||||
<!DOCTYPE qhelp PUBLIC
|
||||
"-//Semmle//qhelp//EN"
|
||||
"qhelp.dtd">
|
||||
<qhelp>
|
||||
<overview>
|
||||
<p>Using unsanitized untrusted data in an external API can cause a variety of security
|
||||
issues. This query reports external APIs that are used with untrusted data, along
|
||||
with how frequently the API is used, and how many unique sources of untrusted data flow
|
||||
to this API. This query is designed primarily to help identify which APIs may be
|
||||
relevant for security analysis of this application.</p>
|
||||
|
||||
<p>An external API is defined as a call to a method that is not defined in the source
|
||||
code, and is not modeled as a taint step in the default taint library. External APIs may
|
||||
be from the Python standard library or dependencies. The query will report the fully qualified name,
|
||||
along with <code>[param x]</code>, where <code>x</code> indicates the position of
|
||||
the parameter receiving the untrusted data. Note that for methods and
|
||||
<code>classmethod</code>s, parameter 0 represents the class instance or class itself
|
||||
respectively.</p>
|
||||
|
||||
<p>Note that an excepted sink might not be included in the results, if it also defines a
|
||||
taint step. This is the case for <code>pickle.loads</code> which is a sink for the
|
||||
Unsafe Deserialization query, but is also a taint step for other queries.</p>
|
||||
|
||||
<p>Note: Compared to the Java version of this query, we currently do not give special
|
||||
care to methods that are overridden in the source code.</p>
|
||||
|
||||
<p>Note: Currently this query will only report results for external packages that are extracted.</p>
|
||||
|
||||
</overview>
|
||||
<recommendation>
|
||||
|
||||
<p>For each result:</p>
|
||||
|
||||
<ul>
|
||||
<li>If the result highlights a known sink, no action is required.</li>
|
||||
<li>If the result highlights an unknown sink for a problem, then add modeling for the sink to the relevant query.</li>
|
||||
<li>If the result represents a call to an external API which transfers taint, add the appropriate modeling, and
|
||||
re-run the query to determine what new results have appeared due to this additional modeling.</li>
|
||||
</ul>
|
||||
|
||||
<p>Otherwise, the result is likely uninteresting. Custom versions of this query can extend the <code>SafeExternalAPI</code>
|
||||
class and specify <code>getSafeCallable</code> to exclude known safe external APIs from future analysis.</p>
|
||||
|
||||
</recommendation>
|
||||
<example>
|
||||
|
||||
<p>If the query were to return the API <code>flask.make_response [param 0]</code>
|
||||
then we should first consider whether this a security relevant sink. In this case, this is making a HTTP response, so we should
|
||||
consider whether this is an XSS sink. If it is, we should confirm that it is handled by the XSS query.</p>
|
||||
|
||||
<p>If the query were to return the API <code>base64.decodebytes [param 0]</code>, then this should be
|
||||
reviewed as a possible taint step, because tainted data would flow from the 0th argument to the result of the call.</p>
|
||||
|
||||
<p>Note that both examples are correctly handled by the standard taint tracking library and XSS query.</p>
|
||||
</example>
|
||||
<references>
|
||||
|
||||
</references>
|
||||
</qhelp>
|
||||
|
||||
Mention something about `pickle.loads(user_input)`
|
||||
@@ -0,0 +1,17 @@
|
||||
/**
|
||||
* @name Frequency counts for external APIs that are used with untrusted data
|
||||
* @description This reports the external APIs that are used with untrusted data, along with how
|
||||
* frequently the API is called, and how many unique sources of untrusted data flow
|
||||
* to it.
|
||||
* @id python/count-untrusted-data-external-api
|
||||
* @kind table
|
||||
* @tags security external/cwe/cwe-20
|
||||
*/
|
||||
|
||||
import python
|
||||
import ExternalAPIs
|
||||
|
||||
from ExternalAPIUsedWithUntrustedData externalAPI
|
||||
select externalAPI, count(externalAPI.getUntrustedDataNode()) as numberOfUses,
|
||||
externalAPI.getNumberOfUntrustedSources() as numberOfUntrustedSources order by
|
||||
numberOfUntrustedSources desc
|
||||
@@ -0,0 +1,74 @@
|
||||
<!DOCTYPE qhelp PUBLIC
|
||||
"-//Semmle//qhelp//EN"
|
||||
"qhelp.dtd">
|
||||
<qhelp>
|
||||
<overview>
|
||||
<p>Using unsanitized untrusted data in an external API can cause a variety of security
|
||||
issues. This query reports external APIs that use untrusted data. The results are not
|
||||
filtered so that you can audit all examples. The query provides data for security
|
||||
reviews of the application and you can also use it to identify external APIs that should
|
||||
be modeled as either taint steps, or sinks for specific problems.</p>
|
||||
|
||||
<p>An external API is defined as a call to a method that is not defined in the source
|
||||
code, and is not modeled as a taint step in the default taint library. External APIs may
|
||||
be from the Python standard library or dependencies. The query will report the fully qualified name,
|
||||
along with <code>[param x]</code>, where <code>x</code> indicates the position of
|
||||
the parameter receiving the untrusted data. Note that for methods and
|
||||
<code>classmethod</code>s, parameter 0 represents the class instance or class itself
|
||||
respectively.</p>
|
||||
|
||||
<p>Note that an excepted sink might not be included in the results, if it also defines a
|
||||
taint step. This is the case for <code>pickle.loads</code> which is a sink for the
|
||||
Unsafe Deserialization query, but is also a taint step for other queries.</p>
|
||||
|
||||
<p>Note: Compared to the Java version of this query, we currently do not give special
|
||||
care to methods that are overridden in the source code.</p>
|
||||
|
||||
<p>Note: Currently this query will only report results for external packages that are extracted.</p>
|
||||
|
||||
</overview>
|
||||
<recommendation>
|
||||
|
||||
<p>For each result:</p>
|
||||
|
||||
<ul>
|
||||
<li>If the result highlights a known sink, confirm that the result is reported by the relevant query, or
|
||||
that the result is a false positive because this data is sanitized.</li>
|
||||
<li>If the result highlights an unknown sink for a problem, then add modeling for the sink to the relevant query,
|
||||
and confirm that the result is either found, or is safe due to appropriate sanitization.</li>
|
||||
<li>If the result represents a call to an external API that transfers taint, add the appropriate modeling, and
|
||||
re-run the query to determine what new results have appeared due to this additional modeling.</li>
|
||||
</ul>
|
||||
|
||||
<p>Otherwise, the result is likely uninteresting. Custom versions of this query can extend the <code>SafeExternalAPI</code>
|
||||
class and specify <code>getSafeCallable</code> to exclude known safe external APIs from future analysis.</p>
|
||||
|
||||
</recommendation>
|
||||
<example>
|
||||
|
||||
<p>In this first example, a request parameter is read from the Flask <code>request</code> and then ultimately used in a call to the
|
||||
<code>flask.make_response</code> external API:</p>
|
||||
|
||||
<sample src="ExternalAPISinkExample.py" />
|
||||
|
||||
<p>This is an XSS sink. The XSS query should therefore be reviewed to confirm that this sink is appropriately modeled,
|
||||
and if it is, to confirm that the query reports this particular result, or that the result is a false positive due to
|
||||
some existing sanitization.</p>
|
||||
|
||||
<p>In this second example, again a request parameter is read from the Flask <code>request</code>.</p>
|
||||
|
||||
<sample src="ExternalAPITaintStepExample.py" />
|
||||
|
||||
<p>If the query reported the call to <code>base64.decodebytes</code> on line 10, this
|
||||
would suggest that this external API is not currently modeled as a taint step in the
|
||||
taint tracking library. The next step would be to model this as a taint step, then
|
||||
re-run the query to determine what additional results might be found. In this example,
|
||||
the result of the Base64 decoding is pickled, which can result in remote code execution due
|
||||
to unsafe deserialization.
|
||||
|
||||
<p>Note that both examples are correctly handled by the standard taint tracking library and Unsafe Deserialization query.</p>
|
||||
</example>
|
||||
<references>
|
||||
|
||||
</references>
|
||||
</qhelp>
|
||||
@@ -0,0 +1,23 @@
|
||||
/**
|
||||
* @name Untrusted data passed to external API
|
||||
* @description Data provided remotely is used in this external API without sanitization, which could be a security risk.
|
||||
* @id python/untrusted-data-to-external-api
|
||||
* @kind path-problem
|
||||
* @precision low
|
||||
* @problem.severity error
|
||||
* @tags security external/cwe/cwe-20
|
||||
*/
|
||||
|
||||
import python
|
||||
import ExternalAPIs
|
||||
import DataFlow::PathGraph
|
||||
|
||||
from
|
||||
UntrustedDataToExternalAPIConfig config, DataFlow::PathNode source, DataFlow::PathNode sink,
|
||||
ExternalAPIUsedWithUntrustedData externalAPI
|
||||
where
|
||||
sink.getNode() = externalAPI.getUntrustedDataNode() and
|
||||
config.hasFlowPath(source, sink)
|
||||
select sink.getNode(), source, sink,
|
||||
"Call to " + externalAPI.toString() + " with untrusted data from $@.", source.getNode(),
|
||||
source.toString()
|
||||
Reference in New Issue
Block a user