mirror of
https://github.com/github/codeql.git
synced 2025-12-21 19:26:31 +01:00
Python: Fix ExternalAPIs queries
The output might end up being slightly more noisy since we don't collapse positional and keyword arguments when the external target function is included in the database, but this aligns with our long-term goal of not doing that anymore, so I think it's fine.
This commit is contained in:
@@ -1,48 +1,36 @@
|
||||
/**
|
||||
* Definitions for reasoning about untrusted data used in APIs defined outside the
|
||||
* database.
|
||||
* user-written code.
|
||||
*/
|
||||
|
||||
import python
|
||||
private import python
|
||||
import semmle.python.dataflow.new.DataFlow
|
||||
import semmle.python.dataflow.new.TaintTracking
|
||||
import semmle.python.Concepts
|
||||
import semmle.python.dataflow.new.RemoteFlowSources
|
||||
private import semmle.python.dataflow.new.TaintTracking
|
||||
private import semmle.python.dataflow.new.RemoteFlowSources
|
||||
private import semmle.python.ApiGraphs
|
||||
private import semmle.python.dataflow.new.internal.DataFlowPrivate as DataFlowPrivate
|
||||
private import semmle.python.dataflow.new.internal.TaintTrackingPrivate as TaintTrackingPrivate
|
||||
private import semmle.python.types.Builtins
|
||||
private import semmle.python.objects.ObjectInternal
|
||||
|
||||
// IMPLEMENTATION NOTES:
|
||||
//
|
||||
// This query uses *both* the new data-flow library, and points-to. Why? To get this
|
||||
// finished quickly, so it can provide value for our field team and ourselves.
|
||||
//
|
||||
// In the long run, it should not need to use points-to for anything. Possibly this can
|
||||
// even be helpful in figuring out what we need from TypeTrackers and the new data-flow
|
||||
// library to be fully operational.
|
||||
//
|
||||
// At least it will allow us to provide a baseline comparison against a solution that
|
||||
// doesn't use points-to at all
|
||||
//
|
||||
// There is a few dirty things we do here:
|
||||
// 1. DataFlowPrivate: since `DataFlowCall` and `DataFlowCallable` are not exposed
|
||||
// publicly, but we really want access to them.
|
||||
// 2. points-to: we kinda need to do this since this is what powers `DataFlowCall` and
|
||||
// `DataFlowCallable`
|
||||
// 3. ObjectInternal: to provide better names for built-in functions and methods. If we
|
||||
// really wanted to polish our points-to implementation, we could move this
|
||||
// functionality into `BuiltinFunctionValue` and `BuiltinMethodValue`, but will
|
||||
// probably require some more work: for this query, it's totally ok to use
|
||||
// `builtins.open` for the code `open(f)`, but well, it requires a bit of thinking to
|
||||
// figure out if that is desirable in general. I simply skipped a corner here!
|
||||
// 4. TaintTrackingPrivate: Nothing else gives us access to `defaultAdditionalTaintStep` :(
|
||||
/**
|
||||
* A callable that is considered a "safe" external API from a security perspective.
|
||||
* An external API that is considered a "safe" from a security perspective.
|
||||
*/
|
||||
class SafeExternalApi extends Unit {
|
||||
/** Gets a callable that is considered a "safe" external API from a security perspective. */
|
||||
abstract DataFlowPrivate::DataFlowCallable getSafeCallable();
|
||||
/**
|
||||
* Gets a call that is considered "safe" from a security perspective. You can use API
|
||||
* graphs to find calls to functions you know are safe.
|
||||
*
|
||||
* Which works even when the external library isn't extracted.
|
||||
*/
|
||||
abstract DataFlow::CallCfgNode getSafeCall();
|
||||
|
||||
/**
|
||||
* Gets a callable that is considered a "safe" external API from a security
|
||||
* perspective.
|
||||
*
|
||||
* You probably want to define this as `none()` and use `getSafeCall` instead, since
|
||||
* that can handle the external library not being extracted.
|
||||
*/
|
||||
DataFlowPrivate::DataFlowCallable getSafeCallable() { none() }
|
||||
}
|
||||
|
||||
/** DEPRECATED: Alias for SafeExternalApi */
|
||||
@@ -50,57 +38,112 @@ deprecated class SafeExternalAPI = SafeExternalApi;
|
||||
|
||||
/** The default set of "safe" external APIs. */
|
||||
private class DefaultSafeExternalApi extends SafeExternalApi {
|
||||
override DataFlowPrivate::DataFlowCallable getSafeCallable() {
|
||||
exists(CallableValue cv | cv = result.getCallableValue() |
|
||||
cv = Value::named(["len", "isinstance", "getattr", "hasattr"])
|
||||
or
|
||||
exists(ClassValue cls, string attr |
|
||||
cls = Value::named("dict") and attr in ["__getitem__", "__setitem__"]
|
||||
|
|
||||
cls.lookup(attr) = cv
|
||||
)
|
||||
override DataFlow::CallCfgNode getSafeCall() {
|
||||
result = API::builtin(["len", "isinstance", "getattr", "hasattr"]).getACall()
|
||||
}
|
||||
}
|
||||
|
||||
/** Gets a human readable representation of `node`. */
|
||||
string apiNodeToStringRepr(API::Node node) {
|
||||
node = API::builtin(result)
|
||||
or
|
||||
node = API::moduleImport(result)
|
||||
or
|
||||
exists(API::Node base, string basename |
|
||||
base.getDepth() < node.getDepth() and
|
||||
basename = apiNodeToStringRepr(base)
|
||||
|
|
||||
exists(string m | node = base.getMember(m) | result = basename + "." + m)
|
||||
or
|
||||
node = base.getReturn() and
|
||||
result = basename + "()"
|
||||
or
|
||||
node = base.getAwaited() and
|
||||
result = basename
|
||||
)
|
||||
}
|
||||
|
||||
newtype TInterestingExternalApiCall =
|
||||
TUnresolvedCall(DataFlow::CallCfgNode call) {
|
||||
exists(call.getLocation().getFile().getRelativePath()) and
|
||||
not exists(DataFlowPrivate::DataFlowCall dfCall | dfCall.getNode() = call.getNode()) and
|
||||
not call = any(SafeExternalApi safe).getSafeCall()
|
||||
} or
|
||||
TResolvedCall(DataFlowPrivate::DataFlowCall call) {
|
||||
exists(call.getLocation().getFile().getRelativePath()) and
|
||||
not call.getCallable() = any(SafeExternalApi safe).getSafeCallable() and
|
||||
not exists(call.getCallable().getLocation().getFile().getRelativePath())
|
||||
}
|
||||
|
||||
abstract class InterestingExternalApiCall extends TInterestingExternalApiCall {
|
||||
/** Gets the argument at position `apos`, if any */
|
||||
abstract DataFlow::Node getArgument(DataFlowPrivate::ArgumentPosition apos);
|
||||
|
||||
/** Gets a textual representation of this element. */
|
||||
abstract string toString();
|
||||
|
||||
/**
|
||||
* Gets a human-readable name for the external API.
|
||||
*/
|
||||
abstract string getApiName();
|
||||
}
|
||||
|
||||
class ResolvedCall extends InterestingExternalApiCall, TResolvedCall {
|
||||
DataFlowPrivate::DataFlowCall dfCall;
|
||||
|
||||
ResolvedCall() { this = TResolvedCall(dfCall) }
|
||||
|
||||
override DataFlow::Node getArgument(DataFlowPrivate::ArgumentPosition apos) {
|
||||
result = dfCall.getArgument(apos)
|
||||
}
|
||||
|
||||
override string toString() { result = "ExternalAPI:ResolvedCall" }
|
||||
|
||||
override string getApiName() {
|
||||
exists(DataFlow::CallCfgNode call, API::Node apiNode | dfCall.getNode() = call.getNode() |
|
||||
result = apiNodeToStringRepr(apiNode) and
|
||||
apiNode.getACall() = call
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
class UnresolvedCall extends InterestingExternalApiCall, TUnresolvedCall {
|
||||
DataFlow::CallCfgNode call;
|
||||
|
||||
UnresolvedCall() { this = TUnresolvedCall(call) }
|
||||
|
||||
override DataFlow::Node getArgument(DataFlowPrivate::ArgumentPosition apos) {
|
||||
exists(int i | apos.isPositional(i) | result = call.getArg(i))
|
||||
or
|
||||
exists(string name | apos.isKeyword(name) | result = call.getArgByName(name))
|
||||
}
|
||||
|
||||
override string toString() { result = "ExternalAPI:UnresolvedCall" }
|
||||
|
||||
override string getApiName() {
|
||||
exists(API::Node apiNode |
|
||||
result = apiNodeToStringRepr(apiNode) and
|
||||
apiNode.getACall() = call
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/** A node representing data being passed to an external API through a call. */
|
||||
class ExternalApiDataNode extends DataFlow::Node {
|
||||
DataFlowPrivate::DataFlowCallable callable;
|
||||
int i;
|
||||
InterestingExternalApiCall call;
|
||||
DataFlowPrivate::ArgumentPosition apos;
|
||||
|
||||
ExternalApiDataNode() {
|
||||
exists(DataFlowPrivate::DataFlowCall call |
|
||||
exists(call.getLocation().getFile().getRelativePath())
|
||||
|
|
||||
callable = call.getCallable() and
|
||||
// TODO: this ignores some complexity of keyword arguments (especially keyword-only args)
|
||||
this = call.getArg(i)
|
||||
) and
|
||||
not any(SafeExternalApi safe).getSafeCallable() = callable and
|
||||
exists(Value cv | cv = callable.getCallableValue() |
|
||||
cv.isAbsent()
|
||||
or
|
||||
cv.isBuiltin()
|
||||
or
|
||||
cv.(CallableValue).getScope().getLocation().getFile().inStdlib()
|
||||
or
|
||||
not exists(cv.(CallableValue).getScope().getLocation().getFile().getRelativePath())
|
||||
) and
|
||||
this = call.getArgument(apos) and
|
||||
// Not already modeled as a taint step
|
||||
not exists(DataFlow::Node next | TaintTrackingPrivate::defaultAdditionalTaintStep(this, next)) and
|
||||
// for `list.append(x)`, we have a additional taint step from x -> [post] list.
|
||||
// Since we have modeled this explicitly, I don't see any cases where we would want to report this.
|
||||
not exists(DataFlow::Node prev, DataFlow::PostUpdateNode post |
|
||||
not exists(DataFlow::PostUpdateNode post |
|
||||
post.getPreUpdateNode() = this and
|
||||
TaintTrackingPrivate::defaultAdditionalTaintStep(prev, post)
|
||||
TaintTrackingPrivate::defaultAdditionalTaintStep(_, post)
|
||||
)
|
||||
}
|
||||
|
||||
/** Gets the index for the parameter that will receive this untrusted data */
|
||||
int getIndex() { result = i }
|
||||
|
||||
/** Gets the callable to which this argument is passed. */
|
||||
DataFlowPrivate::DataFlowCallable getCallable() { result = callable }
|
||||
}
|
||||
|
||||
/** DEPRECATED: Alias for ExternalApiDataNode */
|
||||
@@ -133,19 +176,26 @@ deprecated class UntrustedExternalAPIDataNode = UntrustedExternalApiDataNode;
|
||||
|
||||
/** An external API which is used with untrusted data. */
|
||||
private newtype TExternalApi =
|
||||
/** An untrusted API method `m` where untrusted data is passed at `index`. */
|
||||
TExternalApiParameter(DataFlowPrivate::DataFlowCallable callable, int index) {
|
||||
exists(UntrustedExternalApiDataNode n |
|
||||
callable = n.getCallable() and
|
||||
index = n.getIndex()
|
||||
MkExternalApi(string repr, DataFlowPrivate::ArgumentPosition apos) {
|
||||
exists(UntrustedExternalApiDataNode ex, InterestingExternalApiCall call |
|
||||
ex = call.getArgument(apos) and
|
||||
repr = call.getApiName()
|
||||
)
|
||||
}
|
||||
|
||||
/** An external API which is used with untrusted data. */
|
||||
class ExternalApiUsedWithUntrustedData extends TExternalApi {
|
||||
/** A argument of an external API which is used with untrusted data. */
|
||||
class ExternalApiUsedWithUntrustedData extends MkExternalApi {
|
||||
string repr;
|
||||
DataFlowPrivate::ArgumentPosition apos;
|
||||
|
||||
ExternalApiUsedWithUntrustedData() { this = MkExternalApi(repr, apos) }
|
||||
|
||||
/** Gets a possibly untrusted use of this external API. */
|
||||
UntrustedExternalApiDataNode getUntrustedDataNode() {
|
||||
this = TExternalApiParameter(result.getCallable(), result.getIndex())
|
||||
exists(InterestingExternalApiCall call |
|
||||
result = call.getArgument(apos) and
|
||||
call.getApiName() = repr
|
||||
)
|
||||
}
|
||||
|
||||
/** Gets the number of untrusted sources used with this external API. */
|
||||
@@ -154,63 +204,8 @@ class ExternalApiUsedWithUntrustedData extends TExternalApi {
|
||||
}
|
||||
|
||||
/** Gets a textual representation of this element. */
|
||||
string toString() {
|
||||
exists(
|
||||
DataFlowPrivate::DataFlowCallable callable, int index, string callableString,
|
||||
string indexString
|
||||
|
|
||||
this = TExternalApiParameter(callable, index) and
|
||||
indexString = "param " + index and
|
||||
exists(CallableValue cv | cv = callable.getCallableValue() |
|
||||
callableString =
|
||||
cv.getScope().getEnclosingModule().getName() + "." + cv.getScope().getQualifiedName()
|
||||
or
|
||||
not exists(cv.getScope()) and
|
||||
(
|
||||
cv instanceof BuiltinFunctionValue and
|
||||
callableString = pretty_builtin_function_value(cv)
|
||||
or
|
||||
cv instanceof BuiltinMethodValue and
|
||||
callableString = pretty_builtin_method_value(cv)
|
||||
or
|
||||
not cv instanceof BuiltinFunctionValue and
|
||||
not cv instanceof BuiltinMethodValue and
|
||||
callableString = cv.toString()
|
||||
)
|
||||
) and
|
||||
result = callableString + " [" + indexString + "]"
|
||||
)
|
||||
}
|
||||
string toString() { result = repr + " [" + apos + "]" }
|
||||
}
|
||||
|
||||
/** DEPRECATED: Alias for ExternalApiUsedWithUntrustedData */
|
||||
deprecated class ExternalAPIUsedWithUntrustedData = ExternalApiUsedWithUntrustedData;
|
||||
|
||||
/** Gets the fully qualified name for the `BuiltinFunctionValue` bfv. */
|
||||
private string pretty_builtin_function_value(BuiltinFunctionValue bfv) {
|
||||
exists(Builtin b | b = bfv.(BuiltinFunctionObjectInternal).getBuiltin() |
|
||||
result = prefix_with_module_if_found(b)
|
||||
)
|
||||
}
|
||||
|
||||
/** Gets the fully qualified name for the `BuiltinMethodValue` bmv. */
|
||||
private string pretty_builtin_method_value(BuiltinMethodValue bmv) {
|
||||
exists(Builtin b | b = bmv.(BuiltinMethodObjectInternal).getBuiltin() |
|
||||
exists(Builtin cls | cls.isClass() and cls.getMember(b.getName()) = b |
|
||||
result = prefix_with_module_if_found(cls) + "." + b.getName()
|
||||
)
|
||||
or
|
||||
not exists(Builtin cls | cls.isClass() and cls.getMember(b.getName()) = b) and
|
||||
result = b.getName()
|
||||
)
|
||||
}
|
||||
|
||||
/** Helper predicate that tries to adds module qualifier to `b`. Will succeed even if module not found. */
|
||||
private string prefix_with_module_if_found(Builtin b) {
|
||||
exists(Builtin mod | mod.isModule() and mod.getMember(b.getName()) = b |
|
||||
result = mod.getName() + "." + b.getName()
|
||||
)
|
||||
or
|
||||
not exists(Builtin mod | mod.isModule() and mod.getMember(b.getName()) = b) and
|
||||
result = b.getName()
|
||||
}
|
||||
|
||||
@@ -1 +1,4 @@
|
||||
| hmac.new [param 1] | 2 | 1 |
|
||||
| hmac.new [keyword msg] | 1 | 1 |
|
||||
| hmac.new [position 1] | 1 | 1 |
|
||||
| unknown.lib.func [keyword kw] | 2 | 1 |
|
||||
| unknown.lib.func [position 0] | 2 | 1 |
|
||||
|
||||
@@ -1,12 +1,20 @@
|
||||
edges
|
||||
| test.py:0:0:0:0 | ModuleVariableNode for test.request | test.py:13:16:13:22 | ControlFlowNode for request |
|
||||
| test.py:0:0:0:0 | ModuleVariableNode for test.request | test.py:23:16:23:22 | ControlFlowNode for request |
|
||||
| test.py:0:0:0:0 | ModuleVariableNode for test.request | test.py:34:12:34:18 | ControlFlowNode for request |
|
||||
| test.py:0:0:0:0 | ModuleVariableNode for test.request | test.py:42:12:42:18 | ControlFlowNode for request |
|
||||
| test.py:5:26:5:32 | ControlFlowNode for ImportMember | test.py:5:26:5:32 | GSSA Variable request |
|
||||
| test.py:5:26:5:32 | GSSA Variable request | test.py:0:0:0:0 | ModuleVariableNode for test.request |
|
||||
| test.py:13:16:13:22 | ControlFlowNode for request | test.py:13:16:13:27 | ControlFlowNode for Attribute |
|
||||
| test.py:13:16:13:27 | ControlFlowNode for Attribute | test.py:15:36:15:39 | ControlFlowNode for data |
|
||||
| test.py:23:16:23:22 | ControlFlowNode for request | test.py:23:16:23:27 | ControlFlowNode for Attribute |
|
||||
| test.py:23:16:23:27 | ControlFlowNode for Attribute | test.py:25:44:25:47 | ControlFlowNode for data |
|
||||
| test.py:34:12:34:18 | ControlFlowNode for request | test.py:34:12:34:23 | ControlFlowNode for Attribute |
|
||||
| test.py:34:12:34:23 | ControlFlowNode for Attribute | test.py:35:10:35:13 | ControlFlowNode for data |
|
||||
| test.py:34:12:34:23 | ControlFlowNode for Attribute | test.py:36:13:36:16 | ControlFlowNode for data |
|
||||
| test.py:42:12:42:18 | ControlFlowNode for request | test.py:42:12:42:23 | ControlFlowNode for Attribute |
|
||||
| test.py:42:12:42:23 | ControlFlowNode for Attribute | test.py:43:22:43:25 | ControlFlowNode for data |
|
||||
| test.py:42:12:42:23 | ControlFlowNode for Attribute | test.py:44:25:44:28 | ControlFlowNode for data |
|
||||
nodes
|
||||
| test.py:0:0:0:0 | ModuleVariableNode for test.request | semmle.label | ModuleVariableNode for test.request |
|
||||
| test.py:5:26:5:32 | ControlFlowNode for ImportMember | semmle.label | ControlFlowNode for ImportMember |
|
||||
@@ -17,7 +25,19 @@ nodes
|
||||
| test.py:23:16:23:22 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
|
||||
| test.py:23:16:23:27 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
|
||||
| test.py:25:44:25:47 | ControlFlowNode for data | semmle.label | ControlFlowNode for data |
|
||||
| test.py:34:12:34:18 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
|
||||
| test.py:34:12:34:23 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
|
||||
| test.py:35:10:35:13 | ControlFlowNode for data | semmle.label | ControlFlowNode for data |
|
||||
| test.py:36:13:36:16 | ControlFlowNode for data | semmle.label | ControlFlowNode for data |
|
||||
| test.py:42:12:42:18 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
|
||||
| test.py:42:12:42:23 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
|
||||
| test.py:43:22:43:25 | ControlFlowNode for data | semmle.label | ControlFlowNode for data |
|
||||
| test.py:44:25:44:28 | ControlFlowNode for data | semmle.label | ControlFlowNode for data |
|
||||
subpaths
|
||||
#select
|
||||
| test.py:15:36:15:39 | ControlFlowNode for data | test.py:5:26:5:32 | ControlFlowNode for ImportMember | test.py:15:36:15:39 | ControlFlowNode for data | Call to hmac.new [param 1] with untrusted data from $@. | test.py:5:26:5:32 | ControlFlowNode for ImportMember | ControlFlowNode for ImportMember |
|
||||
| test.py:25:44:25:47 | ControlFlowNode for data | test.py:5:26:5:32 | ControlFlowNode for ImportMember | test.py:25:44:25:47 | ControlFlowNode for data | Call to hmac.new [param 1] with untrusted data from $@. | test.py:5:26:5:32 | ControlFlowNode for ImportMember | ControlFlowNode for ImportMember |
|
||||
| test.py:15:36:15:39 | ControlFlowNode for data | test.py:5:26:5:32 | ControlFlowNode for ImportMember | test.py:15:36:15:39 | ControlFlowNode for data | Call to hmac.new [position 1] with untrusted data from $@. | test.py:5:26:5:32 | ControlFlowNode for ImportMember | ControlFlowNode for ImportMember |
|
||||
| test.py:25:44:25:47 | ControlFlowNode for data | test.py:5:26:5:32 | ControlFlowNode for ImportMember | test.py:25:44:25:47 | ControlFlowNode for data | Call to hmac.new [keyword msg] with untrusted data from $@. | test.py:5:26:5:32 | ControlFlowNode for ImportMember | ControlFlowNode for ImportMember |
|
||||
| test.py:35:10:35:13 | ControlFlowNode for data | test.py:5:26:5:32 | ControlFlowNode for ImportMember | test.py:35:10:35:13 | ControlFlowNode for data | Call to unknown.lib.func [position 0] with untrusted data from $@. | test.py:5:26:5:32 | ControlFlowNode for ImportMember | ControlFlowNode for ImportMember |
|
||||
| test.py:36:13:36:16 | ControlFlowNode for data | test.py:5:26:5:32 | ControlFlowNode for ImportMember | test.py:36:13:36:16 | ControlFlowNode for data | Call to unknown.lib.func [keyword kw] with untrusted data from $@. | test.py:5:26:5:32 | ControlFlowNode for ImportMember | ControlFlowNode for ImportMember |
|
||||
| test.py:43:22:43:25 | ControlFlowNode for data | test.py:5:26:5:32 | ControlFlowNode for ImportMember | test.py:43:22:43:25 | ControlFlowNode for data | Call to unknown.lib.func [position 0] with untrusted data from $@. | test.py:5:26:5:32 | ControlFlowNode for ImportMember | ControlFlowNode for ImportMember |
|
||||
| test.py:44:25:44:28 | ControlFlowNode for data | test.py:5:26:5:32 | ControlFlowNode for ImportMember | test.py:44:25:44:28 | ControlFlowNode for data | Call to unknown.lib.func [keyword kw] with untrusted data from $@. | test.py:5:26:5:32 | ControlFlowNode for ImportMember | ControlFlowNode for ImportMember |
|
||||
|
||||
@@ -32,16 +32,16 @@ def hmac_example2():
|
||||
def unknown_lib_1():
|
||||
from unknown.lib import func
|
||||
data = request.args.get("data")
|
||||
func(data) # TODO: currently not recognized
|
||||
func(kw=data) # TODO: currently not recognized
|
||||
func(data)
|
||||
func(kw=data)
|
||||
|
||||
|
||||
@app.route("/unknown-lib-2")
|
||||
def unknown_lib_2():
|
||||
import unknown.lib
|
||||
data = request.args.get("data")
|
||||
unknown.lib.func(data) # TODO: currently not recognized
|
||||
unknown.lib.func(kw=data) # TODO: currently not recognized
|
||||
unknown.lib.func(data)
|
||||
unknown.lib.func(kw=data)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
Reference in New Issue
Block a user