From 86b5f0adc7e6660f1a0cc562eb728803204e07a6 Mon Sep 17 00:00:00 2001 From: Asger F Date: Fri, 7 Jul 2023 09:42:34 +0200 Subject: [PATCH 1/2] Revert "Merge pull request #13620 from github/revert-13496-rb/tracking-on-demand" This reverts commit 133de56ac24635863b582955a6e5f697b63c57e1, reversing changes made to 28a8e48351b5dd979fdcc7d01bbabb3362f24dc9. --- .../dataflow/new/internal/TypeTracker.qll | 45 +- .../2023-06-28-tracking-on-demand.md | 7 + ruby/ql/lib/codeql/ruby/ApiGraphs.qll | 1850 +++++++++++------ .../ruby/dataflow/internal/DataFlowPublic.qll | 134 +- .../UnicodeBypassValidationQuery.qll | 8 +- .../experimental/ZipSlipCustomizations.qll | 2 +- .../ruby/frameworks/ActionController.qll | 47 +- .../codeql/ruby/frameworks/ActionMailbox.qll | 6 +- .../codeql/ruby/frameworks/ActionMailer.qll | 42 +- .../codeql/ruby/frameworks/ActiveRecord.qll | 265 +-- .../codeql/ruby/frameworks/ActiveResource.qll | 196 +- .../ql/lib/codeql/ruby/frameworks/GraphQL.qll | 25 +- .../ql/lib/codeql/ruby/frameworks/Sqlite3.qll | 25 +- ruby/ql/lib/codeql/ruby/frameworks/Twirp.qll | 54 +- .../lib/codeql/ruby/frameworks/core/Gem.qll | 4 +- .../codeql/ruby/frameworks/core/Kernel.qll | 3 +- .../ruby/frameworks/data/ModelsAsData.qll | 2 +- .../data/internal/ApiGraphModelsSpecific.qll | 54 +- .../codeql/ruby/security/KernelOpenQuery.qll | 3 +- ruby/ql/lib/codeql/ruby/security/OpenSSL.qll | 4 +- .../ruby/typetracking/ApiGraphShared.qll | 328 +++ .../codeql/ruby/typetracking/TypeTracker.qll | 45 +- .../dataflow/api-graphs/ApiGraphs.expected | 8 +- ...ed => VerifyApiGraphExpectations.expected} | 0 .../api-graphs/VerifyApiGraphExpectations.ql | 77 + .../dataflow/api-graphs/callbacks.rb | 52 +- .../dataflow/api-graphs/chained-access.rb | 31 + .../dataflow/api-graphs/method-callbacks.rb | 64 + .../dataflow/api-graphs/self-dot-class.rb | 10 + .../dataflow/api-graphs/test1.rb | 72 +- .../library-tests/dataflow/api-graphs/use.ql | 88 - .../frameworks/Twirp/Twirp.expected | 2 + .../library-tests/frameworks/Twirp/Twirp.ql | 2 +- .../frameworks/Twirp/hello_world_server.rb | 15 +- .../action_dispatch/ActionDispatch.expected | 12 +- .../active_record/ActiveRecord.expected | 95 +- .../frameworks/active_record/ActiveRecord.ql | 14 +- .../active_resource/ActiveResource.expected | 10 + .../active_resource/ActiveResource.ql | 15 +- .../security/cwe-079/StoredXSS.expected | 10 - .../app/views/foo/stores/show.html.erb | 6 +- .../security/cwe-089/SqlInjection.expected | 9 +- .../util/test/InlineExpectationsTest.qll | 13 + 43 files changed, 2449 insertions(+), 1305 deletions(-) create mode 100644 ruby/ql/lib/change-notes/2023-06-28-tracking-on-demand.md create mode 100644 ruby/ql/lib/codeql/ruby/typetracking/ApiGraphShared.qll rename ruby/ql/test/library-tests/dataflow/api-graphs/{use.expected => VerifyApiGraphExpectations.expected} (100%) create mode 100644 ruby/ql/test/library-tests/dataflow/api-graphs/VerifyApiGraphExpectations.ql create mode 100644 ruby/ql/test/library-tests/dataflow/api-graphs/chained-access.rb create mode 100644 ruby/ql/test/library-tests/dataflow/api-graphs/method-callbacks.rb create mode 100644 ruby/ql/test/library-tests/dataflow/api-graphs/self-dot-class.rb delete mode 100644 ruby/ql/test/library-tests/dataflow/api-graphs/use.ql diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTracker.qll b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTracker.qll index fb3d7bf828f..001375b4dc5 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTracker.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTracker.qll @@ -55,10 +55,9 @@ private module Cached { ) } - pragma[nomagic] - private TypeTracker noContentTypeTracker(boolean hasCall) { - result = MkTypeTracker(hasCall, noContent()) - } + /** Gets a type tracker with no content and the call bit set to the given value. */ + cached + TypeTracker noContentTypeTracker(boolean hasCall) { result = MkTypeTracker(hasCall, noContent()) } /** Gets the summary resulting from appending `step` to type-tracking summary `tt`. */ cached @@ -318,6 +317,8 @@ class StepSummary extends TStepSummary { /** Provides predicates for updating step summaries (`StepSummary`s). */ module StepSummary { + predicate append = Cached::append/2; + /** * Gets the summary that corresponds to having taken a forwards * inter-procedural step from `nodeFrom` to `nodeTo`. @@ -378,6 +379,35 @@ module StepSummary { } deprecated predicate localSourceStoreStep = flowsToStoreStep/3; + + /** Gets the step summary for a level step. */ + StepSummary levelStep() { result = LevelStep() } + + /** Gets the step summary for a call step. */ + StepSummary callStep() { result = CallStep() } + + /** Gets the step summary for a return step. */ + StepSummary returnStep() { result = ReturnStep() } + + /** Gets the step summary for storing into `content`. */ + StepSummary storeStep(TypeTrackerContent content) { result = StoreStep(content) } + + /** Gets the step summary for loading from `content`. */ + StepSummary loadStep(TypeTrackerContent content) { result = LoadStep(content) } + + /** Gets the step summary for loading from `load` and then storing into `store`. */ + StepSummary loadStoreStep(TypeTrackerContent load, TypeTrackerContent store) { + result = LoadStoreStep(load, store) + } + + /** Gets the step summary for a step that only permits contents matched by `filter`. */ + StepSummary withContent(ContentFilter filter) { result = WithContent(filter) } + + /** Gets the step summary for a step that blocks contents matched by `filter`. */ + StepSummary withoutContent(ContentFilter filter) { result = WithoutContent(filter) } + + /** Gets the step summary for a jump step. */ + StepSummary jumpStep() { result = JumpStep() } } /** @@ -540,6 +570,13 @@ module TypeTracker { * Gets a valid end point of type tracking. */ TypeTracker end() { result.end() } + + /** + * INTERNAL USE ONLY. + * + * Gets a valid end point of type tracking with the call bit set to the given value. + */ + predicate end = Cached::noContentTypeTracker/1; } pragma[nomagic] diff --git a/ruby/ql/lib/change-notes/2023-06-28-tracking-on-demand.md b/ruby/ql/lib/change-notes/2023-06-28-tracking-on-demand.md new file mode 100644 index 00000000000..5aa79d5e2f3 --- /dev/null +++ b/ruby/ql/lib/change-notes/2023-06-28-tracking-on-demand.md @@ -0,0 +1,7 @@ +--- +category: majorAnalysis +--- +* The API graph library (`codeql.ruby.ApiGraphs`) has been significantly improved, with better support for inheritance, + and data-flow nodes can now be converted to API nodes by calling `.track()` or `.backtrack()` on the node. + API graphs allow for efficient modelling of how a given value is used by the code base, or how values produced by the code base + are consumed by a library. See the documentation for `API::Node` for details and examples. diff --git a/ruby/ql/lib/codeql/ruby/ApiGraphs.qll b/ruby/ql/lib/codeql/ruby/ApiGraphs.qll index 1cf4c445781..0d6e669b1de 100644 --- a/ruby/ql/lib/codeql/ruby/ApiGraphs.qll +++ b/ruby/ql/lib/codeql/ruby/ApiGraphs.qll @@ -1,13 +1,13 @@ /** - * Provides an implementation of _API graphs_, which are an abstract representation of the API - * surface used and/or defined by a code base. + * Provides an implementation of _API graphs_, which allow efficient modelling of how a given + * value is used by the code base or how values produced by the code base are consumed by a library. * - * The nodes of the API graph represent definitions and uses of API components. The edges are - * directed and labeled; they specify how the components represented by nodes relate to each other. + * See `API::Node` for more details. */ private import codeql.ruby.AST private import codeql.ruby.DataFlow +private import codeql.ruby.typetracking.ApiGraphShared private import codeql.ruby.typetracking.TypeTracker private import codeql.ruby.typetracking.TypeTrackerSpecific as TypeTrackerSpecific private import codeql.ruby.controlflow.CfgNodes @@ -19,85 +19,140 @@ private import codeql.ruby.dataflow.internal.DataFlowDispatch as DataFlowDispatc */ module API { /** - * A node in the API graph, representing a value that has crossed the boundary between this - * codebase and an external library (or in general, any external codebase). + * A node in the API graph, that is, a value that can be tracked interprocedurally. * - * ### Basic usage + * The API graph is a graph for tracking values of certain types in a way that accounts for inheritance + * and interprocedural data flow. * * API graphs are typically used to identify "API calls", that is, calls to an external function * whose implementation is not necessarily part of the current codebase. * + * ### Basic usage + * * The most basic use of API graphs is typically as follows: * 1. Start with `API::getTopLevelMember` for the relevant library. * 2. Follow up with a chain of accessors such as `getMethod` describing how to get to the relevant API function. - * 3. Map the resulting API graph nodes to data-flow nodes, using `asSource` or `asSink`. + * 3. Map the resulting API graph nodes to data-flow nodes, using `asSource`, `asSink`, or `asCall`. * - * For example, a simplified way to get arguments to `Foo.bar` would be - * ```ql - * API::getTopLevelMember("Foo").getMethod("bar").getParameter(0).asSink() + * The following examples demonstrate how to identify the expression `x` in various basic cases: + * ```rb + * # API::getTopLevelMember("Foo").getMethod("bar").getArgument(0).asSink() + * Foo.bar(x) + * + * # API::getTopLevelMember("Foo").getMethod("bar").getKeywordArgument("foo").asSink() + * Foo.bar(foo: x) + * + * # API::getTopLevelMember("Foo").getInstance().getMethod("bar").getArgument(0).asSink() + * Foo.new.bar(x) + * + * Foo.bar do |x| # API::getTopLevelMember("Foo").getMethod("bar").getBlock().getParameter(0).asSource() + * end * ``` * - * The most commonly used accessors are `getMember`, `getMethod`, `getParameter`, and `getReturn`. + * ### Data flow * - * ### API graph nodes + * The members predicates on this class generally take inheritance and data flow into account. * - * There are two kinds of nodes in the API graphs, distinguished by who is "holding" the value: - * - **Use-nodes** represent values held by the current codebase, which came from an external library. - * (The current codebase is "using" a value that came from the library). - * - **Def-nodes** represent values held by the external library, which came from this codebase. - * (The current codebase "defines" the value seen by the library). - * - * API graph nodes are associated with data-flow nodes in the current codebase. - * (Since external libraries are not part of the database, there is no way to associate with concrete - * data-flow nodes from the external library). - * - **Use-nodes** are associated with data-flow nodes where a value enters the current codebase, - * such as the return value of a call to an external function. - * - **Def-nodes** are associated with data-flow nodes where a value leaves the current codebase, - * such as an argument passed in a call to an external function. - * - * - * ### Access paths and edge labels - * - * Nodes in the API graph are associated with a set of access paths, describing a series of operations - * that may be performed to obtain that value. - * - * For example, the access path `API::getTopLevelMember("Foo").getMethod("bar")` represents the action of - * reading the top-level constant `Foo` and then accessing the method `bar` on the resulting object. - * It would be associated with a call such as `Foo.bar()`. - * - * Each edge in the graph is labelled by such an "operation". For an edge `A->B`, the type of the `A` node - * determines who is performing the operation, and the type of the `B` node determines who ends up holding - * the result: - * - An edge starting from a use-node describes what the current codebase is doing to a value that - * came from a library. - * - An edge starting from a def-node describes what the external library might do to a value that - * came from the current codebase. - * - An edge ending in a use-node means the result ends up in the current codebase (at its associated data-flow node). - * - An edge ending in a def-node means the result ends up in external code (its associated data-flow node is - * the place where it was "last seen" in the current codebase before flowing out) - * - * Because the implementation of the external library is not visible, it is not known exactly what operations - * it will perform on values that flow there. Instead, the edges starting from a def-node are operations that would - * lead to an observable effect within the current codebase; without knowing for certain if the library will actually perform - * those operations. (When constructing these edges, we assume the library is somewhat well-behaved). - * - * For example, given this snippet: + * The following example demonstrates a case where data flow was used to find the sink `x`: * ```ruby - * Foo.bar(->(x) { doSomething(x) }) + * def doSomething f + * f.bar(x) # API::getTopLevelMember("Foo").getInstance().getMethod("bar").getArgument(0).asSink() + * end + * doSomething Foo.new * ``` - * A callback is passed to the external function `Foo.bar`. We can't know if `Foo.bar` will actually invoke this callback. - * But _if_ the library should decide to invoke the callback, then a value will flow into the current codebase via the `x` parameter. - * For that reason, an edge is generated representing the argument-passing operation that might be performed by `Foo.bar`. - * This edge is going from the def-node associated with the callback to the use-node associated with the parameter `x` of the lambda. + * The call `API::getTopLevelMember("Foo").getInstance()` identifies the `Foo.new` call, and `getMethod("bar")` + * then follows data flow from there to find calls to `bar` where that object flows to the receiver. + * This results in the `f.bar` call. + * + * ### Backward data flow + * + * When inspecting the arguments of a call, the data flow direction is backwards. + * The following example illustrates this when we match the `x` parameter of a block: + * ```ruby + * def doSomething &blk + * Foo.bar &blk + * end + * doSomething do |x| # API::getTopLevelMember("Foo").getMethod("bar").getBlock().getParameter(0).asSource() + * end + * ``` + * When `getParameter(0)` is evaluated, the API graph backtracks the `&blk` argument to the block argument a few + * lines below. As a result, it eventually matches the `x` parameter of that block. + * + * ### Inheritance + * + * When a class or module object is tracked, inheritance is taken into account. + * + * In the following example, a call to `Foo.bar` was found via a subclass of `Foo`, + * because classes inherit singleton methods from their base class: + * ```ruby + * class Subclass < Foo + * def self.doSomething + * bar(x) # API::getTopLevelMember("Foo").getMethod("bar").getArgument(0).asSink() + * end + * end + * ``` + * + * Similarly, instance methods can be found in subclasses, or ancestors of subclases in cases of multiple inheritance: + * ```rb + * module Mixin + * def doSomething + * bar(x) # API::getTopLevelMember("Foo").getInstance().getMethod("bar").getArgument(0).asSink() + * end + * end + * class Subclass < Foo + * include Mixin + * end + * ``` + * The value of `self` in `Mixin#doSomething` is seen as a potential instance of `Foo`, and is thus found by `getTopLevelMember("Foo").getInstance()`. + * This eventually results in finding the call `bar`, due to its implicit `self` receiver, and finally its argument `x` is found as the sink. + * + * ### Backward data flow and classes + * + * When inspecting the arguments of a call, and the value flowing into that argument is a user-defined class (or an instance thereof), + * uses of `getMethod` will find method definitions in that class (including inherited ones) rather than finding method calls. + * + * This example illustrates how this can be used to model cases where the library calls a specific named method on a user-defined class: + * ```rb + * class MyClass + * def doSomething + * x # API::getTopLevelMember("Foo").getMethod("bar").getArgument(0).getMethod("doSomething").getReturn().asSink() + * end + * end + * Foo.bar MyClass.new + * ``` + * + * When modeling an external library that is known to call a specific method on a parameter (in this case `doSomething`), this makes + * it possible to find the corresponding method definition in user code. + * + * ### Strict left-to-right evaluation + * + * Most member predicates on this class are intended to be chained, and are always evaluated from left to right, which means + * the caller should restrict the initial set of values. + * + * For example, in the following snippet, we always find the uses of `Foo` before finding calls to `bar`: + * ```ql + * API::getTopLevelMember("Foo").getMethod("bar") + * ``` + * In particular, the implementation will never look for calls to `bar` and work backward from there. + * + * Beware of the footgun that is to use API graphs with an unrestricted receiver: + * ```ql + * API::Node barCall(API::Node base) { + * result = base.getMethod("bar") // Do not do this! + * } + * ``` + * The above predicate does not restrict the receiver, and will thus perform an interprocedural data flow + * search starting at every node in the graph, which is very expensive. */ class Node extends Impl::TApiNode { /** - * Gets a data-flow node where this value may flow after entering the current codebase. + * Gets a data-flow node where this value may flow interprocedurally. * * This is similar to `asSource()` but additionally includes nodes that are transitively reachable by data flow. * See `asSource()` for examples. */ - pragma[inline] + bindingset[this] + pragma[inline_late] DataFlow::Node getAValueReachableFromSource() { result = getAValueReachableFromSourceInline(this) } @@ -119,16 +174,14 @@ module API { * end * ``` */ - pragma[inline] - DataFlow::LocalSourceNode asSource() { - result = pragma[only_bind_out](this).(Node::Internal).asSourceInternal() - } + bindingset[this] + pragma[inline_late] + DataFlow::LocalSourceNode asSource() { result = asSourceInline(this) } /** - * Gets a data-flow node where this value leaves the current codebase and flows into an - * external library (or in general, any external codebase). + * Gets a data-flow node where this value potentially flows into an external library. * - * Concretely, this corresponds to an argument passed to a call to external code. + * This is usually the argument of a call, but can also be the return value of a callback. * * For example: * ```ruby @@ -143,15 +196,443 @@ module API { * }) * ``` */ - DataFlow::Node asSink() { Impl::def(this, result) } + bindingset[this] + pragma[inline_late] + DataFlow::Node asSink() { result = asSinkInline(this) } /** - * Get a data-flow node that transitively flows to an external library (or in general, any external codebase). + * Gets a callable that can reach this sink. + * + * For example: + * ```ruby + * Foo.bar do |x| # API::getTopLevelMember("Foo").getMethod("bar").getBlock().asCallable() + * end + * + * class Baz + * def m # API::getTopLevelMember("Foo").getMethod("bar").getArgument(0).getMethod("m").asCallable() + * end + * end + * Foo.bar(Baz.new) + * ``` + */ + bindingset[this] + pragma[inline_late] + DataFlow::CallableNode asCallable() { Impl::asCallable(this.getAnEpsilonSuccessor(), result) } + + /** + * Get a data-flow node that transitively flows to this value, provided that this value corresponds + * to a sink. * * This is similar to `asSink()` but additionally includes nodes that transitively reach a sink by data flow. * See `asSink()` for examples. */ - DataFlow::Node getAValueReachingSink() { result = Impl::trackDefNode(this.asSink()) } + bindingset[this] + pragma[inline_late] + DataFlow::Node getAValueReachingSink() { result = getAValueReachingSinkInline(this) } + + /** + * Gets a module or class referred to by this API node. + * + * For example: + * ```ruby + * module Foo + * class Bar # API::getTopLevelMember("Foo").getMember("Bar").asModule() + * end + * end + * ``` + */ + bindingset[this] + pragma[inline_late] + DataFlow::ModuleNode asModule() { this = Impl::MkModuleObjectDown(result) } + + /** + * Gets the call referred to by this API node. + * + * For example: + * ```ruby + * # API::getTopLevelMember("Foo").getMethod("bar").asCall() + * Foo.bar + * + * class Bar < Foo + * def doSomething + * # API::getTopLevelMember("Foo").getInstance().getMethod("baz").asCall() + * baz + * end + * end + * ``` + */ + bindingset[this] + pragma[inline_late] + DataFlow::CallNode asCall() { this = Impl::MkMethodAccessNode(result) } + + /** + * DEPRECATED. Use `asCall()` instead. + */ + pragma[inline] + deprecated DataFlow::CallNode getCallNode() { this = Impl::MkMethodAccessNode(result) } + + /** + * Gets a module or class that descends from the module or class referenced by this API node. + */ + bindingset[this] + pragma[inline_late] + DataFlow::ModuleNode getADescendentModule() { result = this.getAnEpsilonSuccessor().asModule() } + + /** + * Gets a call to a method on the receiver represented by this API node. + * + * This is a shorthand for `getMethod(method).asCall()`, and thus returns a data-flow node + * rather than an API node. + * + * For example: + * ```ruby + * # API::getTopLevelMember("Foo").getAMethodCall("bar") + * Foo.bar + * ``` + */ + pragma[inline] + DataFlow::CallNode getAMethodCall(string method) { + // This predicate is currently not 'inline_late' because 'method' can be an input or output + result = this.getMethod(method).asCall() + } + + /** + * Gets an access to the constant `m` with this value as the base of the access. + * + * For example: + * ```ruby + * A::B # API::getATopLevelMember("A").getMember("B") + * + * module A + * class B # API::getATopLevelMember("A").getMember("B") + * end + * end + * ``` + */ + pragma[inline] + Node getMember(string m) { + // This predicate is currently not 'inline_late' because 'm' can be an input or output + Impl::memberEdge(this.getAnEpsilonSuccessor(), m, result) + } + + /** + * Gets an access to a constant with this value as the base of the access. + * + * This is equivalent to `getMember(_)` but can be more efficient. + */ + bindingset[this] + pragma[inline_late] + Node getAMember() { Impl::anyMemberEdge(this.getAnEpsilonSuccessor(), result) } + + /** + * Gets a node that may refer to an instance of the module or class represented by this API node. + * + * This includes the following: + * - Calls to `new` on this module or class or a descendent thereof + * - References to `self` in instance methods declared in any ancestor of any descendent of this module or class + * + * For example: + * ```ruby + * A.new # API::getTopLevelMember("A").getInstance() + * + * class B < A + * def m + * self # API::getTopLevelMember("A").getInstance() + * end + * end + * + * B.new # API::getTopLevelMember("A").getInstance() + * + * class C < A + * include Mixin + * end + * module Mixin + * def m + * # Although 'Mixin' is not directly related to 'A', 'self' may refer to an instance of 'A' + * # due to its inclusion in a subclass of 'A'. + * self # API::getTopLevelMember("A").getInstance() + * end + * end + * ``` + */ + bindingset[this] + pragma[inline_late] + Node getInstance() { Impl::instanceEdge(this.getAnEpsilonSuccessor(), result) } + + /** + * Gets a call to `method` with this value as the receiver, or the definition of `method` on + * an object that can reach this sink. + * + * If the receiver represents a module or class object, this includes calls on descendents of that module or class. + * + * For example: + * ```ruby + * # API::getTopLevelMember("Foo").getMethod("bar") + * Foo.bar + * + * # API::getTopLevelMember("Foo").getInstance().getMethod("bar") + * Foo.new.bar + * + * class B < Foo + * end + * B.bar # API::getTopLevelMember("Foo").getMethod("bar") + * + * class C + * def m # API::getTopLevelMember("Foo").getMethod("bar").getArgument(0).getMethod("m") + * end + * end + * Foo.bar(C.new) + * ``` + */ + pragma[inline] + Node getMethod(string method) { + // TODO: Consider 'getMethodTarget(method)' for looking up method definitions? + // This predicate is currently not 'inline_late' because 'method' can be an input or output + Impl::methodEdge(this.getAnEpsilonSuccessor(), method, result) + } + + /** + * Gets the result of this call, or the return value of this callable. + */ + bindingset[this] + pragma[inline_late] + Node getReturn() { Impl::returnEdge(this.getAnEpsilonSuccessor(), result) } + + /** + * Gets the result of a call to `method` with this value as the receiver, or the return value of `method` defined on + * an object that can reach this sink. + * + * This is a shorthand for `getMethod(method).getReturn()`. + */ + pragma[inline] + Node getReturn(string method) { + // This predicate is currently not 'inline_late' because 'method' can be an input or output + result = this.getMethod(method).getReturn() + } + + /** + * Gets the `n`th positional argument to this call. + * + * For example, this would get `x` in the following snippet: + * ```ruby + * Foo.bar(x) # API::getTopLevelMember("Foo").getMethod("bar").getArgument(0) + * ``` + */ + pragma[inline] + Node getArgument(int n) { + // This predicate is currently not 'inline_late' because 'n' can be an input or output + Impl::positionalArgumentEdge(this, n, result) + } + + /** + * Gets the given keyword argument to this call. + * + * For example, this would get `x` in the following snippet: + * ```ruby + * Foo.bar(baz: x) # API::getTopLevelMember("Foo").getMethod("bar").getKeywordArgument("baz") + * ``` + */ + pragma[inline] + Node getKeywordArgument(string name) { + // This predicate is currently not 'inline_late' because 'name' can be an input or output + Impl::keywordArgumentEdge(this, name, result) + } + + /** + * Gets the block parameter of a callable that can reach this sink. + * + * For example, this would get the `&blk` in the following snippet: + * ```ruby + * # API::getTopLevelMember("Foo").getMethod("bar").getArgument(0).getBlockParameter() + * Foo.bar(->(&blk) {}) + * end + * ``` + */ + bindingset[this] + pragma[inline_late] + Node getBlockParameter() { Impl::blockParameterEdge(this.getAnEpsilonSuccessor(), result) } + + /** + * Gets the `n`th positional parameter of this callable, or the `n`th positional argument to this call. + * + * Note: for historical reasons, this predicate may refer to an argument of a call, but this may change in the future. + * When referring to an argument, it is recommended to use `getArgument(n)` instead. + */ + pragma[inline] + Node getParameter(int n) { + // This predicate is currently not 'inline_late' because 'n' can be an input or output + Impl::positionalParameterOrArgumentEdge(this.getAnEpsilonSuccessor(), n, result) + } + + /** + * Gets the given keyword parameter of this callable, or keyword argument to this call. + * + * Note: for historical reasons, this predicate may refer to an argument of a call, but this may change in the future. + * When referring to an argument, it is recommended to use `getKeywordArgument(n)` instead. + */ + pragma[inline] + Node getKeywordParameter(string name) { + // This predicate is currently not 'inline_late' because 'name' can be an input or output + Impl::keywordParameterOrArgumentEdge(this.getAnEpsilonSuccessor(), name, result) + } + + /** + * Gets the block argument to this call, or the block parameter of this callable. + * + * Note: this predicate may refer to either an argument or a parameter. When referring to a block parameter, + * it is recommended to use `getBlockParameter()` instead. + * + * For example: + * ```ruby + * Foo.bar do |x| # API::getTopLevelMember("Foo").getMethod("bar").getBlock().getParameter(0) + * end + * ``` + */ + bindingset[this] + pragma[inline_late] + Node getBlock() { Impl::blockParameterOrArgumentEdge(this.getAnEpsilonSuccessor(), result) } + + /** + * Gets the argument passed in argument position `pos` at this call. + */ + pragma[inline] + Node getArgumentAtPosition(DataFlowDispatch::ArgumentPosition pos) { + // This predicate is currently not 'inline_late' because 'pos' can be an input or output + Impl::argumentEdge(pragma[only_bind_out](this), pos, result) // note: no need for epsilon step since 'this' must be a call + } + + /** + * Gets the parameter at position `pos` of this callable. + */ + pragma[inline] + Node getParameterAtPosition(DataFlowDispatch::ParameterPosition pos) { + // This predicate is currently not 'inline_late' because 'pos' can be an input or output + Impl::parameterEdge(this.getAnEpsilonSuccessor(), pos, result) + } + + /** + * Gets a `new` call with this value as the receiver. + */ + bindingset[this] + pragma[inline_late] + DataFlow::ExprNode getAnInstantiation() { result = this.getReturn("new").asSource() } + + /** + * Gets a representative for the `content` of this value. + * + * When possible, it is preferrable to use one of the specialized variants of this predicate, such as `getAnElement`. + * + * Concretely, this gets sources where `content` is read from this value, and as well as sinks where + * `content` is stored onto this value or onto an object that can reach this sink. + */ + pragma[inline] + Node getContent(DataFlow::Content content) { + // This predicate is currently not 'inline_late' because 'content' can be an input or output + Impl::contentEdge(this.getAnEpsilonSuccessor(), content, result) + } + + /** + * Gets a representative for the `contents` of this value. + * + * See `getContent()` for more details. + */ + bindingset[this, contents] + pragma[inline_late] + Node getContents(DataFlow::ContentSet contents) { + // We always use getAStoreContent when generating content edges, and we always use getAReadContent when querying the graph. + result = this.getContent(contents.getAReadContent()) + } + + /** + * Gets a representative for the instance field of the given `name`, which must include the `@` character. + * + * This can be used to find cases where a class accesses the fields used by a base class. + * + * ```ruby + * class A < B + * def m + * @foo # API::getTopLevelMember("B").getInstance().getField("@foo") + * end + * end + * ``` + */ + pragma[inline] + Node getField(string name) { + // This predicate is currently not 'inline_late' because 'name' can be an input or output + Impl::fieldEdge(this.getAnEpsilonSuccessor(), name, result) + } + + /** + * Gets a representative for an arbitrary element of this collection. + * + * For example: + * ```ruby + * Foo.bar.each do |x| # API::getTopLevelMember("Foo").getMethod("bar").getReturn().getAnElement() + * end + * + * Foo.bar[0] # API::getTopLevelMember("Foo").getMethod("bar").getReturn().getAnElement() + * ``` + */ + bindingset[this] + pragma[inline_late] + Node getAnElement() { Impl::elementEdge(this.getAnEpsilonSuccessor(), result) } + + /** + * Gets the data-flow node that gives rise to this node, if any. + */ + DataFlow::Node getInducingNode() { + this = Impl::MkMethodAccessNode(result) or + this = Impl::MkBackwardNode(result, _) or + this = Impl::MkForwardNode(result, _) or + this = Impl::MkSinkNode(result) + } + + /** Gets the location of this node. */ + Location getLocation() { + result = this.getInducingNode().getLocation() + or + exists(DataFlow::ModuleNode mod | + this = Impl::MkModuleObjectDown(mod) + or + this = Impl::MkModuleInstanceUp(mod) + | + result = mod.getLocation() + ) + or + this instanceof RootNode and + result instanceof EmptyLocation + } + + /** + * Gets a textual representation of this element. + */ + string toString() { none() } + + /** + * Gets a node representing a (direct or indirect) subclass of the class represented by this node. + * ```rb + * class A; end + * class B < A; end + * class C < B; end + * ``` + * In the example above, `getMember("A").getASubclass()` will return uses of `A`, `B` and `C`. + */ + pragma[inline] + deprecated Node getASubclass() { result = this } + + /** + * Gets a node representing a direct subclass of the class represented by this node. + * ```rb + * class A; end + * class B < A; end + * class C < B; end + * ``` + * In the example above, `getMember("A").getAnImmediateSubclass()` will return uses of `B` only. + */ + pragma[inline] + deprecated Node getAnImmediateSubclass() { + result = this.asModule().getAnImmediateDescendent().trackModule() + } /** DEPRECATED. This predicate has been renamed to `getAValueReachableFromSource()`. */ deprecated DataFlow::Node getAUse() { result = this.getAValueReachableFromSource() } @@ -166,326 +647,143 @@ module API { deprecated DataFlow::Node getAValueReachingRhs() { result = this.getAValueReachingSink() } /** - * Gets a call to a method on the receiver represented by this API component. - */ - pragma[inline] - DataFlow::CallNode getAMethodCall(string method) { result = this.getReturn(method).asSource() } - - /** - * Gets a node representing member `m` of this API component. + * DEPRECATED. API graph nodes are no longer associated with specific paths. * - * For example, a member can be: - * - * - A submodule of a module - * - An attribute of an object - */ - pragma[inline] - Node getMember(string m) { - result = pragma[only_bind_out](this).(Node::Internal).getMemberInternal(m) - } - - /** - * Gets a node representing a member of this API component where the name of the member may - * or may not be known statically. - */ - cached - Node getAMember() { - Impl::forceCachingInSameStage() and - result = this.getASuccessor(Label::member(_)) - } - - /** - * Gets a node representing an instance of this API component, that is, an object whose - * constructor is the function represented by this node. - * - * For example, if this node represents a use of some class `A`, then there might be a node - * representing instances of `A`, typically corresponding to expressions `A.new` at the - * source level. - * - * This predicate may have multiple results when there are multiple constructor calls invoking this API component. - * Consider using `getAnInstantiation()` if there is a need to distinguish between individual constructor calls. - */ - pragma[inline] - Node getInstance() { result = this.getASubclass().getReturn("new") } - - /** - * Gets a node representing a call to `method` on the receiver represented by this node. - */ - pragma[inline] - MethodAccessNode getMethod(string method) { - result = pragma[only_bind_out](this).(Node::Internal).getMethodInternal(method) - } - - /** - * Gets a node representing the result of this call. - */ - pragma[inline] - Node getReturn() { result = pragma[only_bind_out](this).(Node::Internal).getReturnInternal() } - - /** - * Gets a node representing the result of calling a method on the receiver represented by this node. - */ - pragma[inline] - Node getReturn(string method) { result = this.getMethod(method).getReturn() } - - /** Gets an API node representing the `n`th positional parameter. */ - cached - Node getParameter(int n) { - Impl::forceCachingInSameStage() and - result = this.getASuccessor(Label::parameter(n)) - } - - /** Gets an API node representing the given keyword parameter. */ - cached - Node getKeywordParameter(string name) { - Impl::forceCachingInSameStage() and - result = this.getASuccessor(Label::keywordParameter(name)) - } - - /** Gets an API node representing the block parameter. */ - cached - Node getBlock() { - Impl::forceCachingInSameStage() and - result = this.getASuccessor(Label::blockParameter()) - } - - /** - * Gets a `new` call to the function represented by this API component. - */ - pragma[inline] - DataFlow::ExprNode getAnInstantiation() { result = this.getInstance().asSource() } - - /** - * Gets a node representing a (direct or indirect) subclass of the class represented by this node. - * ```rb - * class A; end - * class B < A; end - * class C < B; end - * ``` - * In the example above, `getMember("A").getASubclass()` will return uses of `A`, `B` and `C`. - */ - Node getASubclass() { result = this.getAnImmediateSubclass*() } - - /** - * Gets a node representing a direct subclass of the class represented by this node. - * ```rb - * class A; end - * class B < A; end - * class C < B; end - * ``` - * In the example above, `getMember("A").getAnImmediateSubclass()` will return uses of `B` only. - */ - cached - Node getAnImmediateSubclass() { - Impl::forceCachingInSameStage() and result = this.getASuccessor(Label::subclass()) - } - - /** - * Gets a node representing the `content` stored on the base object. - */ - cached - Node getContent(DataFlow::Content content) { - Impl::forceCachingInSameStage() and - result = this.getASuccessor(Label::content(content)) - } - - /** - * Gets a node representing the `contents` stored on the base object. - */ - pragma[inline] - Node getContents(DataFlow::ContentSet contents) { - // We always use getAStoreContent when generating the graph, and we always use getAReadContent when querying the graph. - result = this.getContent(contents.getAReadContent()) - } - - /** Gets a node representing the instance field of the given `name`, which must include the `@` character. */ - cached - Node getField(string name) { - Impl::forceCachingInSameStage() and - result = this.getContent(DataFlowPrivate::TFieldContent(name)) - } - - /** Gets a node representing an element of this collection (known or unknown). */ - cached - Node getAnElement() { - Impl::forceCachingInSameStage() and - result = this.getContents(any(DataFlow::ContentSet set | set.isAnyElement())) - } - - /** * Gets a string representation of the lexicographically least among all shortest access paths * from the root to this node. */ - string getPath() { - result = min(string p | p = this.getAPath(Impl::distanceFromRoot(this)) | p) - } + deprecated string getPath() { none() } /** + * DEPRECATED. Use label-specific predicates in this class, such as `getMember`, instead of using `getASuccessor`. + * * Gets a node such that there is an edge in the API graph between this node and the other * one, and that edge is labeled with `lbl`. */ - Node getASuccessor(Label::ApiLabel lbl) { Impl::edge(this, lbl, result) } + pragma[inline] + deprecated Node getASuccessor(Label::ApiLabel lbl) { + labelledEdge(this.getAnEpsilonSuccessor(), lbl, result) + } /** + * DEPRECATED. API graphs no longer support backward traversal of edges. If possible use `.backtrack()` to get + * a node intended for backtracking. + * * Gets a node such that there is an edge in the API graph between that other node and * this one, and that edge is labeled with `lbl` */ - Node getAPredecessor(Label::ApiLabel lbl) { this = result.getASuccessor(lbl) } + deprecated Node getAPredecessor(Label::ApiLabel lbl) { this = result.getASuccessor(lbl) } /** + * DEPRECATED. API graphs no longer support backward traversal of edges. If possible use `.backtrack()` to get + * a node intended for backtracking. + * * Gets a node such that there is an edge in the API graph between this node and the other * one. */ - Node getAPredecessor() { result = this.getAPredecessor(_) } + deprecated Node getAPredecessor() { result = this.getAPredecessor(_) } /** * Gets a node such that there is an edge in the API graph between that other node and * this one. */ - Node getASuccessor() { result = this.getASuccessor(_) } + pragma[inline] + deprecated Node getASuccessor() { result = this.getASuccessor(_) } - /** - * Gets the data-flow node that gives rise to this node, if any. - */ - DataFlow::Node getInducingNode() { - this = Impl::MkUse(result) - or - this = Impl::MkDef(result) - or - this = Impl::MkMethodAccessNode(result) - } + /** DEPRECATED. API graphs are no longer associated with a depth. */ + deprecated int getDepth() { none() } - /** Gets the location of this node. */ - Location getLocation() { - result = this.getInducingNode().getLocation() - or - exists(DataFlow::ModuleNode mod | - this = Impl::MkModuleObject(mod) and - result = mod.getLocation() - ) - or - // For nodes that do not have a meaningful location, `path` is the empty string and all other - // parameters are zero. - not exists(this.getInducingNode()) and - result instanceof EmptyLocation - } - - /** - * Gets a textual representation of this element. - */ - string toString() { none() } - - /** - * Gets a path of the given `length` from the root to this node. - */ - private string getAPath(int length) { - this instanceof Impl::MkRoot and - length = 0 and - result = "" - or - exists(Node pred, Label::ApiLabel lbl, string predpath | - Impl::edge(pred, lbl, this) and - predpath = pred.getAPath(length - 1) and - exists(string dot | if length = 1 then dot = "" else dot = "." | - result = predpath + dot + lbl and - // avoid producing strings longer than 1MB - result.length() < 1000 * 1000 - ) - ) and - length in [1 .. Impl::distanceFromRoot(this)] - } - - /** Gets the shortest distance from the root to this node in the API graph. */ - int getDepth() { result = Impl::distanceFromRoot(this) } + pragma[inline] + private Node getAnEpsilonSuccessor() { result = getAnEpsilonSuccessorInline(this) } } - /** Companion module to the `Node` class. */ - module Node { - /** - * INTERNAL USE ONLY. - * - * An API node, with some internal predicates exposed. - */ - class Internal extends Node { - /** - * INTERNAL USE ONLY. - * - * Same as `asSource()` but without join-order hints. - */ - cached - DataFlow::LocalSourceNode asSourceInternal() { - Impl::forceCachingInSameStage() and - Impl::use(this, result) - } + /** DEPRECATED. Use `API::root()` to access the root node. */ + deprecated class Root = RootNode; - /** - * Same as `getMember` but without join-order hints. - */ - cached - Node getMemberInternal(string m) { - Impl::forceCachingInSameStage() and - result = this.getASuccessor(Label::member(m)) - } + /** DEPRECATED. A node corresponding to the use of an API component. */ + deprecated class Use = ForwardNode; - /** - * Same as `getMethod` but without join-order hints. - */ - cached - MethodAccessNode getMethodInternal(string method) { - Impl::forceCachingInSameStage() and - result = this.getASubclass().getASuccessor(Label::method(method)) - } - - /** - * INTERNAL USE ONLY. - * - * Same as `getReturn()` but without join-order hints. - */ - cached - Node getReturnInternal() { - Impl::forceCachingInSameStage() and result = this.getASuccessor(Label::return()) - } - } - } - - bindingset[node] - pragma[inline_late] - private DataFlow::Node getAValueReachableFromSourceInline(Node node) { - exists(DataFlow::LocalSourceNode src, DataFlow::LocalSourceNode dst | - Impl::use(node, pragma[only_bind_into](src)) and - pragma[only_bind_into](dst) = Impl::trackUseNode(src) and - dst.flowsTo(result) - ) - } + /** DEPRECATED. A node corresponding to a value escaping into an API component. */ + deprecated class Def = SinkNode; /** The root node of an API graph. */ - class Root extends Node, Impl::MkRoot { - override string toString() { result = "root" } + private class RootNode extends Node, Impl::MkRoot { + override string toString() { result = "Root()" } } - private string tryGetPath(Node node) { - result = node.getPath() - or - not exists(node.getPath()) and - result = "with no path" + /** A node representing a given type-tracking state when tracking forwards. */ + private class ForwardNode extends Node, Impl::MkForwardNode { + private DataFlow::LocalSourceNode node; + private TypeTracker tracker; + + ForwardNode() { this = Impl::MkForwardNode(node, tracker) } + + override string toString() { + if tracker.start() + then result = "ForwardNode(" + node + ")" + else result = "ForwardNode(" + node + ", " + tracker + ")" + } } - /** A node corresponding to the use of an API component. */ - class Use extends Node, Impl::MkUse { - override string toString() { result = "Use " + tryGetPath(this) } + /** A node representing a given type-tracking state when tracking backwards. */ + private class BackwardNode extends Node, Impl::MkBackwardNode { + private DataFlow::LocalSourceNode node; + private TypeTracker tracker; + + BackwardNode() { this = Impl::MkBackwardNode(node, tracker) } + + override string toString() { + if tracker.start() + then result = "BackwardNode(" + node + ")" + else result = "BackwardNode(" + node + ", " + tracker + ")" + } } - /** A node corresponding to a value escaping into an API component. */ - class Def extends Node, Impl::MkDef { - override string toString() { result = "Def " + tryGetPath(this) } + /** A node representing a module/class object with epsilon edges to its descendents. */ + private class ModuleObjectDownNode extends Node, Impl::MkModuleObjectDown { + /** Gets the module represented by this API node. */ + DataFlow::ModuleNode getModule() { this = Impl::MkModuleObjectDown(result) } + + override string toString() { result = "ModuleObjectDown(" + this.getModule() + ")" } + } + + /** A node representing a module/class object with epsilon edges to its ancestors. */ + private class ModuleObjectUpNode extends Node, Impl::MkModuleObjectUp { + /** Gets the module represented by this API node. */ + DataFlow::ModuleNode getModule() { this = Impl::MkModuleObjectUp(result) } + + override string toString() { result = "ModuleObjectUp(" + this.getModule() + ")" } + } + + /** A node representing instances of a module/class with epsilon edges to its ancestors. */ + private class ModuleInstanceUpNode extends Node, Impl::MkModuleInstanceUp { + /** Gets the module whose instances are represented by this API node. */ + DataFlow::ModuleNode getModule() { this = Impl::MkModuleInstanceUp(result) } + + override string toString() { result = "ModuleInstanceUp(" + this.getModule() + ")" } + } + + /** A node representing instances of a module/class with epsilon edges to its descendents. */ + private class ModuleInstanceDownNode extends Node, Impl::MkModuleInstanceDown { + /** Gets the module whose instances are represented by this API node. */ + DataFlow::ModuleNode getModule() { this = Impl::MkModuleInstanceDown(result) } + + override string toString() { result = "ModuleInstanceDown(" + this.getModule() + ")" } } /** A node corresponding to the method being invoked at a method call. */ class MethodAccessNode extends Node, Impl::MkMethodAccessNode { - override string toString() { result = "MethodAccessNode " + tryGetPath(this) } + override string toString() { result = "MethodAccessNode(" + this.asCall() + ")" } + } - /** Gets the call node corresponding to this method access. */ - DataFlow::CallNode getCallNode() { this = Impl::MkMethodAccessNode(result) } + /** + * A node corresponding to an argument, right-hand side of a store, or return value from a callable. + * + * Such a node may serve as the starting-point of backtracking, and has epsilon edges going to + * the backward nodes corresponding to `getALocalSource`. + */ + private class SinkNode extends Node, Impl::MkSinkNode { + override string toString() { result = "SinkNode(" + this.getInducingNode() + ")" } } /** @@ -499,6 +797,8 @@ module API { * additional entry points may be added by extending this class. */ abstract class EntryPoint extends string { + // Note: this class can be deprecated in Ruby, but is still referenced by shared code in ApiGraphModels.qll, + // where it can't be removed since other languages are still dependent on the EntryPoint class. bindingset[this] EntryPoint() { any() } @@ -518,7 +818,7 @@ module API { DataFlow::CallNode getACall() { none() } /** Gets an API-node for this entry point. */ - API::Node getANode() { result = root().getASuccessor(Label::entryPoint(this)) } + API::Node getANode() { Impl::entryPointEdge(this, result) } } // Ensure all entry points are imported from ApiGraphs.qll @@ -527,88 +827,301 @@ module API { } /** Gets the root node. */ - Root root() { any() } + Node root() { result instanceof RootNode } /** - * Gets a node corresponding to a top-level member `m` (typically a module). + * Gets an access to the top-level constant `name`. * - * This is equivalent to `root().getAMember("m")`. - * - * Note: You should only use this predicate for top level modules or classes. If you want nodes corresponding to a nested module or class, - * you should use `.getMember` on the parent module/class. For example, for nodes corresponding to the class `Gem::Version`, + * To access nested constants, use `getMember()` on the resulting node. For example, for nodes corresponding to the class `Gem::Version`, * use `getTopLevelMember("Gem").getMember("Version")`. */ - cached - Node getTopLevelMember(string m) { - Impl::forceCachingInSameStage() and result = root().(Node::Internal).getMemberInternal(m) - } + pragma[inline] + Node getTopLevelMember(string name) { Impl::topLevelMember(name, result) } /** - * Provides the actual implementation of API graphs, cached for performance. - * - * Ideally, we'd like nodes to correspond to (global) access paths, with edge labels - * corresponding to extending the access path by one element. We also want to be able to map - * nodes to their definitions and uses in the data-flow graph, and this should happen modulo - * (inter-procedural) data flow. - * - * This, however, is not easy to implement, since access paths can have unbounded length - * and we need some way of recognizing cycles to avoid non-termination. Unfortunately, expressing - * a condition like "this node hasn't been involved in constructing any predecessor of - * this node in the API graph" without negative recursion is tricky. - * - * So instead most nodes are directly associated with a data-flow node, representing - * either a use or a definition of an API component. This ensures that we only have a finite - * number of nodes. However, we can now have multiple nodes with the same access - * path, which are essentially indistinguishable for a client of the API. - * - * On the other hand, a single node can have multiple access paths (which is, of - * course, unavoidable). We pick as canonical the alphabetically least access path with - * shortest length. + * Gets an unqualified call at the top-level with the given method name. */ - cached - private module Impl { - cached - predicate forceCachingInSameStage() { any() } + pragma[inline] + MethodAccessNode getTopLevelCall(string name) { Impl::toplevelCall(name, result) } - cached - predicate forceCachingBackref() { - 1 = 1 + pragma[nomagic] + private predicate isReachable(DataFlow::LocalSourceNode node, TypeTracker t) { + t.start() and exists(node) + or + exists(DataFlow::LocalSourceNode prev, TypeTracker t2 | + isReachable(prev, t2) and + node = prev.track(t2, t) and + notSelfParameter(node) + ) + } + + bindingset[node] + pragma[inline_late] + private predicate notSelfParameter(DataFlow::Node node) { + not node instanceof DataFlow::SelfParameterNode + } + + private module SharedArg implements ApiGraphSharedSig { + class ApiNode = Node; + + ApiNode getForwardNode(DataFlow::LocalSourceNode node, TypeTracker t) { + result = Impl::MkForwardNode(node, t) + } + + ApiNode getBackwardNode(DataFlow::LocalSourceNode node, TypeTracker t) { + result = Impl::MkBackwardNode(node, t) + } + + ApiNode getSinkNode(DataFlow::Node node) { result = Impl::MkSinkNode(node) } + + pragma[nomagic] + predicate specificEpsilonEdge(ApiNode pred, ApiNode succ) { + exists(DataFlow::ModuleNode mod | + moduleReferenceEdge(mod, pred, succ) + or + moduleInheritanceEdge(mod, pred, succ) + or + pred = getForwardEndNode(getSuperClassNode(mod)) and + succ = Impl::MkModuleObjectDown(mod) + ) or - exists(getTopLevelMember(_)) + implicitCallEdge(pred, succ) or - exists( - any(Node n) - .(Node::Internal) - .getMemberInternal("foo") - .getAMember() - .(Node::Internal) - .getMethodInternal("foo") - .(Node::Internal) - .getReturnInternal() - .getParameter(0) - .getKeywordParameter("foo") - .getBlock() - .getAnImmediateSubclass() - .getContent(_) - .getField(_) - .getAnElement() - .(Node::Internal) - .asSourceInternal() + exists(DataFlow::HashLiteralNode splat | hashSplatEdge(splat, pred, succ)) + } + + /** + * Holds if the epsilon edge `pred -> succ` should be generated, to handle inheritance relations of `mod`. + */ + pragma[inline] + private predicate moduleInheritanceEdge(DataFlow::ModuleNode mod, ApiNode pred, ApiNode succ) { + pred = Impl::MkModuleObjectDown(mod) and + succ = Impl::MkModuleObjectDown(mod.getAnImmediateDescendent()) + or + pred = Impl::MkModuleInstanceDown(mod) and + succ = Impl::MkModuleInstanceDown(mod.getAnImmediateDescendent()) + or + exists(DataFlow::ModuleNode ancestor | + ancestor = mod.getAnImmediateAncestor() and + // Restrict flow back to Object to avoid spurious flow for methods that happen + // to exist on Object, such as top-level methods. + not ancestor.getQualifiedName() = "Object" + | + pred = Impl::MkModuleInstanceUp(mod) and + succ = Impl::MkModuleInstanceUp(ancestor) + or + pred = Impl::MkModuleObjectUp(mod) and + succ = Impl::MkModuleObjectUp(ancestor) + ) + or + // Due to multiple inheritance, allow upwards traversal after downward traversal, + // so we can detect calls sideways in the hierarchy. + // Note that a similar case does not exist for ModuleObject since singleton methods are only inherited + // from the superclass, and there can only be one superclass. + pred = Impl::MkModuleInstanceDown(mod) and + succ = Impl::MkModuleInstanceUp(mod) + } + + /** + * Holds if the epsilon `pred -> succ` should be generated, to associate `mod` with its references in the codebase. + */ + bindingset[mod] + pragma[inline_late] + private predicate moduleReferenceEdge(DataFlow::ModuleNode mod, ApiNode pred, ApiNode succ) { + pred = Impl::MkModuleObjectDown(mod) and + succ = getForwardStartNode(getAModuleReference(mod)) + or + pred = getBackwardEndNode(getAModuleReference(mod)) and + ( + succ = Impl::MkModuleObjectUp(mod) + or + succ = Impl::MkModuleObjectDown(mod) + ) + or + pred = Impl::MkModuleInstanceUp(mod) and + succ = getAModuleInstanceUseNode(mod) + or + pred = getAModuleInstanceDefNode(mod) and + succ = Impl::MkModuleInstanceUp(mod) + or + pred = getAModuleDescendentInstanceDefNode(mod) and + succ = Impl::MkModuleInstanceDown(mod) + } + + /** + * Holds if the epsilon step `pred -> succ` should be generated to account for the fact that `getMethod("call")` + * may be omitted when dealing with blocks, lambda, or procs. + * + * For example, a block may be invoked by a `yield`, or can be converted to a proc and then invoked via `.call`. + * To simplify this, the implicit proc conversion is seen as a no-op and the `.call` is omitted. + */ + pragma[nomagic] + private predicate implicitCallEdge(ApiNode pred, ApiNode succ) { + // Step from &block parameter to yield call without needing `getMethod("call")`. + exists(DataFlow::MethodNode method | + pred = getForwardEndNode(method.getBlockParameter()) and + succ = Impl::MkMethodAccessNode(method.getABlockCall()) + ) + or + // Step from x -> x.call (the call itself, not its return value), without needing `getMethod("call")`. + exists(DataFlow::CallNode call | + call.getMethodName() = "call" and + pred = getForwardEndNode(getALocalSourceStrict(call.getReceiver())) and + succ = Impl::MkMethodAccessNode(call) + ) + or + exists(DataFlow::ModuleNode mod | + // Step from module/class to its own `call` method without needing `getMethod("call")`. + (pred = Impl::MkModuleObjectDown(mod) or pred = Impl::MkModuleObjectUp(mod)) and + succ = getBackwardEndNode(mod.getOwnSingletonMethod("call")) + or + pred = Impl::MkModuleInstanceUp(mod) and + succ = getBackwardEndNode(mod.getOwnInstanceMethod("call")) ) } + pragma[nomagic] + private DataFlow::Node getHashSplatArgument(DataFlow::HashLiteralNode literal) { + result = DataFlowPrivate::TSynthHashSplatArgumentNode(literal.asExpr()) + } + + /** + * Holds if the epsilon edge `pred -> succ` should be generated to account for the members of a hash literal. + * + * This currently exists because hash literals are desugared to `Hash.[]` calls, whose summary relies on `WithContent`. + * However, `contentEdge` does not currently generate edges for `WithContent` steps. + */ + bindingset[literal] + pragma[inline_late] + private predicate hashSplatEdge(DataFlow::HashLiteralNode literal, ApiNode pred, ApiNode succ) { + exists(TypeTracker t | + pred = Impl::MkForwardNode(getALocalSourceStrict(getHashSplatArgument(literal)), t) and + succ = Impl::MkForwardNode(pragma[only_bind_out](literal), pragma[only_bind_out](t)) + or + succ = Impl::MkBackwardNode(getALocalSourceStrict(getHashSplatArgument(literal)), t) and + pred = Impl::MkBackwardNode(pragma[only_bind_out](literal), pragma[only_bind_out](t)) + ) + } + + pragma[nomagic] + private DataFlow::LocalSourceNode getAModuleReference(DataFlow::ModuleNode mod) { + result = mod.getAnImmediateReference() + or + mod.getAnAncestor().getAnOwnInstanceSelf() = getANodeReachingClassCall(result) + } + + /** + * Gets an API node that may refer to an instance of `mod`. + */ + bindingset[mod] + pragma[inline_late] + private ApiNode getAModuleInstanceUseNode(DataFlow::ModuleNode mod) { + result = getForwardStartNode(mod.getAnOwnInstanceSelf()) + } + + /** + * Gets a node that can be backtracked to an instance of `mod`. + */ + bindingset[mod] + pragma[inline_late] + private ApiNode getAModuleInstanceDefNode(DataFlow::ModuleNode mod) { + result = getBackwardEndNode(mod.getAnImmediateReference().getAMethodCall("new")) + } + + /** + * Gets a node that can be backtracked to an instance of `mod` or any of its descendents. + */ + bindingset[mod] + pragma[inline_late] + private ApiNode getAModuleDescendentInstanceDefNode(DataFlow::ModuleNode mod) { + result = getBackwardEndNode(mod.getAnOwnInstanceSelf()) + } + + /** + * Holds if `superclass` is the superclass of `mod`. + */ + pragma[nomagic] + private DataFlow::LocalSourceNode getSuperClassNode(DataFlow::ModuleNode mod) { + result.getALocalUse().asExpr().getExpr() = + mod.getADeclaration().(ClassDeclaration).getSuperclassExpr() + } + + /** Gets a node that can reach the receiver of the given `.class` call. */ + private DataFlow::LocalSourceNode getANodeReachingClassCall( + DataFlow::CallNode call, TypeBackTracker t + ) { + t.start() and + call.getMethodName() = "class" and + result = getALocalSourceStrict(call.getReceiver()) + or + exists(DataFlow::LocalSourceNode prev, TypeBackTracker t2 | + prev = getANodeReachingClassCall(call, t2) and + result = prev.backtrack(t2, t) and + notSelfParameter(prev) + ) + } + + /** Gets a node that can reach the receiver of the given `.class` call. */ + private DataFlow::LocalSourceNode getANodeReachingClassCall(DataFlow::CallNode call) { + result = getANodeReachingClassCall(call, TypeBackTracker::end()) + } + } + + /** INTERNAL USE ONLY. */ + module Internal { + private module Shared = ApiGraphShared; + + import Shared + + /** Gets the API node corresponding to the module/class object for `mod`. */ + bindingset[mod] + pragma[inline_late] + Node getModuleNode(DataFlow::ModuleNode mod) { result = Impl::MkModuleObjectDown(mod) } + + /** Gets the API node corresponding to instances of `mod`. */ + bindingset[mod] + pragma[inline_late] + Node getModuleInstance(DataFlow::ModuleNode mod) { result = getModuleNode(mod).getInstance() } + } + + private import Internal + import Internal::Public + + cached + private module Impl { cached newtype TApiNode = /** The root of the API graph. */ MkRoot() or /** The method accessed at `call`, synthetically treated as a separate object. */ - MkMethodAccessNode(DataFlow::CallNode call) { isUse(call) } or - /** A use of an API member at the node `nd`. */ - MkUse(DataFlow::Node nd) { isUse(nd) } or - /** A value that escapes into an external library at the node `nd` */ - MkDef(DataFlow::Node nd) { isDef(nd) } or - /** A module object seen as a use node. */ - MkModuleObject(DataFlow::ModuleNode mod) + MkMethodAccessNode(DataFlow::CallNode call) or + /** The module object `mod` with epsilon edges to its ancestors. */ + MkModuleObjectUp(DataFlow::ModuleNode mod) or + /** The module object `mod` with epsilon edges to its descendents. */ + MkModuleObjectDown(DataFlow::ModuleNode mod) or + /** Instances of `mod` with epsilon edges to its ancestors. */ + MkModuleInstanceUp(DataFlow::ModuleNode mod) or + /** Instances of `mod` with epsilon edges to its descendents, and to its upward node. */ + MkModuleInstanceDown(DataFlow::ModuleNode mod) or + /** Intermediate node for following forward data flow. */ + MkForwardNode(DataFlow::LocalSourceNode node, TypeTracker t) { isReachable(node, t) } or + /** Intermediate node for following backward data flow. */ + MkBackwardNode(DataFlow::LocalSourceNode node, TypeTracker t) { isReachable(node, t) } or + MkSinkNode(DataFlow::Node node) { needsSinkNode(node) } + + private predicate needsSinkNode(DataFlow::Node node) { + node instanceof DataFlowPrivate::ArgumentNode + or + TypeTrackerSpecific::basicStoreStep(node, _, _) + or + node = any(DataFlow::CallableNode callable).getAReturnNode() + or + node = any(EntryPoint e).getASink() + } + + bindingset[e] + pragma[inline_late] + private DataFlow::Node getNodeFromExpr(Expr e) { result.asExpr().getExpr() = e } private string resolveTopLevel(ConstantReadAccess read) { result = read.getModule().getQualifiedName() and @@ -616,300 +1129,288 @@ module API { } /** - * Holds if `ref` is a use of a node that should have an incoming edge from the root - * node labeled `lbl` in the API graph (not including those from API::EntryPoint). + * Holds `pred` should have a member edge to `mod`. */ pragma[nomagic] - private predicate useRoot(Label::ApiLabel lbl, DataFlow::Node ref) { - exists(string name, ConstantReadAccess read | - read = ref.asExpr().getExpr() and - lbl = Label::member(read.getName()) + private predicate moduleScope(DataFlow::ModuleNode mod, Node pred, string name) { + exists(Namespace namespace | + name = namespace.getName() and + namespace = mod.getADeclaration() | - name = resolveTopLevel(read) + exists(DataFlow::Node scopeNode | + scopeNode.asExpr().getExpr() = namespace.getScopeExpr() and + pred = getForwardEndNode(getALocalSourceStrict(scopeNode)) + ) or - name = read.getName() and - not exists(resolveTopLevel(read)) and - not exists(read.getScopeExpr()) + not exists(namespace.getScopeExpr()) and + if namespace.hasGlobalScope() or namespace.getEnclosingModule() instanceof Toplevel + then pred = MkRoot() + else pred = MkModuleObjectDown(namespace.getEnclosingModule().getModule()) ) } - /** - * Holds if `ref` is a use of a node that should have an incoming edge labeled `lbl`, - * from a use node that flows to `node`. - */ - private predicate useStep(Label::ApiLabel lbl, DataFlow::Node node, DataFlow::Node ref) { - // // Referring to an attribute on a node that is a use of `base`: - // pred = `Rails` part of `Rails::Whatever` - // lbl = `Whatever` - // ref = `Rails::Whatever` - exists(ExprNodes::ConstantAccessCfgNode c, ConstantReadAccess read | - not exists(resolveTopLevel(read)) and - node.asExpr() = c.getScopeExpr() and - lbl = Label::member(read.getName()) and - ref.asExpr() = c and - read = c.getExpr() - ) - or - exists(TypeTrackerSpecific::TypeTrackerContent c | - TypeTrackerSpecific::basicLoadStep(node, ref, c) and - lbl = Label::content(c.getAStoreContent()) and - not c.isSingleton(any(DataFlow::Content::AttributeNameContent k)) - ) - // note: method calls are not handled here as there is no DataFlow::Node for the intermediate MkMethodAccessNode API node - } - - /** - * Holds if `rhs` is a definition of a node that should have an incoming edge labeled `lbl`, - * from a def node that is reachable from `node`. - */ - private predicate defStep(Label::ApiLabel lbl, DataFlow::Node node, DataFlow::Node rhs) { - exists(TypeTrackerSpecific::TypeTrackerContent c | - TypeTrackerSpecific::basicStoreStep(rhs, node, c) and - lbl = Label::content(c.getAStoreContent()) - ) - } - - pragma[nomagic] - private predicate isUse(DataFlow::Node nd) { - useRoot(_, nd) - or - exists(DataFlow::Node node | - useCandFwd().flowsTo(node) and - useStep(_, node, nd) - ) - or - useCandFwd().flowsTo(nd.(DataFlow::CallNode).getReceiver()) - or - parameterStep(_, defCand(), nd) - or - nd = any(EntryPoint entry).getASource() - or - nd = any(EntryPoint entry).getACall() - } - - /** - * Holds if `ref` is a use of node `nd`. - */ cached - predicate use(TApiNode nd, DataFlow::Node ref) { - nd = MkUse(ref) + predicate memberEdge(Node pred, string name, Node succ) { + exists(ConstantReadAccess read | succ = getForwardStartNode(getNodeFromExpr(read)) | + name = resolveTopLevel(read) and + pred = MkRoot() + or + not exists(resolveTopLevel(read)) and + not exists(read.getScopeExpr()) and + name = read.getName() and + pred = MkRoot() + or + pred = getForwardEndNode(getALocalSourceStrict(getNodeFromExpr(read.getScopeExpr()))) and + name = read.getName() + ) or exists(DataFlow::ModuleNode mod | - nd = MkModuleObject(mod) and - ref = mod.getAnImmediateReference() + moduleScope(mod, pred, name) and + (succ = MkModuleObjectDown(mod) or succ = MkModuleObjectUp(mod)) ) } - /** - * Holds if `rhs` is a RHS of node `nd`. - */ cached - predicate def(TApiNode nd, DataFlow::Node rhs) { nd = MkDef(rhs) } + predicate topLevelMember(string name, Node node) { memberEdge(root(), name, node) } - /** Gets a node reachable from a use-node. */ - private DataFlow::LocalSourceNode useCandFwd(TypeTracker t) { - t.start() and - isUse(result) - or - exists(TypeTracker t2 | result = useCandFwd(t2).track(t2, t)) - } - - /** Gets a node reachable from a use-node. */ - private DataFlow::LocalSourceNode useCandFwd() { result = useCandFwd(TypeTracker::end()) } - - private predicate isDef(DataFlow::Node rhs) { - // If a call node is relevant as a use-node, treat its arguments as def-nodes - argumentStep(_, useCandFwd(), rhs) - or - defStep(_, defCand(), rhs) - or - rhs = any(EntryPoint entry).getASink() - } - - /** Gets a data flow node that flows to the RHS of a def-node. */ - private DataFlow::LocalSourceNode defCand(TypeBackTracker t) { - t.start() and - exists(DataFlow::Node rhs | - isDef(rhs) and - result = rhs.getALocalSource() - ) - or - exists(TypeBackTracker t2 | result = defCand(t2).backtrack(t2, t)) - } - - /** Gets a data flow node that flows to the RHS of a def-node. */ - private DataFlow::LocalSourceNode defCand() { result = defCand(TypeBackTracker::end()) } - - /** - * Holds if there should be a `lbl`-edge from the given call to an argument. - */ - pragma[nomagic] - private predicate argumentStep( - Label::ApiLabel lbl, DataFlow::CallNode call, DataFlowPrivate::ArgumentNode argument - ) { - exists(DataFlowDispatch::ArgumentPosition argPos | - argument.sourceArgumentOf(call.asExpr(), argPos) and - lbl = Label::getLabelFromArgumentPosition(argPos) - ) - } - - /** - * Holds if there should be a `lbl`-edge from the given callable to a parameter. - */ - pragma[nomagic] - private predicate parameterStep( - Label::ApiLabel lbl, DataFlow::Node callable, DataFlowPrivate::ParameterNodeImpl paramNode - ) { - exists(DataFlowDispatch::ParameterPosition paramPos | - paramNode.isSourceParameterOf(callable.asExpr().getExpr(), paramPos) and - lbl = Label::getLabelFromParameterPosition(paramPos) - ) - } - - /** - * Gets a data-flow node to which `src`, which is a use of an API-graph node, flows. - * - * The flow from `src` to the returned node may be inter-procedural. - */ - private DataFlow::LocalSourceNode trackUseNode(DataFlow::LocalSourceNode src, TypeTracker t) { - result = src and - isUse(src) and - t.start() - or - exists(TypeTracker t2 | - result = trackUseNode(src, t2).track(t2, t) and - not result instanceof DataFlow::SelfParameterNode - ) - } - - /** - * Gets a data-flow node to which `src`, which is a use of an API-graph node, flows. - * - * The flow from `src` to the returned node may be inter-procedural. - */ cached - DataFlow::LocalSourceNode trackUseNode(DataFlow::LocalSourceNode src) { - result = trackUseNode(src, TypeTracker::end()) - } - - /** Gets a data flow node reaching the RHS of the given def node. */ - private DataFlow::LocalSourceNode trackDefNode(DataFlow::Node rhs, TypeBackTracker t) { - t.start() and - isDef(rhs) and - result = rhs.getALocalSource() - or - exists(TypeBackTracker t2, DataFlow::LocalSourceNode mid | - mid = trackDefNode(rhs, t2) and - not mid instanceof DataFlow::SelfParameterNode and - result = mid.backtrack(t2, t) + predicate toplevelCall(string name, Node node) { + exists(DataFlow::CallNode call | + call.asExpr().getExpr().getCfgScope() instanceof Toplevel and + call.getMethodName() = name and + node = MkMethodAccessNode(call) ) } - /** Gets a data flow node reaching the RHS of the given def node. */ cached - DataFlow::LocalSourceNode trackDefNode(DataFlow::Node rhs) { - result = trackDefNode(rhs, TypeBackTracker::end()) - } + predicate anyMemberEdge(Node pred, Node succ) { memberEdge(pred, _, succ) } - pragma[nomagic] - private predicate useNodeReachesReceiver(DataFlow::Node use, DataFlow::CallNode call) { - trackUseNode(use).flowsTo(call.getReceiver()) - } - - /** - * Holds if `superclass` is the superclass of `mod`. - */ - pragma[nomagic] - private predicate superclassNode(DataFlow::ModuleNode mod, DataFlow::Node superclass) { - superclass.asExpr().getExpr() = mod.getADeclaration().(ClassDeclaration).getSuperclassExpr() - } - - /** - * Holds if there is an edge from `pred` to `succ` in the API graph that is labeled with `lbl`. - */ cached - predicate edge(TApiNode pred, Label::ApiLabel lbl, TApiNode succ) { - /* Every node that is a use of an API component is itself added to the API graph. */ - exists(DataFlow::LocalSourceNode ref | succ = MkUse(ref) | - pred = MkRoot() and - useRoot(lbl, ref) + predicate methodEdge(Node pred, string name, Node succ) { + exists(DataFlow::ModuleNode mod, DataFlow::CallNode call | + // Treat super calls as if they were calls to the module object/instance. + succ = MkMethodAccessNode(call) and + name = call.getMethodName() + | + pred = MkModuleObjectDown(mod) and + call = mod.getAnOwnSingletonMethod().getASuperCall() or - exists(DataFlow::Node node, DataFlow::Node src | - use(pred, src) and - trackUseNode(src).flowsTo(node) and - useStep(lbl, node, ref) - ) - or - exists(DataFlow::Node callback | - def(pred, callback) and - parameterStep(lbl, trackDefNode(callback), ref) - ) - ) - or - exists(DataFlow::Node predNode, DataFlow::Node succNode | - def(pred, predNode) and - succ = MkDef(succNode) and - defStep(lbl, trackDefNode(predNode), succNode) - ) - or - exists(DataFlow::Node predNode, DataFlow::Node superclassNode, DataFlow::ModuleNode mod | - use(pred, predNode) and - trackUseNode(predNode).flowsTo(superclassNode) and - superclassNode(mod, superclassNode) and - succ = MkModuleObject(mod) and - lbl = Label::subclass() + pred = MkModuleInstanceUp(mod) and + call = mod.getAnOwnInstanceMethod().getASuperCall() ) or exists(DataFlow::CallNode call | // from receiver to method call node - exists(DataFlow::Node receiver | - use(pred, receiver) and - useNodeReachesReceiver(receiver, call) and - lbl = Label::method(call.getMethodName()) and - succ = MkMethodAccessNode(call) - ) - or - // from method call node to return and arguments - pred = MkMethodAccessNode(call) and - ( - lbl = Label::return() and - succ = MkUse(call) - or - exists(DataFlow::Node rhs | - argumentStep(lbl, call, rhs) and - succ = MkDef(rhs) - ) - ) + pred = getForwardEndNode(getALocalSourceStrict(call.getReceiver())) and + succ = MkMethodAccessNode(call) and + name = call.getMethodName() ) or - exists(EntryPoint entry | - pred = root() and - lbl = Label::entryPoint(entry) - | - succ = MkDef(entry.getASink()) + exists(DataFlow::ModuleNode mod | + (pred = MkModuleObjectDown(mod) or pred = MkModuleObjectUp(mod)) and + succ = getBackwardStartNode(mod.getOwnSingletonMethod(name)) or - succ = MkUse(entry.getASource()) - or - succ = MkMethodAccessNode(entry.getACall()) + pred = MkModuleInstanceUp(mod) and + succ = getBackwardStartNode(mod.getOwnInstanceMethod(name)) ) } - /** - * Holds if there is an edge from `pred` to `succ` in the API graph. - */ - private predicate edge(TApiNode pred, TApiNode succ) { edge(pred, _, succ) } - - /** Gets the shortest distance from the root to `nd` in the API graph. */ cached - int distanceFromRoot(TApiNode nd) = shortestDistances(MkRoot/0, edge/2)(_, nd, result) + predicate asCallable(Node apiNode, DataFlow::CallableNode callable) { + apiNode = getBackwardStartNode(callable) + } + cached + predicate contentEdge(Node pred, DataFlow::Content content, Node succ) { + exists( + DataFlow::Node object, DataFlow::Node value, TypeTrackerSpecific::TypeTrackerContent c + | + TypeTrackerSpecific::basicLoadStep(object, value, c) and + content = c.getAStoreContent() and + not c.isSingleton(any(DataFlow::Content::AttributeNameContent k)) and + // `x -> x.foo` with content "foo" + pred = getForwardOrBackwardEndNode(getALocalSourceStrict(object)) and + succ = getForwardStartNode(value) + or + // Based on `object.c = value` generate `object -> value` with content `c` + TypeTrackerSpecific::basicStoreStep(value, object, c) and + content = c.getAStoreContent() and + pred = getForwardOrBackwardEndNode(getALocalSourceStrict(object)) and + succ = MkSinkNode(value) + ) + } + + cached + predicate fieldEdge(Node pred, string name, Node succ) { + Impl::contentEdge(pred, DataFlowPrivate::TFieldContent(name), succ) + } + + cached + predicate elementEdge(Node pred, Node succ) { + contentEdge(pred, any(DataFlow::ContentSet set | set.isAnyElement()).getAReadContent(), succ) + } + + cached + predicate parameterEdge(Node pred, DataFlowDispatch::ParameterPosition paramPos, Node succ) { + exists(DataFlowPrivate::ParameterNodeImpl parameter, DataFlow::CallableNode callable | + parameter.isSourceParameterOf(callable.asExpr().getExpr(), paramPos) and + pred = getBackwardEndNode(callable) and + succ = getForwardStartNode(parameter) + ) + } + + cached + predicate argumentEdge(Node pred, DataFlowDispatch::ArgumentPosition argPos, Node succ) { + exists(DataFlow::CallNode call, DataFlowPrivate::ArgumentNode argument | + argument.sourceArgumentOf(call.asExpr(), argPos) and + pred = MkMethodAccessNode(call) and + succ = MkSinkNode(argument) + ) + } + + cached + predicate positionalArgumentEdge(Node pred, int n, Node succ) { + argumentEdge(pred, any(DataFlowDispatch::ArgumentPosition pos | pos.isPositional(n)), succ) + } + + cached + predicate keywordArgumentEdge(Node pred, string name, Node succ) { + argumentEdge(pred, any(DataFlowDispatch::ArgumentPosition pos | pos.isKeyword(name)), succ) + } + + private predicate blockArgumentEdge(Node pred, Node succ) { + argumentEdge(pred, any(DataFlowDispatch::ArgumentPosition pos | pos.isBlock()), succ) + } + + private predicate positionalParameterEdge(Node pred, int n, Node succ) { + parameterEdge(pred, any(DataFlowDispatch::ParameterPosition pos | pos.isPositional(n)), succ) + } + + private predicate keywordParameterEdge(Node pred, string name, Node succ) { + parameterEdge(pred, any(DataFlowDispatch::ParameterPosition pos | pos.isKeyword(name)), succ) + } + + cached + predicate blockParameterEdge(Node pred, Node succ) { + parameterEdge(pred, any(DataFlowDispatch::ParameterPosition pos | pos.isBlock()), succ) + } + + cached + predicate positionalParameterOrArgumentEdge(Node pred, int n, Node succ) { + positionalArgumentEdge(pred, n, succ) + or + positionalParameterEdge(pred, n, succ) + } + + cached + predicate keywordParameterOrArgumentEdge(Node pred, string name, Node succ) { + keywordArgumentEdge(pred, name, succ) + or + keywordParameterEdge(pred, name, succ) + } + + cached + predicate blockParameterOrArgumentEdge(Node pred, Node succ) { + blockArgumentEdge(pred, succ) + or + blockParameterEdge(pred, succ) + } + + pragma[nomagic] + private predicate newCall(DataFlow::LocalSourceNode receiver, DataFlow::CallNode call) { + call = receiver.getAMethodCall("new") + } + + cached + predicate instanceEdge(Node pred, Node succ) { + exists(DataFlow::ModuleNode mod | + pred = MkModuleObjectDown(mod) and + succ = MkModuleInstanceUp(mod) + ) + or + exists(DataFlow::LocalSourceNode receiver, DataFlow::CallNode call | + newCall(receiver, call) and + pred = getForwardEndNode(receiver) and + succ = getForwardStartNode(call) + ) + } + + cached + predicate returnEdge(Node pred, Node succ) { + exists(DataFlow::CallNode call | + pred = MkMethodAccessNode(call) and + succ = getForwardStartNode(call) + ) + or + exists(DataFlow::CallableNode callable | + pred = getBackwardEndNode(callable) and + succ = MkSinkNode(callable.getAReturnNode()) + ) + } + + cached + predicate entryPointEdge(EntryPoint entry, Node node) { + node = MkSinkNode(entry.getASink()) or + node = getForwardStartNode(entry.getASource()) or + node = MkMethodAccessNode(entry.getACall()) + } + } + + /** + * Holds if there is an edge from `pred` to `succ` in the API graph that is labeled with `lbl`. + */ + pragma[nomagic] + deprecated private predicate labelledEdge(Node pred, Label::ApiLabel lbl, Node succ) { + exists(string name | + Impl::memberEdge(pred, name, succ) and + lbl = Label::member(name) + ) + or + exists(string name | + Impl::methodEdge(pred, name, succ) and + lbl = Label::method(name) + ) + or + exists(DataFlow::Content content | + Impl::contentEdge(pred, content, succ) and + lbl = Label::content(content) + ) + or + exists(DataFlowDispatch::ParameterPosition pos | + Impl::parameterEdge(pred, pos, succ) and + lbl = Label::getLabelFromParameterPosition(pos) + ) + or + exists(DataFlowDispatch::ArgumentPosition pos | + Impl::argumentEdge(pred, pos, succ) and + lbl = Label::getLabelFromArgumentPosition(pos) + ) + or + Impl::instanceEdge(pred, succ) and + lbl = Label::instance() + or + Impl::returnEdge(pred, succ) and + lbl = Label::return() + or + exists(EntryPoint entry | + Impl::entryPointEdge(entry, succ) and + pred = root() and + lbl = Label::entryPoint(entry) + ) + } + + /** + * DEPRECATED. Treating the API graph as an explicit labelled graph is deprecated - instead use the methods on `API:Node` directly. + * + * Provides classes modeling the various edges (labels) in the API graph. + */ + deprecated module Label { /** All the possible labels in the API graph. */ - cached - newtype TLabel = + private newtype TLabel = MkLabelMember(string member) { member = any(ConstantReadAccess a).getName() } or MkLabelMethod(string m) { m = any(DataFlow::CallNode c).getMethodName() } or MkLabelReturn() or - MkLabelSubclass() or + MkLabelInstance() or MkLabelKeywordParameter(string name) { any(DataFlowDispatch::ArgumentPosition arg).isKeyword(name) or @@ -923,12 +1424,9 @@ module API { MkLabelBlockParameter() or MkLabelEntryPoint(EntryPoint name) or MkLabelContent(DataFlow::Content content) - } - /** Provides classes modeling the various edges (labels) in the API graph. */ - module Label { /** A label in the API-graph */ - class ApiLabel extends Impl::TLabel { + class ApiLabel extends TLabel { /** Gets a string representation of this label. */ string toString() { result = "???" } } @@ -967,9 +1465,9 @@ module API { override string toString() { result = "getReturn()" } } - /** A label for the subclass relationship. */ - class LabelSubclass extends ApiLabel, MkLabelSubclass { - override string toString() { result = "getASubclass()" } + /** A label for getting instances of a module/class. */ + class LabelInstance extends ApiLabel, MkLabelInstance { + override string toString() { result = "getInstance()" } } /** A label for a keyword parameter. */ @@ -1037,8 +1535,8 @@ module API { /** Gets the `return` edge label. */ LabelReturn return() { any() } - /** Gets the `subclass` edge label. */ - LabelSubclass subclass() { any() } + /** Gets the `instance` edge label. */ + LabelInstance instance() { any() } /** Gets the label representing the given keyword argument/parameter. */ LabelKeywordParameter keywordParameter(string name) { result.getName() = name } diff --git a/ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPublic.qll b/ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPublic.qll index a98238e85d9..7772e5235d7 100644 --- a/ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPublic.qll +++ b/ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPublic.qll @@ -6,12 +6,17 @@ private import codeql.ruby.typetracking.TypeTracker private import codeql.ruby.dataflow.SSA private import FlowSummaryImpl as FlowSummaryImpl private import SsaImpl as SsaImpl +private import codeql.ruby.ApiGraphs /** * An element, viewed as a node in a data flow graph. Either an expression * (`ExprNode`) or a parameter (`ParameterNode`). */ class Node extends TNode { + /** Starts backtracking from this node using API graphs. */ + pragma[inline] + API::Node backtrack() { result = API::Internal::getNodeForBacktracking(this) } + /** Gets the expression corresponding to this node, if any. */ CfgNodes::ExprCfgNode asExpr() { result = this.(ExprNode).getExprNode() } @@ -76,6 +81,11 @@ class Node extends TNode { result.asCallableAstNode() = c.asCallable() ) } + + /** Gets the enclosing method, if any. */ + MethodNode getEnclosingMethod() { + result.asCallableAstNode() = this.asExpr().getExpr().getEnclosingMethod() + } } /** A data-flow node corresponding to a call in the control-flow graph. */ @@ -144,6 +154,18 @@ class CallNode extends LocalSourceNode, ExprNode { result.asExpr() = pair.getValue() ) } + + /** + * Gets a potential target of this call, if any. + */ + final CallableNode getATarget() { + result.asCallableAstNode() = this.asExpr().getExpr().(Call).getATarget() + } + + /** + * Holds if this is a `super` call. + */ + final predicate isSuperCall() { this.asExpr().getExpr() instanceof SuperCall } } /** @@ -217,6 +239,10 @@ class SelfParameterNode extends ParameterNode instanceof SelfParameterNodeImpl { class LocalSourceNode extends Node { LocalSourceNode() { isLocalSourceNode(this) } + /** Starts tracking this node forward using API graphs. */ + pragma[inline] + API::Node track() { result = API::Internal::getNodeForForwardTracking(this) } + /** Holds if this `LocalSourceNode` can flow to `nodeTo` in one or more local flow steps. */ pragma[inline] predicate flowsTo(Node nodeTo) { hasLocalSource(nodeTo, this) } @@ -359,6 +385,11 @@ private module Cached { ) } + cached + predicate methodHasSuperCall(MethodNode method, CallNode call) { + call.isSuperCall() and method = call.getEnclosingMethod() + } + /** * A place in which a named constant can be looked up during constant lookup. */ @@ -387,6 +418,39 @@ private module Cached { result.asExpr().getExpr() = access } + /** + * Gets a module for which `constRef` is the reference to an ancestor module. + * + * For example, `M` is the ancestry target of `C` in the following examples: + * ```rb + * class M < C {} + * + * module M + * include C + * end + * + * module M + * prepend C + * end + * ``` + */ + private ModuleNode getAncestryTarget(ConstRef constRef) { result.getAnAncestorExpr() = constRef } + + /** + * Gets a scope in which a constant lookup may access the contents of the module referenced by `constRef`. + */ + cached + TConstLookupScope getATargetScope(ConstRef constRef) { + result = MkAncestorLookup(getAncestryTarget(constRef).getAnImmediateDescendent*()) + or + constRef.asConstantAccess() = any(ConstantAccess ac).getScopeExpr() and + result = MkQualifiedLookup(constRef.asConstantAccess()) + or + result = MkNestedLookup(getAncestryTarget(constRef)) + or + result = MkExactLookup(constRef.asConstantAccess().(Namespace).getModule()) + } + cached predicate forceCachingInSameStage() { any() } @@ -1028,6 +1092,33 @@ class ModuleNode instanceof Module { * this predicate. */ ModuleNode getNestedModule(string name) { result = super.getNestedModule(name) } + + /** + * Starts tracking the module object using API graphs. + * + * Concretely, this tracks forward from the following starting points: + * - A constant access that resolves to this module. + * - `self` in the module scope or in a singleton method of the module. + * - A call to `self.class` in an instance method of this module or an ancestor module. + */ + bindingset[this] + pragma[inline] + API::Node trackModule() { result = API::Internal::getModuleNode(this) } + + /** + * Starts tracking instances of this module forward using API graphs. + * + * Concretely, this tracks forward from the following starting points: + * - `self` in instance methods of this module and ancestor modules + * - Calls to `new` on the module object + * + * Note that this includes references to `self` in ancestor modules, but not in descendent modules. + * This is usually the desired behavior, particularly if this module was itself found using + * a call to `getADescendentModule()`. + */ + bindingset[this] + pragma[inline] + API::Node trackInstance() { result = API::Internal::getModuleInstance(this) } } /** @@ -1216,6 +1307,9 @@ class MethodNode extends CallableNode { /** Holds if this method is protected. */ predicate isProtected() { this.asCallableAstNode().isProtected() } + + /** Gets a `super` call in this method. */ + CallNode getASuperCall() { methodHasSuperCall(this, result) } } /** @@ -1334,24 +1428,6 @@ class ConstRef extends LocalSourceNode { not exists(access.getScopeExpr()) } - /** - * Gets a module for which this constant is the reference to an ancestor module. - * - * For example, `M` is the ancestry target of `C` in the following examples: - * ```rb - * class M < C {} - * - * module M - * include C - * end - * - * module M - * prepend C - * end - * ``` - */ - private ModuleNode getAncestryTarget() { result.getAnAncestorExpr() = this } - /** * Gets the known target module. * @@ -1359,22 +1435,6 @@ class ConstRef extends LocalSourceNode { */ private Module getExactTarget() { result.getAnImmediateReference() = access } - /** - * Gets a scope in which a constant lookup may access the contents of the module referenced by this constant. - */ - cached - private TConstLookupScope getATargetScope() { - forceCachingInSameStage() and - result = MkAncestorLookup(this.getAncestryTarget().getAnImmediateDescendent*()) - or - access = any(ConstantAccess ac).getScopeExpr() and - result = MkQualifiedLookup(access) - or - result = MkNestedLookup(this.getAncestryTarget()) - or - result = MkExactLookup(access.(Namespace).getModule()) - } - /** * Gets the scope expression, or the immediately enclosing `Namespace` (skipping over singleton classes). * @@ -1436,7 +1496,7 @@ class ConstRef extends LocalSourceNode { pragma[inline] ConstRef getConstant(string name) { exists(TConstLookupScope scope | - pragma[only_bind_into](scope) = pragma[only_bind_out](this).getATargetScope() and + pragma[only_bind_into](scope) = getATargetScope(pragma[only_bind_out](this)) and result.accesses(pragma[only_bind_out](scope), name) ) } @@ -1458,7 +1518,9 @@ class ConstRef extends LocalSourceNode { * end * ``` */ - ModuleNode getADescendentModule() { MkAncestorLookup(result) = this.getATargetScope() } + bindingset[this] + pragma[inline_late] + ModuleNode getADescendentModule() { MkAncestorLookup(result) = getATargetScope(this) } } /** diff --git a/ruby/ql/lib/codeql/ruby/experimental/UnicodeBypassValidationQuery.qll b/ruby/ql/lib/codeql/ruby/experimental/UnicodeBypassValidationQuery.qll index 5c24978c4c3..449e05ad9e8 100644 --- a/ruby/ql/lib/codeql/ruby/experimental/UnicodeBypassValidationQuery.qll +++ b/ruby/ql/lib/codeql/ruby/experimental/UnicodeBypassValidationQuery.qll @@ -91,19 +91,19 @@ class Configuration extends TaintTracking::Configuration { // unicode_utils exists(API::MethodAccessNode mac | mac = API::getTopLevelMember("UnicodeUtils").getMethod(["nfkd", "nfc", "nfd", "nfkc"]) and - sink = mac.getParameter(0).asSink() + sink = mac.getArgument(0).asSink() ) or // eprun exists(API::MethodAccessNode mac | mac = API::getTopLevelMember("Eprun").getMethod("normalize") and - sink = mac.getParameter(0).asSink() + sink = mac.getArgument(0).asSink() ) or // unf exists(API::MethodAccessNode mac | mac = API::getTopLevelMember("UNF").getMember("Normalizer").getMethod("normalize") and - sink = mac.getParameter(0).asSink() + sink = mac.getArgument(0).asSink() ) or // ActiveSupport::Multibyte::Chars @@ -113,7 +113,7 @@ class Configuration extends TaintTracking::Configuration { .getMember("Multibyte") .getMember("Chars") .getMethod("new") - .getCallNode() and + .asCall() and n = cn.getAMethodCall("normalize") and sink = cn.getArgument(0) ) diff --git a/ruby/ql/lib/codeql/ruby/experimental/ZipSlipCustomizations.qll b/ruby/ql/lib/codeql/ruby/experimental/ZipSlipCustomizations.qll index e94cabb414c..656ceedbe2b 100644 --- a/ruby/ql/lib/codeql/ruby/experimental/ZipSlipCustomizations.qll +++ b/ruby/ql/lib/codeql/ruby/experimental/ZipSlipCustomizations.qll @@ -89,7 +89,7 @@ module ZipSlip { // If argument refers to a string object, then it's a hardcoded path and // this file is safe. not zipOpen - .getCallNode() + .asCall() .getArgument(0) .getALocalSource() .getConstantValue() diff --git a/ruby/ql/lib/codeql/ruby/frameworks/ActionController.qll b/ruby/ql/lib/codeql/ruby/frameworks/ActionController.qll index 31bdc42e350..a687837f8fd 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/ActionController.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/ActionController.qll @@ -83,8 +83,8 @@ class ActionControllerClass extends DataFlow::ClassNode { } } -private DataFlow::LocalSourceNode actionControllerInstance() { - result = any(ActionControllerClass cls).getSelf() +private API::Node actionControllerInstance() { + result = any(ActionControllerClass cls).getSelf().track() } /** @@ -222,19 +222,19 @@ private class ActionControllerRenderToCall extends RenderToCallImpl { } } +pragma[nomagic] +private DataFlow::CallNode renderCall() { + // ActionController#render is an alias for ActionController::Renderer#render + result = + [ + any(ActionControllerClass c).trackModule().getAMethodCall("render"), + any(ActionControllerClass c).trackModule().getReturn("renderer").getAMethodCall("render") + ] +} + /** A call to `ActionController::Renderer#render`. */ private class RendererRenderCall extends RenderCallImpl { - RendererRenderCall() { - this = - [ - // ActionController#render is an alias for ActionController::Renderer#render - any(ActionControllerClass c).getAnImmediateReference().getAMethodCall("render"), - any(ActionControllerClass c) - .getAnImmediateReference() - .getAMethodCall("renderer") - .getAMethodCall("render") - ].asExpr().getExpr() - } + RendererRenderCall() { this = renderCall().asExpr().getExpr() } } /** A call to `html_escape` from within a controller. */ @@ -260,6 +260,7 @@ class RedirectToCall extends MethodCall { this = controller .getSelf() + .track() .getAMethodCall(["redirect_to", "redirect_back", "redirect_back_or_to"]) .asExpr() .getExpr() @@ -600,9 +601,7 @@ private module ParamsSummaries { * response. */ private module Response { - DataFlow::LocalSourceNode response() { - result = actionControllerInstance().getAMethodCall("response") - } + API::Node response() { result = actionControllerInstance().getReturn("response") } class BodyWrite extends DataFlow::CallNode, Http::Server::HttpResponse::Range { BodyWrite() { this = response().getAMethodCall("body=") } @@ -628,7 +627,7 @@ private module Response { HeaderWrite() { // response.header[key] = val // response.headers[key] = val - this = response().getAMethodCall(["header", "headers"]).getAMethodCall("[]=") + this = response().getReturn(["header", "headers"]).getAMethodCall("[]=") or // response.set_header(key) = val // response[header] = val @@ -673,18 +672,12 @@ private module Response { } } -private class ActionControllerLoggerInstance extends DataFlow::Node { - ActionControllerLoggerInstance() { - this = actionControllerInstance().getAMethodCall("logger") - or - any(ActionControllerLoggerInstance i).(DataFlow::LocalSourceNode).flowsTo(this) - } -} - private class ActionControllerLoggingCall extends DataFlow::CallNode, Logging::Range { ActionControllerLoggingCall() { - this.getReceiver() instanceof ActionControllerLoggerInstance and - this.getMethodName() = ["debug", "error", "fatal", "info", "unknown", "warn"] + this = + actionControllerInstance() + .getReturn("logger") + .getAMethodCall(["debug", "error", "fatal", "info", "unknown", "warn"]) } // Note: this is identical to the definition `stdlib.Logger.LoggerInfoStyleCall`. diff --git a/ruby/ql/lib/codeql/ruby/frameworks/ActionMailbox.qll b/ruby/ql/lib/codeql/ruby/frameworks/ActionMailbox.qll index 13607f67926..f237e42a9a9 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/ActionMailbox.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/ActionMailbox.qll @@ -27,8 +27,8 @@ module ActionMailbox { Mail() { this = [ - controller().getAnInstanceSelf().getAMethodCall("inbound_email").getAMethodCall("mail"), - controller().getAnInstanceSelf().getAMethodCall("mail") + controller().trackInstance().getReturn("inbound_email").getAMethodCall("mail"), + controller().trackInstance().getAMethodCall("mail") ] } } @@ -40,7 +40,7 @@ module ActionMailbox { RemoteContent() { this = any(Mail m) - .(DataFlow::LocalSourceNode) + .track() .getAMethodCall([ "body", "to", "from", "raw_source", "subject", "from_address", "recipients_addresses", "cc_addresses", "bcc_addresses", "in_reply_to", diff --git a/ruby/ql/lib/codeql/ruby/frameworks/ActionMailer.qll b/ruby/ql/lib/codeql/ruby/frameworks/ActionMailer.qll index af183333d3d..4884f46aac3 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/ActionMailer.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/ActionMailer.qll @@ -4,12 +4,26 @@ private import codeql.ruby.AST private import codeql.ruby.ApiGraphs +private import codeql.ruby.DataFlow private import codeql.ruby.frameworks.internal.Rails /** * Provides modeling for the `ActionMailer` library. */ module ActionMailer { + private DataFlow::ClassNode actionMailerClass() { + result = + [ + DataFlow::getConstant("ActionMailer").getConstant("Base"), + // In Rails applications `ApplicationMailer` typically extends + // `ActionMailer::Base`, but we treat it separately in case the + // `ApplicationMailer` definition is not in the database. + DataFlow::getConstant("ApplicationMailer") + ].getADescendentModule() + } + + private API::Node actionMailerInstance() { result = actionMailerClass().trackInstance() } + /** * A `ClassDeclaration` for a class that extends `ActionMailer::Base`. * For example, @@ -21,33 +35,11 @@ module ActionMailer { * ``` */ class MailerClass extends ClassDeclaration { - MailerClass() { - this.getSuperclassExpr() = - [ - API::getTopLevelMember("ActionMailer").getMember("Base"), - // In Rails applications `ApplicationMailer` typically extends - // `ActionMailer::Base`, but we treat it separately in case the - // `ApplicationMailer` definition is not in the database. - API::getTopLevelMember("ApplicationMailer") - ].getASubclass().getAValueReachableFromSource().asExpr().getExpr() - } - } - - /** A method call with a `self` receiver from within a mailer class */ - private class ContextCall extends MethodCall { - private MailerClass mailerClass; - - ContextCall() { - this.getReceiver() instanceof SelfVariableAccess and - this.getEnclosingModule() = mailerClass - } - - /** Gets the mailer class containing this method. */ - MailerClass getMailerClass() { result = mailerClass } + MailerClass() { this = actionMailerClass().getADeclaration() } } /** A call to `params` from within a mailer. */ - class ParamsCall extends ContextCall, ParamsCallImpl { - ParamsCall() { this.getMethodName() = "params" } + class ParamsCall extends ParamsCallImpl { + ParamsCall() { this = actionMailerInstance().getAMethodCall("params").asExpr().getExpr() } } } diff --git a/ruby/ql/lib/codeql/ruby/frameworks/ActiveRecord.qll b/ruby/ql/lib/codeql/ruby/frameworks/ActiveRecord.qll index fcca078f933..4a5d342eeba 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/ActiveRecord.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/ActiveRecord.qll @@ -30,10 +30,8 @@ private predicate isBuiltInMethodForActiveRecordModelInstance(string methodName) methodName = objectInstanceMethodName() } -private API::Node activeRecordClassApiNode() { +private API::Node activeRecordBaseClass() { result = - // class Foo < ActiveRecord::Base - // class Bar < Foo [ API::getTopLevelMember("ActiveRecord").getMember("Base"), // In Rails applications `ApplicationRecord` typically extends `ActiveRecord::Base`, but we @@ -42,6 +40,46 @@ private API::Node activeRecordClassApiNode() { ] } +/** + * Gets an object with methods from the ActiveRecord query interface. + */ +private API::Node activeRecordQueryBuilder() { + result = activeRecordBaseClass() + or + result = activeRecordBaseClass().getInstance() + or + // Assume any method call might return an ActiveRecord::Relation + // These are dynamically generated + result = activeRecordQueryBuilderMethodAccess(_).getReturn() +} + +/** Gets a call targeting the ActiveRecord query interface. */ +private API::MethodAccessNode activeRecordQueryBuilderMethodAccess(string name) { + result = activeRecordQueryBuilder().getMethod(name) and + // Due to the heuristic tracking of query builder objects, add a restriction for methods with a known call target + not isUnlikelyExternalCall(result) +} + +/** Gets a call targeting the ActiveRecord query interface. */ +private DataFlow::CallNode activeRecordQueryBuilderCall(string name) { + result = activeRecordQueryBuilderMethodAccess(name).asCall() +} + +/** + * Holds if `call` is unlikely to call into an external library, since it has a possible + * call target in its enclosing module. + */ +private predicate isUnlikelyExternalCall(API::MethodAccessNode node) { + exists(DataFlow::ModuleNode mod, DataFlow::CallNode call | call = node.asCall() | + call.getATarget() = [mod.getAnOwnSingletonMethod(), mod.getAnOwnInstanceMethod()] and + call.getEnclosingMethod() = [mod.getAnOwnSingletonMethod(), mod.getAnOwnInstanceMethod()] + ) +} + +private API::Node activeRecordConnectionInstance() { + result = activeRecordBaseClass().getReturn("connection") +} + /** * A `ClassDeclaration` for a class that inherits from `ActiveRecord::Base`. For example, * @@ -55,20 +93,19 @@ private API::Node activeRecordClassApiNode() { * ``` */ class ActiveRecordModelClass extends ClassDeclaration { + private DataFlow::ClassNode cls; + ActiveRecordModelClass() { - this.getSuperclassExpr() = - activeRecordClassApiNode().getASubclass().getAValueReachableFromSource().asExpr().getExpr() + cls = activeRecordBaseClass().getADescendentModule() and this = cls.getADeclaration() } // Gets the class declaration for this class and all of its super classes - private ModuleBase getAllClassDeclarations() { - result = this.getModule().getSuperClass*().getADeclaration() - } + private ModuleBase getAllClassDeclarations() { result = cls.getAnAncestor().getADeclaration() } /** * Gets methods defined in this class that may access a field from the database. */ - Method getAPotentialFieldAccessMethod() { + deprecated Method getAPotentialFieldAccessMethod() { // It's a method on this class or one of its super classes result = this.getAllClassDeclarations().getAMethod() and // There is a value that can be returned by this method which may include field data @@ -90,58 +127,84 @@ class ActiveRecordModelClass extends ClassDeclaration { ) ) } + + /** Gets the class as a `DataFlow::ClassNode`. */ + DataFlow::ClassNode getClassNode() { result = cls } } -/** A class method call whose receiver is an `ActiveRecordModelClass`. */ -class ActiveRecordModelClassMethodCall extends MethodCall { - private ActiveRecordModelClass recvCls; +/** + * Gets a potential reference to an ActiveRecord class object. + */ +deprecated private API::Node getAnActiveRecordModelClassRef() { + result = any(ActiveRecordModelClass cls).getClassNode().trackModule() + or + // For methods with an unknown call target, assume this might be a database field, thus returning another ActiveRecord object. + // In this case we do not know which class it belongs to, which is why this predicate can't associate the reference with a specific class. + result = getAnUnknownActiveRecordModelClassCall().getReturn() +} +/** + * Gets a call performed on an ActiveRecord class object, without a known call target in the codebase. + */ +deprecated private API::MethodAccessNode getAnUnknownActiveRecordModelClassCall() { + result = getAnActiveRecordModelClassRef().getMethod(_) and + result.asCall().asExpr().getExpr() instanceof UnknownMethodCall +} + +/** + * DEPRECATED. Use `ActiveRecordModelClass.getClassNode().trackModule().getMethod()` instead. + * + * A class method call whose receiver is an `ActiveRecordModelClass`. + */ +deprecated class ActiveRecordModelClassMethodCall extends MethodCall { ActiveRecordModelClassMethodCall() { - // e.g. Foo.where(...) - recvCls.getModule() = this.getReceiver().(ConstantReadAccess).getModule() - or - // e.g. Foo.joins(:bars).where(...) - recvCls = this.getReceiver().(ActiveRecordModelClassMethodCall).getReceiverClass() - or - // e.g. self.where(...) within an ActiveRecordModelClass - this.getReceiver() instanceof SelfVariableAccess and - this.getEnclosingModule() = recvCls + this = getAnUnknownActiveRecordModelClassCall().asCall().asExpr().getExpr() } - /** The `ActiveRecordModelClass` of the receiver of this method. */ - ActiveRecordModelClass getReceiverClass() { result = recvCls } + /** Gets the `ActiveRecordModelClass` of the receiver of this method, if it can be determined. */ + ActiveRecordModelClass getReceiverClass() { + this = result.getClassNode().trackModule().getMethod(_).asCall().asExpr().getExpr() + } } -private Expr sqlFragmentArgument(MethodCall call) { - exists(string methodName | - methodName = call.getMethodName() and - ( - methodName = - [ - "delete_all", "delete_by", "destroy_all", "destroy_by", "exists?", "find_by", "find_by!", - "find_or_create_by", "find_or_create_by!", "find_or_initialize_by", "find_by_sql", "from", - "group", "having", "joins", "lock", "not", "order", "reorder", "pluck", "where", - "rewhere", "select", "reselect", "update_all" - ] and - result = call.getArgument(0) - or - methodName = "calculate" and result = call.getArgument(1) - or - methodName in ["average", "count", "maximum", "minimum", "sum", "count_by_sql"] and - result = call.getArgument(0) - or - // This format was supported until Rails 2.3.8 - methodName = ["all", "find", "first", "last"] and - result = call.getKeywordArgument("conditions") - or - methodName = "reload" and - result = call.getKeywordArgument("lock") - or - // Calls to `annotate` can be used to add block comments to SQL queries. These are potentially vulnerable to - // SQLi if user supplied input is passed in as an argument. - methodName = "annotate" and - result = call.getArgument(_) - ) +private predicate sqlFragmentArgumentInner(DataFlow::CallNode call, DataFlow::Node sink) { + call = + activeRecordQueryBuilderCall([ + "delete_all", "delete_by", "destroy_all", "destroy_by", "exists?", "find_by", "find_by!", + "find_or_create_by", "find_or_create_by!", "find_or_initialize_by", "find_by_sql", "from", + "group", "having", "joins", "lock", "not", "order", "reorder", "pluck", "where", "rewhere", + "select", "reselect", "update_all" + ]) and + sink = call.getArgument(0) + or + call = activeRecordQueryBuilderCall("calculate") and + sink = call.getArgument(1) + or + call = + activeRecordQueryBuilderCall(["average", "count", "maximum", "minimum", "sum", "count_by_sql"]) and + sink = call.getArgument(0) + or + // This format was supported until Rails 2.3.8 + call = activeRecordQueryBuilderCall(["all", "find", "first", "last"]) and + sink = call.getKeywordArgument("conditions") + or + call = activeRecordQueryBuilderCall("reload") and + sink = call.getKeywordArgument("lock") + or + // Calls to `annotate` can be used to add block comments to SQL queries. These are potentially vulnerable to + // SQLi if user supplied input is passed in as an argument. + call = activeRecordQueryBuilderCall("annotate") and + sink = call.getArgument(_) + or + call = activeRecordConnectionInstance().getAMethodCall("execute") and + sink = call.getArgument(0) +} + +private predicate sqlFragmentArgument(DataFlow::CallNode call, DataFlow::Node sink) { + exists(DataFlow::Node arg | + sqlFragmentArgumentInner(call, arg) and + sink = [arg, arg.(DataFlow::ArrayLiteralNode).getElement(0)] and + unsafeSqlExpr(sink.asExpr().getExpr()) ) } @@ -162,6 +225,8 @@ private predicate unsafeSqlExpr(Expr sqlFragmentExpr) { } /** + * DEPRECATED. Use the `SqlExecution` concept or `ActiveRecordSqlExecutionRange`. + * * A method call that may result in executing unintended user-controlled SQL * queries if the `getSqlFragmentSinkArgument()` expression is tainted by * unsanitized user-controlled input. For example, supposing that `User` is an @@ -175,55 +240,32 @@ private predicate unsafeSqlExpr(Expr sqlFragmentExpr) { * as `"') OR 1=1 --"` could result in the application looking up all users * rather than just one with a matching name. */ -class PotentiallyUnsafeSqlExecutingMethodCall extends ActiveRecordModelClassMethodCall { - // The SQL fragment argument itself - private Expr sqlFragmentExpr; +deprecated class PotentiallyUnsafeSqlExecutingMethodCall extends ActiveRecordModelClassMethodCall { + private DataFlow::CallNode call; PotentiallyUnsafeSqlExecutingMethodCall() { - exists(Expr arg | - arg = sqlFragmentArgument(this) and - unsafeSqlExpr(sqlFragmentExpr) and - ( - sqlFragmentExpr = arg - or - sqlFragmentExpr = arg.(ArrayLiteral).getElement(0) - ) and - // Check that method has not been overridden - not exists(SingletonMethod m | - m.getName() = this.getMethodName() and - m.getOuterScope() = this.getReceiverClass() - ) - ) + call.asExpr().getExpr() = this and sqlFragmentArgument(call, _) } /** * Gets the SQL fragment argument of this method call. */ - Expr getSqlFragmentSinkArgument() { result = sqlFragmentExpr } + Expr getSqlFragmentSinkArgument() { + exists(DataFlow::Node sink | + sqlFragmentArgument(call, sink) and result = sink.asExpr().getExpr() + ) + } } /** - * An `SqlExecution::Range` for an argument to a - * `PotentiallyUnsafeSqlExecutingMethodCall` that may be vulnerable to being - * controlled by user input. + * A SQL execution arising from a call to the ActiveRecord library. */ class ActiveRecordSqlExecutionRange extends SqlExecution::Range { - ActiveRecordSqlExecutionRange() { - exists(PotentiallyUnsafeSqlExecutingMethodCall mc | - this.asExpr().getNode() = mc.getSqlFragmentSinkArgument() - ) - or - this = activeRecordConnectionInstance().getAMethodCall("execute").getArgument(0) and - unsafeSqlExpr(this.asExpr().getExpr()) - } + ActiveRecordSqlExecutionRange() { sqlFragmentArgument(_, this) } override DataFlow::Node getSql() { result = this } } -private API::Node activeRecordConnectionInstance() { - result = activeRecordClassApiNode().getReturn("connection") -} - // TODO: model `ActiveRecord` sanitizers // https://api.rubyonrails.org/classes/ActiveRecord/Sanitization/ClassMethods.html /** @@ -241,15 +283,8 @@ abstract class ActiveRecordModelInstantiation extends OrmInstantiation::Range, override predicate methodCallMayAccessField(string methodName) { // The method is not a built-in, and... not isBuiltInMethodForActiveRecordModelInstance(methodName) and - ( - // ...There is no matching method definition in the class, or... - not exists(this.getClass().getMethod(methodName)) - or - // ...the called method can access a field. - exists(Method m | m = this.getClass().getAPotentialFieldAccessMethod() | - m.getName() = methodName - ) - ) + // ...There is no matching method definition in the class + not exists(this.getClass().getMethod(methodName)) } } @@ -317,21 +352,10 @@ private class ActiveRecordModelFinderCall extends ActiveRecordModelInstantiation } // A `self` reference that may resolve to an active record model object -private class ActiveRecordModelClassSelfReference extends ActiveRecordModelInstantiation, - DataFlow::SelfParameterNode -{ +private class ActiveRecordModelClassSelfReference extends ActiveRecordModelInstantiation { private ActiveRecordModelClass cls; - ActiveRecordModelClassSelfReference() { - exists(MethodBase m | - m = this.getCallable() and - m.getEnclosingModule() = cls and - m = cls.getAMethod() - ) and - // In a singleton method, `self` refers to the class itself rather than an - // instance of that class - not this.getSelfVariable().getDeclaringScope() instanceof SingletonMethod - } + ActiveRecordModelClassSelfReference() { this = cls.getClassNode().getAnOwnInstanceSelf() } final override ActiveRecordModelClass getClass() { result = cls } } @@ -342,7 +366,7 @@ private class ActiveRecordModelClassSelfReference extends ActiveRecordModelInsta class ActiveRecordInstance extends DataFlow::Node { private ActiveRecordModelInstantiation instantiation; - ActiveRecordInstance() { this = instantiation or instantiation.flowsTo(this) } + ActiveRecordInstance() { this = instantiation.track().getAValueReachableFromSource() } /** Gets the `ActiveRecordModelClass` that this is an instance of. */ ActiveRecordModelClass getClass() { result = instantiation.getClass() } @@ -380,12 +404,12 @@ private module Persistence { /** A call to e.g. `User.create(name: "foo")` */ private class CreateLikeCall extends DataFlow::CallNode, PersistentWriteAccess::Range { CreateLikeCall() { - exists(this.asExpr().getExpr().(ActiveRecordModelClassMethodCall).getReceiverClass()) and - this.getMethodName() = - [ - "create", "create!", "create_or_find_by", "create_or_find_by!", "find_or_create_by", - "find_or_create_by!", "insert", "insert!" - ] + this = + activeRecordBaseClass() + .getAMethodCall([ + "create", "create!", "create_or_find_by", "create_or_find_by!", "find_or_create_by", + "find_or_create_by!", "insert", "insert!" + ]) } override DataFlow::Node getValue() { @@ -402,8 +426,7 @@ private module Persistence { /** A call to e.g. `User.update(1, name: "foo")` */ private class UpdateLikeClassMethodCall extends DataFlow::CallNode, PersistentWriteAccess::Range { UpdateLikeClassMethodCall() { - exists(this.asExpr().getExpr().(ActiveRecordModelClassMethodCall).getReceiverClass()) and - this.getMethodName() = ["update", "update!", "upsert"] + this = activeRecordBaseClass().getAMethodCall(["update", "update!", "upsert"]) } override DataFlow::Node getValue() { @@ -448,10 +471,7 @@ private module Persistence { * ``` */ private class TouchAllCall extends DataFlow::CallNode, PersistentWriteAccess::Range { - TouchAllCall() { - exists(this.asExpr().getExpr().(ActiveRecordModelClassMethodCall).getReceiverClass()) and - this.getMethodName() = "touch_all" - } + TouchAllCall() { this = activeRecordQueryBuilderCall("touch_all") } override DataFlow::Node getValue() { result = this.getKeywordArgument("time") } } @@ -461,8 +481,7 @@ private module Persistence { private ExprNodes::ArrayLiteralCfgNode arr; InsertAllLikeCall() { - exists(this.asExpr().getExpr().(ActiveRecordModelClassMethodCall).getReceiverClass()) and - this.getMethodName() = ["insert_all", "insert_all!", "upsert_all"] and + this = activeRecordBaseClass().getAMethodCall(["insert_all", "insert_all!", "upsert_all"]) and arr = this.getArgument(0).asExpr() } diff --git a/ruby/ql/lib/codeql/ruby/frameworks/ActiveResource.qll b/ruby/ql/lib/codeql/ruby/frameworks/ActiveResource.qll index 96219915770..9f0e0f4b859 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/ActiveResource.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/ActiveResource.qll @@ -18,8 +18,12 @@ module ActiveResource { * An ActiveResource model class. This is any (transitive) subclass of ActiveResource. */ pragma[nomagic] - private API::Node modelApiNode() { - result = API::getTopLevelMember("ActiveResource").getMember("Base").getASubclass() + private API::Node activeResourceBaseClass() { + result = API::getTopLevelMember("ActiveResource").getMember("Base") + } + + private DataFlow::ClassNode activeResourceClass() { + result = activeResourceBaseClass().getADescendentModule() } /** @@ -30,16 +34,8 @@ module ActiveResource { * end * ``` */ - class ModelClass extends ClassDeclaration { - API::Node model; - - ModelClass() { - model = modelApiNode() and - this.getSuperclassExpr() = model.getAValueReachableFromSource().asExpr().getExpr() - } - - /** Gets the API node for this model */ - API::Node getModelApiNode() { result = model } + class ModelClassNode extends DataFlow::ClassNode { + ModelClassNode() { this = activeResourceClass() } /** Gets a call to `site=`, which sets the base URL for this model. */ SiteAssignCall getASiteAssignment() { result.getModelClass() = this } @@ -49,6 +45,46 @@ module ActiveResource { c = this.getASiteAssignment() and c.disablesCertificateValidation() } + + /** Gets a method call on this class that returns an instance of the class. */ + private DataFlow::CallNode getAChainedCall() { + result.(FindCall).getModelClass() = this + or + result.(CreateCall).getModelClass() = this + or + result.(CustomHttpCall).getModelClass() = this + or + result.(CollectionCall).getCollection().getModelClass() = this and + result.getMethodName() = ["first", "last"] + } + + /** Gets an API node referring to an instance of this class. */ + API::Node getAnInstanceReference() { + result = this.trackInstance() + or + result = this.getAChainedCall().track() + } + } + + /** DEPRECATED. Use `ModelClassNode` instead. */ + deprecated class ModelClass extends ClassDeclaration { + private ModelClassNode cls; + + ModelClass() { this = cls.getADeclaration() } + + /** Gets the class for which this is a declaration. */ + ModelClassNode getClassNode() { result = cls } + + /** Gets the API node for this class object. */ + deprecated API::Node getModelApiNode() { result = cls.trackModule() } + + /** Gets a call to `site=`, which sets the base URL for this model. */ + SiteAssignCall getASiteAssignment() { result = cls.getASiteAssignment() } + + /** Holds if `c` sets a base URL which does not use HTTPS. */ + predicate disablesCertificateValidation(SiteAssignCall c) { + cls.disablesCertificateValidation(c) + } } /** @@ -62,25 +98,20 @@ module ActiveResource { * ``` */ class ModelClassMethodCall extends DataFlow::CallNode { - API::Node model; + private ModelClassNode cls; - ModelClassMethodCall() { - model = modelApiNode() and - this = classMethodCall(model, _) - } + ModelClassMethodCall() { this = cls.trackModule().getAMethodCall(_) } /** Gets the model class for this call. */ - ModelClass getModelClass() { result.getModelApiNode() = model } + ModelClassNode getModelClass() { result = cls } } /** * A call to `site=` on an ActiveResource model class. * This sets the base URL for all HTTP requests made by this class. */ - private class SiteAssignCall extends DataFlow::CallNode { - API::Node model; - - SiteAssignCall() { model = modelApiNode() and this = classMethodCall(model, "site=") } + private class SiteAssignCall extends ModelClassMethodCall { + SiteAssignCall() { this.getMethodName() = "site=" } /** * Gets a node that contributes to the URLs used for HTTP requests by the parent @@ -88,12 +119,10 @@ module ActiveResource { */ DataFlow::Node getAUrlPart() { result = this.getArgument(0) } - /** Gets the model class for this call. */ - ModelClass getModelClass() { result.getModelApiNode() = model } - /** Holds if this site value specifies HTTP rather than HTTPS. */ predicate disablesCertificateValidation() { this.getAUrlPart() + // TODO: We should not need all this just to get the string value .asExpr() .(ExprNodes::AssignExprCfgNode) .getRhs() @@ -141,87 +170,70 @@ module ActiveResource { } /** + * DEPRECATED. Use `ModelClassNode.getAnInstanceReference()` instead. + * * An ActiveResource model object. */ - class ModelInstance extends DataFlow::Node { - ModelClass cls; + deprecated class ModelInstance extends DataFlow::Node { + private ModelClassNode cls; - ModelInstance() { - exists(API::Node model | model = modelApiNode() | - this = model.getInstance().getAValueReachableFromSource() and - cls.getModelApiNode() = model - ) - or - exists(FindCall call | call.flowsTo(this) | cls = call.getModelClass()) - or - exists(CreateCall call | call.flowsTo(this) | cls = call.getModelClass()) - or - exists(CustomHttpCall call | call.flowsTo(this) | cls = call.getModelClass()) - or - exists(CollectionCall call | - call.getMethodName() = ["first", "last"] and - call.flowsTo(this) - | - cls = call.getCollection().getModelClass() - ) - } + ModelInstance() { this = cls.getAnInstanceReference().getAValueReachableFromSource() } /** Gets the model class for this instance. */ - ModelClass getModelClass() { result = cls } + ModelClassNode getModelClass() { result = cls } } /** * A call to a method on an ActiveResource model object. */ class ModelInstanceMethodCall extends DataFlow::CallNode { - ModelInstance i; + private ModelClassNode cls; - ModelInstanceMethodCall() { this.getReceiver() = i } + ModelInstanceMethodCall() { this = cls.getAnInstanceReference().getAMethodCall(_) } /** Gets the model instance for this call. */ - ModelInstance getInstance() { result = i } + deprecated ModelInstance getInstance() { result = this.getReceiver() } /** Gets the model class for this call. */ - ModelClass getModelClass() { result = i.getModelClass() } + ModelClassNode getModelClass() { result = cls } } /** - * A collection of ActiveResource model objects. + * DEPRECATED. Use `CollectionSource` instead. + * + * A data flow node that may refer to a collection of ActiveResource model objects. */ - class Collection extends DataFlow::Node { - ModelClassMethodCall classMethodCall; + deprecated class Collection extends DataFlow::Node { + Collection() { this = any(CollectionSource src).track().getAValueReachableFromSource() } + } - Collection() { - classMethodCall.flowsTo(this) and - ( - classMethodCall.getMethodName() = "all" - or - classMethodCall.getMethodName() = "find" and - classMethodCall.getArgument(0).asExpr().getConstantValue().isStringlikeValue("all") - ) + /** + * A call that returns a collection of ActiveResource model objects. + */ + class CollectionSource extends ModelClassMethodCall { + CollectionSource() { + this.getMethodName() = "all" + or + this.getArgument(0).asExpr().getConstantValue().isStringlikeValue("all") } - - /** Gets the model class for this collection. */ - ModelClass getModelClass() { result = classMethodCall.getModelClass() } } /** * A method call on a collection. */ class CollectionCall extends DataFlow::CallNode { - CollectionCall() { this.getReceiver() instanceof Collection } + private CollectionSource collection; + + CollectionCall() { this = collection.track().getAMethodCall(_) } /** Gets the collection for this call. */ - Collection getCollection() { result = this.getReceiver() } + CollectionSource getCollection() { result = collection } } private class ModelClassMethodCallAsHttpRequest extends Http::Client::Request::Range, ModelClassMethodCall { - ModelClass cls; - ModelClassMethodCallAsHttpRequest() { - this.getModelClass() = cls and this.getMethodName() = ["all", "build", "create", "create!", "find", "first", "last"] } @@ -230,12 +242,14 @@ module ActiveResource { override predicate disablesCertificateValidation( DataFlow::Node disablingNode, DataFlow::Node argumentOrigin ) { - cls.disablesCertificateValidation(disablingNode) and + this.getModelClass().disablesCertificateValidation(disablingNode) and // TODO: highlight real argument origin argumentOrigin = disablingNode } - override DataFlow::Node getAUrlPart() { result = cls.getASiteAssignment().getAUrlPart() } + override DataFlow::Node getAUrlPart() { + result = this.getModelClass().getASiteAssignment().getAUrlPart() + } override DataFlow::Node getResponseBody() { result = this } } @@ -243,10 +257,7 @@ module ActiveResource { private class ModelInstanceMethodCallAsHttpRequest extends Http::Client::Request::Range, ModelInstanceMethodCall { - ModelClass cls; - ModelInstanceMethodCallAsHttpRequest() { - this.getModelClass() = cls and this.getMethodName() = [ "exists?", "reload", "save", "save!", "destroy", "delete", "get", "patch", "post", "put", @@ -259,42 +270,15 @@ module ActiveResource { override predicate disablesCertificateValidation( DataFlow::Node disablingNode, DataFlow::Node argumentOrigin ) { - cls.disablesCertificateValidation(disablingNode) and + this.getModelClass().disablesCertificateValidation(disablingNode) and // TODO: highlight real argument origin argumentOrigin = disablingNode } - override DataFlow::Node getAUrlPart() { result = cls.getASiteAssignment().getAUrlPart() } + override DataFlow::Node getAUrlPart() { + result = this.getModelClass().getASiteAssignment().getAUrlPart() + } override DataFlow::Node getResponseBody() { result = this } } - - /** - * A call to a class method. - * - * TODO: is this general enough to be useful elsewhere? - * - * Examples: - * ```rb - * class A - * def self.m; end - * - * m # call - * end - * - * A.m # call - * ``` - */ - private DataFlow::CallNode classMethodCall(API::Node classNode, string methodName) { - // A.m - result = classNode.getAMethodCall(methodName) - or - // class A - // A.m - // end - result.getReceiver().asExpr() instanceof ExprNodes::SelfVariableAccessCfgNode and - result.asExpr().getExpr().getEnclosingModule().(ClassDeclaration).getSuperclassExpr() = - classNode.getAValueReachableFromSource().asExpr().getExpr() and - result.getMethodName() = methodName - } } diff --git a/ruby/ql/lib/codeql/ruby/frameworks/GraphQL.qll b/ruby/ql/lib/codeql/ruby/frameworks/GraphQL.qll index 8e673c4255d..98fbe241404 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/GraphQL.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/GraphQL.qll @@ -39,13 +39,8 @@ private API::Node graphQlSchema() { result = API::getTopLevelMember("GraphQL").g */ private class GraphqlRelayClassicMutationClass extends ClassDeclaration { GraphqlRelayClassicMutationClass() { - this.getSuperclassExpr() = - graphQlSchema() - .getMember("RelayClassicMutation") - .getASubclass() - .getAValueReachableFromSource() - .asExpr() - .getExpr() + this = + graphQlSchema().getMember("RelayClassicMutation").getADescendentModule().getADeclaration() } } @@ -74,13 +69,7 @@ private class GraphqlRelayClassicMutationClass extends ClassDeclaration { */ private class GraphqlSchemaResolverClass extends ClassDeclaration { GraphqlSchemaResolverClass() { - this.getSuperclassExpr() = - graphQlSchema() - .getMember("Resolver") - .getASubclass() - .getAValueReachableFromSource() - .asExpr() - .getExpr() + this = graphQlSchema().getMember("Resolver").getADescendentModule().getADeclaration() } } @@ -103,13 +92,7 @@ private string getASupportedHttpMethod() { result = ["get", "post"] } */ class GraphqlSchemaObjectClass extends ClassDeclaration { GraphqlSchemaObjectClass() { - this.getSuperclassExpr() = - graphQlSchema() - .getMember("Object") - .getASubclass() - .getAValueReachableFromSource() - .asExpr() - .getExpr() + this = graphQlSchema().getMember("Object").getADescendentModule().getADeclaration() } /** Gets a `GraphqlFieldDefinitionMethodCall` called in this class. */ diff --git a/ruby/ql/lib/codeql/ruby/frameworks/Sqlite3.qll b/ruby/ql/lib/codeql/ruby/frameworks/Sqlite3.qll index 77e26ca13d7..981ace2e7da 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/Sqlite3.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/Sqlite3.qll @@ -15,21 +15,20 @@ private import codeql.ruby.Concepts * https://github.com/sparklemotion/sqlite3-ruby */ module Sqlite3 { + private API::Node databaseConst() { + result = API::getTopLevelMember("SQLite3").getMember("Database") + } + + private API::Node dbInstance() { + result = databaseConst().getInstance() + or + // e.g. SQLite3::Database.new("foo.db") |db| { db.some_method } + result = databaseConst().getMethod("new").getBlock().getParameter(0) + } + /** Gets a method call with a receiver that is a database instance. */ private DataFlow::CallNode getADatabaseMethodCall(string methodName) { - exists(API::Node dbInstance | - dbInstance = API::getTopLevelMember("SQLite3").getMember("Database").getInstance() and - ( - result = dbInstance.getAMethodCall(methodName) - or - // e.g. SQLite3::Database.new("foo.db") |db| { db.some_method } - exists(DataFlow::BlockNode block | - result.getMethodName() = methodName and - block = dbInstance.getAValueReachableFromSource().(DataFlow::CallNode).getBlock() and - block.getParameter(0).flowsTo(result.getReceiver()) - ) - ) - ) + result = dbInstance().getAMethodCall(methodName) } /** A prepared but unexecuted SQL statement. */ diff --git a/ruby/ql/lib/codeql/ruby/frameworks/Twirp.qll b/ruby/ql/lib/codeql/ruby/frameworks/Twirp.qll index 73ef87f5fd5..7b8648bd2b1 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/Twirp.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/Twirp.qll @@ -16,50 +16,28 @@ module Twirp { /** * A Twirp service instantiation */ - class ServiceInstantiation extends DataFlow::CallNode { + deprecated class ServiceInstantiation extends DataFlow::CallNode { ServiceInstantiation() { this = API::getTopLevelMember("Twirp").getMember("Service").getAnInstantiation() } - /** - * Gets a local source node for the Service instantiation argument (the service handler). - */ - private DataFlow::LocalSourceNode getHandlerSource() { - result = this.getArgument(0).getALocalSource() - } - - /** - * Gets the API::Node for the service handler's class. - */ - private API::Node getAHandlerClassApiNode() { - result.getAnInstantiation() = this.getHandlerSource() - } - - /** - * Gets the AST module for the service handler's class. - */ - private Ast::Module getAHandlerClassAstNode() { - result = - this.getAHandlerClassApiNode() - .asSource() - .asExpr() - .(CfgNodes::ExprNodes::ConstantReadAccessCfgNode) - .getExpr() - .getModule() - } - /** * Gets a handler's method. */ - Ast::Method getAHandlerMethod() { - result = this.getAHandlerClassAstNode().getAnInstanceMethod() + DataFlow::MethodNode getAHandlerMethodNode() { + result = this.getArgument(0).backtrack().getMethod(_).asCallable() } + + /** + * Gets a handler's method as an AST node. + */ + Ast::Method getAHandlerMethod() { result = this.getAHandlerMethodNode().asCallableAstNode() } } /** * A Twirp client */ - class ClientInstantiation extends DataFlow::CallNode { + deprecated class ClientInstantiation extends DataFlow::CallNode { ClientInstantiation() { this = API::getTopLevelMember("Twirp").getMember("Client").getAnInstantiation() } @@ -67,7 +45,10 @@ module Twirp { /** The URL of a Twirp service, considered as a sink. */ class ServiceUrlAsSsrfSink extends ServerSideRequestForgery::Sink { - ServiceUrlAsSsrfSink() { exists(ClientInstantiation c | c.getArgument(0) = this) } + ServiceUrlAsSsrfSink() { + this = + API::getTopLevelMember("Twirp").getMember("Client").getMethod("new").getArgument(0).asSink() + } } /** A parameter that will receive parts of the url when handling an incoming request. */ @@ -75,7 +56,14 @@ module Twirp { DataFlow::ParameterNode { UnmarshaledParameter() { - exists(ServiceInstantiation i | i.getAHandlerMethod().getParameter(0) = this.asParameter()) + this = + API::getTopLevelMember("Twirp") + .getMember("Service") + .getMethod("new") + .getArgument(0) + .getMethod(_) + .getParameter(0) + .asSource() } override string getSourceType() { result = "Twirp Unmarhaled Parameter" } diff --git a/ruby/ql/lib/codeql/ruby/frameworks/core/Gem.qll b/ruby/ql/lib/codeql/ruby/frameworks/core/Gem.qll index 4095beb10af..f7345b22ed1 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/core/Gem.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/core/Gem.qll @@ -24,7 +24,7 @@ module Gem { GemSpec() { this.getExtension() = "gemspec" and - specCall = API::root().getMember("Gem").getMember("Specification").getMethod("new") and + specCall = API::getTopLevelMember("Gem").getMember("Specification").getMethod("new") and specCall.getLocation().getFile() = this } @@ -42,7 +42,7 @@ module Gem { .getBlock() .getParameter(0) .getMethod(name + "=") - .getParameter(0) + .getArgument(0) .asSink() .asExpr() .getExpr() diff --git a/ruby/ql/lib/codeql/ruby/frameworks/core/Kernel.qll b/ruby/ql/lib/codeql/ruby/frameworks/core/Kernel.qll index b445521adb8..ad87ee37ecd 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/core/Kernel.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/core/Kernel.qll @@ -19,7 +19,8 @@ module Kernel { */ class KernelMethodCall extends DataFlow::CallNode { KernelMethodCall() { - this = API::getTopLevelMember("Kernel").getAMethodCall(_) + // Match Kernel calls using local flow, to avoid finding singleton calls on subclasses + this = DataFlow::getConstant("Kernel").getAMethodCall(_) or this.asExpr().getExpr() instanceof UnknownMethodCall and ( diff --git a/ruby/ql/lib/codeql/ruby/frameworks/data/ModelsAsData.qll b/ruby/ql/lib/codeql/ruby/frameworks/data/ModelsAsData.qll index 2e3fa21a45b..21bc5f69dcb 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/data/ModelsAsData.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/data/ModelsAsData.qll @@ -44,7 +44,7 @@ private class SummarizedCallableFromModel extends SummarizedCallable { override Call getACall() { exists(API::MethodAccessNode base | ModelOutput::resolvedSummaryBase(type, path, base) and - result = base.getCallNode().asExpr().getExpr() + result = base.asCall().asExpr().getExpr() ) } diff --git a/ruby/ql/lib/codeql/ruby/frameworks/data/internal/ApiGraphModelsSpecific.qll b/ruby/ql/lib/codeql/ruby/frameworks/data/internal/ApiGraphModelsSpecific.qll index 4c03522a9c5..ed7a331c452 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/data/internal/ApiGraphModelsSpecific.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/data/internal/ApiGraphModelsSpecific.qll @@ -99,9 +99,10 @@ API::Node getExtraNodeFromPath(string type, AccessPath path, int n) { // A row of form `any;Method[foo]` should match any method named `foo`. type = "any" and n = 1 and - exists(EntryPointFromAnyType entry | - methodMatchedByName(path, entry.getName()) and - result = entry.getANode() + exists(string methodName, DataFlow::CallNode call | + methodMatchedByName(path, methodName) and + call.getMethodName() = methodName and + result.(API::MethodAccessNode).asCall() = call ) } @@ -112,20 +113,10 @@ API::Node getExtraNodeFromType(string type) { constRef = getConstantFromConstPath(consts) | suffix = "!" and - ( - result.(API::Node::Internal).asSourceInternal() = constRef - or - result.(API::Node::Internal).asSourceInternal() = - constRef.getADescendentModule().getAnOwnModuleSelf() - ) + result = constRef.track() or suffix = "" and - ( - result.(API::Node::Internal).asSourceInternal() = constRef.getAMethodCall("new") - or - result.(API::Node::Internal).asSourceInternal() = - constRef.getADescendentModule().getAnInstanceSelf() - ) + result = constRef.track().getInstance() ) or type = "" and @@ -145,21 +136,6 @@ private predicate methodMatchedByName(AccessPath path, string methodName) { ) } -/** - * An API graph entry point corresponding to a method name such as `foo` in `;any;Method[foo]`. - * - * This ensures that the API graph rooted in that method call is materialized. - */ -private class EntryPointFromAnyType extends API::EntryPoint { - string name; - - EntryPointFromAnyType() { this = "AnyMethod[" + name + "]" and methodMatchedByName(_, name) } - - override DataFlow::CallNode getACall() { result.getMethodName() = name } - - string getName() { result = name } -} - /** * Gets a Ruby-specific API graph successor of `node` reachable by resolving `token`. */ @@ -175,9 +151,11 @@ API::Node getExtraSuccessorFromNode(API::Node node, AccessPathToken token) { result = node.getInstance() or token.getName() = "Parameter" and - result = - node.getASuccessor(API::Label::getLabelFromParameterPosition(FlowSummaryImplSpecific::parseArgBody(token - .getAnArgument()))) + exists(DataFlowDispatch::ArgumentPosition argPos, DataFlowDispatch::ParameterPosition paramPos | + argPos = FlowSummaryImplSpecific::parseParamBody(token.getAnArgument()) and + DataFlowDispatch::parameterMatch(paramPos, argPos) and + result = node.getParameterAtPosition(paramPos) + ) or exists(DataFlow::ContentSet contents | SummaryComponent::content(contents) = FlowSummaryImplSpecific::interpretComponentSpecific(token) and @@ -191,9 +169,11 @@ API::Node getExtraSuccessorFromNode(API::Node node, AccessPathToken token) { bindingset[token] API::Node getExtraSuccessorFromInvoke(InvokeNode node, AccessPathToken token) { token.getName() = "Argument" and - result = - node.getASuccessor(API::Label::getLabelFromArgumentPosition(FlowSummaryImplSpecific::parseParamBody(token - .getAnArgument()))) + exists(DataFlowDispatch::ArgumentPosition argPos, DataFlowDispatch::ParameterPosition paramPos | + paramPos = FlowSummaryImplSpecific::parseArgBody(token.getAnArgument()) and + DataFlowDispatch::parameterMatch(paramPos, argPos) and + result = node.getArgumentAtPosition(argPos) + ) } /** @@ -211,7 +191,7 @@ predicate invocationMatchesExtraCallSiteFilter(InvokeNode invoke, AccessPathToke /** An API graph node representing a method call. */ class InvokeNode extends API::MethodAccessNode { /** Gets the number of arguments to the call. */ - int getNumArgument() { result = this.getCallNode().getNumberOfArguments() } + int getNumArgument() { result = this.asCall().getNumberOfArguments() } } /** Gets the `InvokeNode` corresponding to a specific invocation of `node`. */ diff --git a/ruby/ql/lib/codeql/ruby/security/KernelOpenQuery.qll b/ruby/ql/lib/codeql/ruby/security/KernelOpenQuery.qll index 31c9055d7cc..3653fb23cee 100644 --- a/ruby/ql/lib/codeql/ruby/security/KernelOpenQuery.qll +++ b/ruby/ql/lib/codeql/ruby/security/KernelOpenQuery.qll @@ -55,7 +55,8 @@ class AmbiguousPathCall extends DataFlow::CallNode { } private predicate methodCallOnlyOnIO(DataFlow::CallNode node, string methodName) { - node = API::getTopLevelMember("IO").getAMethodCall(methodName) and + // Use local flow to find calls to 'IO' without subclasses + node = DataFlow::getConstant("IO").getAMethodCall(methodName) and not node = API::getTopLevelMember("File").getAMethodCall(methodName) // needed in e.g. opal/opal, where some calls have both paths (opal implements an own corelib) } diff --git a/ruby/ql/lib/codeql/ruby/security/OpenSSL.qll b/ruby/ql/lib/codeql/ruby/security/OpenSSL.qll index 261a7af462f..637f7dab04a 100644 --- a/ruby/ql/lib/codeql/ruby/security/OpenSSL.qll +++ b/ruby/ql/lib/codeql/ruby/security/OpenSSL.qll @@ -597,7 +597,7 @@ private module Digest { call = API::getTopLevelMember("OpenSSL").getMember("Digest").getMethod("new") | this = call.getReturn().getAMethodCall(["digest", "update", "<<"]) and - algo.matchesName(call.getCallNode() + algo.matchesName(call.asCall() .getArgument(0) .asExpr() .getExpr() @@ -619,7 +619,7 @@ private module Digest { Cryptography::HashingAlgorithm algo; DigestCallDirect() { - this = API::getTopLevelMember("OpenSSL").getMember("Digest").getMethod("digest").getCallNode() and + this = API::getTopLevelMember("OpenSSL").getMember("Digest").getMethod("digest").asCall() and algo.matchesName(this.getArgument(0).asExpr().getExpr().getConstantValue().getString()) } diff --git a/ruby/ql/lib/codeql/ruby/typetracking/ApiGraphShared.qll b/ruby/ql/lib/codeql/ruby/typetracking/ApiGraphShared.qll new file mode 100644 index 00000000000..07b10cb1303 --- /dev/null +++ b/ruby/ql/lib/codeql/ruby/typetracking/ApiGraphShared.qll @@ -0,0 +1,328 @@ +/** + * Parts of API graphs that can be shared with other dynamic languages. + * + * Depends on TypeTrackerSpecific for the corresponding language. + */ + +private import codeql.Locations +private import codeql.ruby.typetracking.TypeTracker +private import TypeTrackerSpecific + +/** + * The signature to use when instantiating `ApiGraphShared`. + * + * The implementor should define a newtype with at least three branches as follows: + * ```ql + * newtype TApiNode = + * MkForwardNode(LocalSourceNode node, TypeTracker t) { isReachable(node, t) } or + * MkBackwardNode(LocalSourceNode node, TypeTracker t) { isReachable(node, t) } or + * MkSinkNode(Node node) { ... } or + * ... + * ``` + * + * The three branches should be exposed through `getForwardNode`, `getBackwardNode`, and `getSinkNode`, respectively. + */ +signature module ApiGraphSharedSig { + /** A node in the API graph. */ + class ApiNode { + /** Gets a string representation of this API node. */ + string toString(); + + /** Gets the location associated with this API node, if any. */ + Location getLocation(); + } + + /** + * Gets the forward node with the given type-tracking state. + * + * This node will have outgoing epsilon edges to its type-tracking successors. + */ + ApiNode getForwardNode(TypeTrackingNode node, TypeTracker t); + + /** + * Gets the backward node with the given type-tracking state. + * + * This node will have outgoing epsilon edges to its type-tracking predecessors. + */ + ApiNode getBackwardNode(TypeTrackingNode node, TypeTracker t); + + /** + * Gets the sink node corresponding to `node`. + * + * Since sinks are not generally `LocalSourceNode`s, such nodes are materialised separately in order for + * the API graph to include representatives for sinks. Note that there is no corresponding case for "source" + * nodes as these are represented as forward nodes with initial-state type-trackers. + * + * Sink nodes have outgoing epsilon edges to the backward nodes corresponding to their local sources. + */ + ApiNode getSinkNode(Node node); + + /** + * Holds if a language-specific epsilon edge `pred -> succ` should be generated. + */ + predicate specificEpsilonEdge(ApiNode pred, ApiNode succ); +} + +/** + * Parts of API graphs that can be shared between language implementations. + */ +module ApiGraphShared { + private import S + + /** Gets a local source of `node`. */ + bindingset[node] + pragma[inline_late] + TypeTrackingNode getALocalSourceStrict(Node node) { result = node.getALocalSource() } + + cached + private module Cached { + /** + * Holds if there is an epsilon edge `pred -> succ`. + * + * That relation is reflexive, so `fastTC` produces the equivalent of a reflexive, transitive closure. + */ + pragma[noopt] + cached + predicate epsilonEdge(ApiNode pred, ApiNode succ) { + exists( + StepSummary summary, TypeTrackingNode predNode, TypeTracker predState, + TypeTrackingNode succNode, TypeTracker succState + | + StepSummary::stepCall(predNode, succNode, summary) + or + StepSummary::stepNoCall(predNode, succNode, summary) + | + pred = getForwardNode(predNode, predState) and + succState = StepSummary::append(predState, summary) and + succ = getForwardNode(succNode, succState) + or + succ = getBackwardNode(predNode, predState) and // swap order for backward flow + succState = StepSummary::append(predState, summary) and + pred = getBackwardNode(succNode, succState) // swap order for backward flow + ) + or + exists(Node sink, TypeTrackingNode localSource | + pred = getSinkNode(sink) and + localSource = getALocalSourceStrict(sink) and + succ = getBackwardStartNode(localSource) + ) + or + specificEpsilonEdge(pred, succ) + or + succ instanceof ApiNode and + succ = pred + } + + /** + * Holds if `pred` can reach `succ` by zero or more epsilon edges. + */ + cached + predicate epsilonStar(ApiNode pred, ApiNode succ) = fastTC(epsilonEdge/2)(pred, succ) + + /** Gets the API node to use when starting forward flow from `source` */ + cached + ApiNode forwardStartNode(TypeTrackingNode source) { + result = getForwardNode(source, TypeTracker::end(false)) + } + + /** Gets the API node to use when starting backward flow from `sink` */ + cached + ApiNode backwardStartNode(TypeTrackingNode sink) { + // There is backward flow A->B iff there is forward flow B->A. + // The starting point of backward flow corresponds to the end of a forward flow, and vice versa. + result = getBackwardNode(sink, TypeTracker::end(_)) + } + + /** Gets `node` as a data flow source. */ + cached + TypeTrackingNode asSourceCached(ApiNode node) { node = forwardEndNode(result) } + + /** Gets `node` as a data flow sink. */ + cached + Node asSinkCached(ApiNode node) { node = getSinkNode(result) } + } + + private import Cached + + /** Gets an API node corresponding to the end of forward-tracking to `localSource`. */ + pragma[nomagic] + private ApiNode forwardEndNode(TypeTrackingNode localSource) { + result = getForwardNode(localSource, TypeTracker::end(_)) + } + + /** Gets an API node corresponding to the end of backtracking to `localSource`. */ + pragma[nomagic] + private ApiNode backwardEndNode(TypeTrackingNode localSource) { + result = getBackwardNode(localSource, TypeTracker::end(false)) + } + + /** Gets a node reachable from `node` by zero or more epsilon edges, including `node` itself. */ + bindingset[node] + pragma[inline_late] + ApiNode getAnEpsilonSuccessorInline(ApiNode node) { epsilonStar(node, result) } + + /** Gets `node` as a data flow sink. */ + bindingset[node] + pragma[inline_late] + Node asSinkInline(ApiNode node) { result = asSinkCached(node) } + + /** Gets `node` as a data flow source. */ + bindingset[node] + pragma[inline_late] + TypeTrackingNode asSourceInline(ApiNode node) { result = asSourceCached(node) } + + /** Gets a value reachable from `source`. */ + bindingset[source] + pragma[inline_late] + Node getAValueReachableFromSourceInline(ApiNode source) { + exists(TypeTrackingNode src | + src = asSourceInline(getAnEpsilonSuccessorInline(source)) and + src.flowsTo(pragma[only_bind_into](result)) + ) + } + + /** Gets a value that can reach `sink`. */ + bindingset[sink] + pragma[inline_late] + Node getAValueReachingSinkInline(ApiNode sink) { + result = asSinkInline(getAnEpsilonSuccessorInline(sink)) + } + + /** + * Gets the starting point for forward-tracking at `node`. + * + * Should be used to obtain the successor of an edge when constructing labelled edges. + */ + bindingset[node] + pragma[inline_late] + ApiNode getForwardStartNode(Node node) { result = forwardStartNode(node) } + + /** + * Gets the starting point of backtracking from `node`. + * + * Should be used to obtain the successor of an edge when constructing labelled edges. + */ + bindingset[node] + pragma[inline_late] + ApiNode getBackwardStartNode(Node node) { result = backwardStartNode(node) } + + /** + * Gets a possible ending point of forward-tracking at `node`. + * + * Should be used to obtain the predecessor of an edge when constructing labelled edges. + * + * This is not backed by a `cached` predicate, and should only be used for materialising `cached` + * predicates in the API graph implementation - it should not be called in later stages. + */ + bindingset[node] + pragma[inline_late] + ApiNode getForwardEndNode(Node node) { result = forwardEndNode(node) } + + /** + * Gets a possible ending point backtracking to `node`. + * + * Should be used to obtain the predecessor of an edge when constructing labelled edges. + * + * This is not backed by a `cached` predicate, and should only be used for materialising `cached` + * predicates in the API graph implementation - it should not be called in later stages. + */ + bindingset[node] + pragma[inline_late] + ApiNode getBackwardEndNode(Node node) { result = backwardEndNode(node) } + + /** + * Gets a possible eding point of forward or backward tracking at `node`. + * + * Should be used to obtain the predecessor of an edge generated from store or load edges. + */ + bindingset[node] + pragma[inline_late] + ApiNode getForwardOrBackwardEndNode(Node node) { + result = getForwardEndNode(node) or result = getBackwardEndNode(node) + } + + /** Gets an API node for tracking forward starting at `node`. This is the implementation of `DataFlow::LocalSourceNode.track()` */ + bindingset[node] + pragma[inline_late] + ApiNode getNodeForForwardTracking(Node node) { result = forwardStartNode(node) } + + /** Gets an API node for backtracking starting at `node`. The implementation of `DataFlow::Node.backtrack()`. */ + bindingset[node] + pragma[inline_late] + ApiNode getNodeForBacktracking(Node node) { + result = getBackwardStartNode(getALocalSourceStrict(node)) + } + + /** Parts of the shared module to be re-exported by the user-facing `API` module. */ + module Public { + /** + * The signature to use when instantiating the `ExplainFlow` module. + */ + signature module ExplainFlowSig { + /** Holds if `node` should be a source. */ + predicate isSource(ApiNode node); + + /** Holds if `node` should be a sink. */ + default predicate isSink(ApiNode node) { any() } + + /** Holds if `node` should be skipped in the generated paths. */ + default predicate isHidden(ApiNode node) { none() } + } + + /** + * Module to help debug and visualize the data flows underlying API graphs. + * + * This module exports the query predicates for a path-problem query, and should be imported + * into the top-level of such a query. + * + * The module argument should specify source and sink API nodes, and the resulting query + * will show paths of epsilon edges that go from a source to a sink. Only epsilon edges are visualized. + * + * To condense the output a bit, paths in which the source and sink are the same node are omitted. + */ + module ExplainFlow { + private import T + + private ApiNode relevantNode() { + isSink(result) and + result = getAnEpsilonSuccessorInline(any(ApiNode node | isSource(node))) + or + epsilonEdge(result, relevantNode()) + } + + /** Holds if `node` is part of the graph to visualize. */ + query predicate nodes(ApiNode node) { node = relevantNode() and not isHidden(node) } + + private predicate edgeToHiddenNode(ApiNode pred, ApiNode succ) { + epsilonEdge(pred, succ) and + isHidden(succ) and + pred = relevantNode() and + succ = relevantNode() + } + + /** Holds if `pred -> succ` is an edge in the graph to visualize. */ + query predicate edges(ApiNode pred, ApiNode succ) { + nodes(pred) and + nodes(succ) and + exists(ApiNode mid | + edgeToHiddenNode*(pred, mid) and + epsilonEdge(mid, succ) + ) + } + + /** Holds for each source/sink pair to visualize in the graph. */ + query predicate problems( + ApiNode location, ApiNode sourceNode, ApiNode sinkNode, string message + ) { + nodes(sourceNode) and + nodes(sinkNode) and + isSource(sourceNode) and + isSink(sinkNode) and + sinkNode = getAnEpsilonSuccessorInline(sourceNode) and + sourceNode != sinkNode and + location = sinkNode and + message = "Node flows here" + } + } + } +} diff --git a/ruby/ql/lib/codeql/ruby/typetracking/TypeTracker.qll b/ruby/ql/lib/codeql/ruby/typetracking/TypeTracker.qll index fb3d7bf828f..001375b4dc5 100644 --- a/ruby/ql/lib/codeql/ruby/typetracking/TypeTracker.qll +++ b/ruby/ql/lib/codeql/ruby/typetracking/TypeTracker.qll @@ -55,10 +55,9 @@ private module Cached { ) } - pragma[nomagic] - private TypeTracker noContentTypeTracker(boolean hasCall) { - result = MkTypeTracker(hasCall, noContent()) - } + /** Gets a type tracker with no content and the call bit set to the given value. */ + cached + TypeTracker noContentTypeTracker(boolean hasCall) { result = MkTypeTracker(hasCall, noContent()) } /** Gets the summary resulting from appending `step` to type-tracking summary `tt`. */ cached @@ -318,6 +317,8 @@ class StepSummary extends TStepSummary { /** Provides predicates for updating step summaries (`StepSummary`s). */ module StepSummary { + predicate append = Cached::append/2; + /** * Gets the summary that corresponds to having taken a forwards * inter-procedural step from `nodeFrom` to `nodeTo`. @@ -378,6 +379,35 @@ module StepSummary { } deprecated predicate localSourceStoreStep = flowsToStoreStep/3; + + /** Gets the step summary for a level step. */ + StepSummary levelStep() { result = LevelStep() } + + /** Gets the step summary for a call step. */ + StepSummary callStep() { result = CallStep() } + + /** Gets the step summary for a return step. */ + StepSummary returnStep() { result = ReturnStep() } + + /** Gets the step summary for storing into `content`. */ + StepSummary storeStep(TypeTrackerContent content) { result = StoreStep(content) } + + /** Gets the step summary for loading from `content`. */ + StepSummary loadStep(TypeTrackerContent content) { result = LoadStep(content) } + + /** Gets the step summary for loading from `load` and then storing into `store`. */ + StepSummary loadStoreStep(TypeTrackerContent load, TypeTrackerContent store) { + result = LoadStoreStep(load, store) + } + + /** Gets the step summary for a step that only permits contents matched by `filter`. */ + StepSummary withContent(ContentFilter filter) { result = WithContent(filter) } + + /** Gets the step summary for a step that blocks contents matched by `filter`. */ + StepSummary withoutContent(ContentFilter filter) { result = WithoutContent(filter) } + + /** Gets the step summary for a jump step. */ + StepSummary jumpStep() { result = JumpStep() } } /** @@ -540,6 +570,13 @@ module TypeTracker { * Gets a valid end point of type tracking. */ TypeTracker end() { result.end() } + + /** + * INTERNAL USE ONLY. + * + * Gets a valid end point of type tracking with the call bit set to the given value. + */ + predicate end = Cached::noContentTypeTracker/1; } pragma[nomagic] diff --git a/ruby/ql/test/library-tests/dataflow/api-graphs/ApiGraphs.expected b/ruby/ql/test/library-tests/dataflow/api-graphs/ApiGraphs.expected index 43b6490b052..5677b16fedb 100644 --- a/ruby/ql/test/library-tests/dataflow/api-graphs/ApiGraphs.expected +++ b/ruby/ql/test/library-tests/dataflow/api-graphs/ApiGraphs.expected @@ -1,8 +1,8 @@ classMethodCalls -| test1.rb:58:1:58:8 | Use getMember("M1").getMember("C1").getMethod("m").getReturn() | -| test1.rb:59:1:59:8 | Use getMember("M2").getMember("C3").getMethod("m").getReturn() | +| test1.rb:58:1:58:8 | ForwardNode(call to m) | +| test1.rb:59:1:59:8 | ForwardNode(call to m) | instanceMethodCalls -| test1.rb:61:1:61:12 | Use getMember("M1").getMember("C1").getMethod("new").getReturn().getMethod("m").getReturn() | -| test1.rb:62:1:62:12 | Use getMember("M2").getMember("C3").getMethod("new").getReturn().getMethod("m").getReturn() | +| test1.rb:61:1:61:12 | ForwardNode(call to m) | +| test1.rb:62:1:62:12 | ForwardNode(call to m) | flowThroughArray | test1.rb:73:1:73:10 | call to m | diff --git a/ruby/ql/test/library-tests/dataflow/api-graphs/use.expected b/ruby/ql/test/library-tests/dataflow/api-graphs/VerifyApiGraphExpectations.expected similarity index 100% rename from ruby/ql/test/library-tests/dataflow/api-graphs/use.expected rename to ruby/ql/test/library-tests/dataflow/api-graphs/VerifyApiGraphExpectations.expected diff --git a/ruby/ql/test/library-tests/dataflow/api-graphs/VerifyApiGraphExpectations.ql b/ruby/ql/test/library-tests/dataflow/api-graphs/VerifyApiGraphExpectations.ql new file mode 100644 index 00000000000..93b5aaf745e --- /dev/null +++ b/ruby/ql/test/library-tests/dataflow/api-graphs/VerifyApiGraphExpectations.ql @@ -0,0 +1,77 @@ +import ruby +import codeql.ruby.ast.internal.TreeSitter +import codeql.ruby.dataflow.internal.AccessPathSyntax +import codeql.ruby.frameworks.data.internal.ApiGraphModels +import codeql.ruby.ApiGraphs +import TestUtilities.InlineExpectationsTest + +class AccessPathFromExpectation extends AccessPath::Range { + AccessPathFromExpectation() { hasExpectationWithValue(_, this) } +} + +API::Node evaluatePath(AccessPath path, int n) { + path instanceof AccessPathFromExpectation and + n = 1 and + exists(AccessPathToken token | token = path.getToken(0) | + token.getName() = "Member" and + result = API::getTopLevelMember(token.getAnArgument()) + or + token.getName() = "Method" and + result = API::getTopLevelCall(token.getAnArgument()) + or + token.getName() = "EntryPoint" and + result = token.getAnArgument().(API::EntryPoint).getANode() + ) + or + result = getSuccessorFromNode(evaluatePath(path, n - 1), path.getToken(n - 1)) + or + result = getSuccessorFromInvoke(evaluatePath(path, n - 1), path.getToken(n - 1)) + or + // TODO this is a workaround, support parsing of Method['[]'] instead + path.getToken(n - 1).getName() = "MethodBracket" and + result = evaluatePath(path, n - 1).getMethod("[]") +} + +API::Node evaluatePath(AccessPath path) { result = evaluatePath(path, path.getNumToken()) } + +module ApiUseTest implements TestSig { + string getARelevantTag() { result = ["source", "sink", "call", "reachableFromSource"] } + + predicate hasActualResult(Location location, string element, string tag, string value) { + // All results are considered optional + none() + } + + predicate hasOptionalResult(Location location, string element, string tag, string value) { + exists(API::Node apiNode, DataFlow::Node dataflowNode | + apiNode = evaluatePath(value) and + ( + tag = "source" and dataflowNode = apiNode.asSource() + or + tag = "reachableFromSource" and dataflowNode = apiNode.getAValueReachableFromSource() + or + tag = "sink" and dataflowNode = apiNode.asSink() + or + tag = "call" and dataflowNode = apiNode.asCall() + ) and + location = dataflowNode.getLocation() and + element = dataflowNode.toString() + ) + } +} + +import MakeTest + +class CustomEntryPointCall extends API::EntryPoint { + CustomEntryPointCall() { this = "CustomEntryPointCall" } + + override DataFlow::CallNode getACall() { result.getMethodName() = "customEntryPointCall" } +} + +class CustomEntryPointUse extends API::EntryPoint { + CustomEntryPointUse() { this = "CustomEntryPointUse" } + + override DataFlow::LocalSourceNode getASource() { + result.(DataFlow::CallNode).getMethodName() = "customEntryPointUse" + } +} diff --git a/ruby/ql/test/library-tests/dataflow/api-graphs/callbacks.rb b/ruby/ql/test/library-tests/dataflow/api-graphs/callbacks.rb index 34c4d17d212..35cf4471b48 100644 --- a/ruby/ql/test/library-tests/dataflow/api-graphs/callbacks.rb +++ b/ruby/ql/test/library-tests/dataflow/api-graphs/callbacks.rb @@ -1,39 +1,39 @@ -Something.foo.withCallback do |a, b| #$ use=getMember("Something").getMethod("foo").getReturn() - a.something #$ use=getMember("Something").getMethod("foo").getReturn().getMethod("withCallback").getBlock().getParameter(0).getMethod("something").getReturn() - b.somethingElse #$ use=getMember("Something").getMethod("foo").getReturn().getMethod("withCallback").getBlock().getParameter(1).getMethod("somethingElse").getReturn() -end #$ use=getMember("Something").getMethod("foo").getReturn().getMethod("withCallback").getReturn() +Something.foo.withCallback do |a, b| #$ source=Member[Something].Method[foo].ReturnValue + a.something #$ source=Member[Something].Method[foo].ReturnValue.Method[withCallback].Argument[block].Argument[0].Method[something].ReturnValue + b.somethingElse #$ source=Member[Something].Method[foo].ReturnValue.Method[withCallback].Argument[block].Argument[1].Method[somethingElse].ReturnValue +end #$ source=Member[Something].Method[foo].ReturnValue.Method[withCallback].ReturnValue -Something.withNamedArg do |a:, b: nil| #$ use=getMember("Something") - a.something #$ use=getMember("Something").getMethod("withNamedArg").getBlock().getKeywordParameter("a").getMethod("something").getReturn() - b.somethingElse #$ use=getMember("Something").getMethod("withNamedArg").getBlock().getKeywordParameter("b").getMethod("somethingElse").getReturn() -end #$ use=getMember("Something").getMethod("withNamedArg").getReturn() +Something.withNamedArg do |a:, b: nil| #$ source=Member[Something] + a.something #$ source=Member[Something].Method[withNamedArg].Argument[block].Parameter[a:].Method[something].ReturnValue + b.somethingElse #$ source=Member[Something].Method[withNamedArg].Argument[block].Parameter[b:].Method[somethingElse].ReturnValue +end #$ source=Member[Something].Method[withNamedArg].ReturnValue -Something.withLambda ->(a, b) { #$ use=getMember("Something") - a.something #$ use=getMember("Something").getMethod("withLambda").getParameter(0).getParameter(0).getMethod("something").getReturn() - b.something #$ use=getMember("Something").getMethod("withLambda").getParameter(0).getParameter(1).getMethod("something").getReturn() -} #$ use=getMember("Something").getMethod("withLambda").getReturn() +Something.withLambda ->(a, b) { #$ source=Member[Something] + a.something #$ source=Member[Something].Method[withLambda].Argument[0].Parameter[0].Method[something].ReturnValue + b.something #$ source=Member[Something].Method[withLambda].Argument[0].Parameter[1].Method[something].ReturnValue +} #$ source=Member[Something].Method[withLambda].ReturnValue -Something.namedCallback( #$ use=getMember("Something") +Something.namedCallback( #$ source=Member[Something] onEvent: ->(a, b) { - a.something #$ use=getMember("Something").getMethod("namedCallback").getKeywordParameter("onEvent").getParameter(0).getMethod("something").getReturn() - b.something #$ use=getMember("Something").getMethod("namedCallback").getKeywordParameter("onEvent").getParameter(1).getMethod("something").getReturn() + a.something #$ source=Member[Something].Method[namedCallback].Argument[onEvent:].Parameter[0].Method[something].ReturnValue + b.something #$ source=Member[Something].Method[namedCallback].Argument[onEvent:].Parameter[1].Method[something].ReturnValue } -) #$ use=getMember("Something").getMethod("namedCallback").getReturn() +) #$ source=Member[Something].Method[namedCallback].ReturnValue -Something.nestedCall1 do |a| #$ use=getMember("Something") - a.nestedCall2 do |b:| #$ use=getMember("Something").getMethod("nestedCall1").getBlock().getParameter(0) - b.something #$ use=getMember("Something").getMethod("nestedCall1").getBlock().getParameter(0).getMethod("nestedCall2").getBlock().getKeywordParameter("b").getMethod("something").getReturn() - end #$ use=getMember("Something").getMethod("nestedCall1").getBlock().getParameter(0).getMethod("nestedCall2").getReturn() -end #$ use=getMember("Something").getMethod("nestedCall1").getReturn() +Something.nestedCall1 do |a| #$ source=Member[Something] + a.nestedCall2 do |b:| #$ reachableFromSource=Member[Something].Method[nestedCall1].Argument[block].Parameter[0] + b.something #$ source=Member[Something].Method[nestedCall1].Argument[block].Parameter[0].Method[nestedCall2].Argument[block].Parameter[b:].Method[something].ReturnValue + end #$ source=Member[Something].Method[nestedCall1].Argument[block].Parameter[0].Method[nestedCall2].ReturnValue +end #$ source=Member[Something].Method[nestedCall1].ReturnValue def getCallback() ->(x) { - x.something #$ use=getMember("Something").getMethod("indirectCallback").getParameter(0).getParameter(0).getMethod("something").getReturn() + x.something #$ source=Member[Something].Method[indirectCallback].Argument[0].Parameter[0].Method[something].ReturnValue } end -Something.indirectCallback(getCallback()) #$ use=getMember("Something").getMethod("indirectCallback").getReturn() +Something.indirectCallback(getCallback()) #$ source=Member[Something].Method[indirectCallback].ReturnValue -Something.withMixed do |a, *args, b| #$ use=getMember("Something") - a.something #$ use=getMember("Something").getMethod("withMixed").getBlock().getParameter(0).getMethod("something").getReturn() +Something.withMixed do |a, *args, b| #$ source=Member[Something] + a.something #$ source=Member[Something].Method[withMixed].Argument[block].Parameter[0].Method[something].ReturnValue # b.something # not currently handled correctly -end #$ use=getMember("Something").getMethod("withMixed").getReturn() +end #$ source=Member[Something].Method[withMixed].ReturnValue diff --git a/ruby/ql/test/library-tests/dataflow/api-graphs/chained-access.rb b/ruby/ql/test/library-tests/dataflow/api-graphs/chained-access.rb new file mode 100644 index 00000000000..b0e4f07e701 --- /dev/null +++ b/ruby/ql/test/library-tests/dataflow/api-graphs/chained-access.rb @@ -0,0 +1,31 @@ +def chained_access1 + Something.foo [[[ + 'sink' # $ sink=Member[Something].Method[foo].Argument[0].Element[0].Element[0].Element[0] + ]]] +end + +def chained_access2 + array = [] + array[0] = [[ + 'sink' # $ sink=Member[Something].Method[foo].Argument[0].Element[0].Element[0].Element[0] + ]] + Something.foo array +end + +def chained_access3 + array = [[]] + array[0][0] = [ + 'sink' # $ sink=Member[Something].Method[foo].Argument[0].Element[0].Element[0].Element[0] + ] + Something.foo array +end + +def chained_access4 + Something.foo { + :one => { + :two => { + :three => 'sink' # $ sink=Member[Something].Method[foo].Argument[0].Element[:one].Element[:two].Element[:three] + } + } + } +end diff --git a/ruby/ql/test/library-tests/dataflow/api-graphs/method-callbacks.rb b/ruby/ql/test/library-tests/dataflow/api-graphs/method-callbacks.rb new file mode 100644 index 00000000000..63a4b4c3dce --- /dev/null +++ b/ruby/ql/test/library-tests/dataflow/api-graphs/method-callbacks.rb @@ -0,0 +1,64 @@ +class BaseClass + def inheritedInstanceMethod + yield "taint" # $ sink=Member[Something].Method[foo].Argument[block].ReturnValue.Method[inheritedInstanceMethod].Parameter[block].Argument[0] + end + + def self.inheritedSingletonMethod + yield "taint" # $ sink=Member[Something].Method[bar].Argument[block].ReturnValue.Method[inheritedSingletonMethod].Parameter[block].Argument[0] + end +end + +class ClassWithCallbacks < BaseClass + def instanceMethod + yield "taint" # $ sink=Member[Something].Method[foo].Argument[block].ReturnValue.Method[instanceMethod].Parameter[block].Argument[0] + end + + def self.singletonMethod + yield "bar" # $ sink=Member[Something].Method[bar].Argument[block].ReturnValue.Method[singletonMethod].Parameter[block].Argument[0] + end + + def escapeSelf + Something.baz { self } + end + + def self.escapeSingletonSelf + Something.baz { self } + end + + def self.foo x + x # $ reachableFromSource=Member[BaseClass].Method[foo].Parameter[0] + x # $ reachableFromSource=Member[ClassWithCallbacks].Method[foo].Parameter[0] + x # $ reachableFromSource=Member[Subclass].Method[foo].Parameter[0] + end + + def bar x + x # $ reachableFromSource=Member[BaseClass].Instance.Method[bar].Parameter[0] + x # $ reachableFromSource=Member[ClassWithCallbacks].Instance.Method[bar].Parameter[0] + x # $ reachableFromSource=Member[Subclass].Instance.Method[bar].Parameter[0] + end +end + +class Subclass < ClassWithCallbacks + def instanceMethodInSubclass + yield "bar" # $ sink=Member[Something].Method[baz].Argument[block].ReturnValue.Method[instanceMethodInSubclass].Parameter[block].Argument[0] + end + + def self.singletonMethodInSubclass + yield "bar" # $ sink=Member[Something].Method[baz].Argument[block].ReturnValue.Method[singletonMethodInSubclass].Parameter[block].Argument[0] + end +end + +Something.foo { ClassWithCallbacks.new } +Something.bar { ClassWithCallbacks } + +class ClassWithCallMethod + def call x + x # $ reachableFromSource=Method[topLevelMethod].Argument[0].Parameter[0] + "bar" # $ sink=Method[topLevelMethod].Argument[0].ReturnValue + end +end + +topLevelMethod ClassWithCallMethod.new + +blah = topLevelMethod +blah # $ reachableFromSource=Method[topLevelMethod].ReturnValue diff --git a/ruby/ql/test/library-tests/dataflow/api-graphs/self-dot-class.rb b/ruby/ql/test/library-tests/dataflow/api-graphs/self-dot-class.rb new file mode 100644 index 00000000000..178cacbe2c0 --- /dev/null +++ b/ruby/ql/test/library-tests/dataflow/api-graphs/self-dot-class.rb @@ -0,0 +1,10 @@ +module SelfDotClass + module Mixin + def foo + self.class.bar # $ call=Member[Foo].Method[bar] + end + end + class Subclass < Foo + include Mixin + end +end diff --git a/ruby/ql/test/library-tests/dataflow/api-graphs/test1.rb b/ruby/ql/test/library-tests/dataflow/api-graphs/test1.rb index 86b8bce9587..3af793dd4f7 100644 --- a/ruby/ql/test/library-tests/dataflow/api-graphs/test1.rb +++ b/ruby/ql/test/library-tests/dataflow/api-graphs/test1.rb @@ -1,34 +1,34 @@ -MyModule #$ use=getMember("MyModule") -print MyModule.foo #$ use=getMember("MyModule").getMethod("foo").getReturn() -Kernel.print(e) #$ use=getMember("Kernel").getMethod("print").getReturn() def=getMember("Kernel").getMethod("print").getParameter(0) -Object::Kernel #$ use=getMember("Kernel") -Object::Kernel.print(e) #$ use=getMember("Kernel").getMethod("print").getReturn() +MyModule #$ source=Member[MyModule] +print MyModule.foo #$ source=Member[MyModule].Method[foo].ReturnValue +Kernel.print(e) #$ source=Member[Kernel].Method[print].ReturnValue sink=Member[Kernel].Method[print].Argument[0] +Object::Kernel #$ source=Member[Kernel] +Object::Kernel.print(e) #$ source=Member[Kernel].Method[print].ReturnValue begin - print MyModule.bar #$ use=getMember("MyModule").getMethod("bar").getReturn() - raise AttributeError #$ use=getMember("AttributeError") -rescue AttributeError => e #$ use=getMember("AttributeError") - Kernel.print(e) #$ use=getMember("Kernel").getMethod("print").getReturn() + print MyModule.bar #$ source=Member[MyModule].Method[bar].ReturnValue + raise AttributeError #$ source=Member[AttributeError] +rescue AttributeError => e #$ source=Member[AttributeError] + Kernel.print(e) #$ source=Member[Kernel].Method[print].ReturnValue end -Unknown.new.run #$ use=getMember("Unknown").getMethod("new").getReturn().getMethod("run").getReturn() -Foo::Bar::Baz #$ use=getMember("Foo").getMember("Bar").getMember("Baz") +Unknown.new.run #$ source=Member[Unknown].Method[new].ReturnValue.Method[run].ReturnValue +Foo::Bar::Baz #$ source=Member[Foo].Member[Bar].Member[Baz] -Const = [1, 2, 3] #$ use=getMember("Array").getMethod("[]").getReturn() -Const.each do |c| #$ use=getMember("Const") - puts c #$ use=getMember("Const").getMethod("each").getBlock().getParameter(0) use=getMember("Const").getContent(element) -end #$ use=getMember("Const").getMethod("each").getReturn() def=getMember("Const").getMethod("each").getBlock() +Const = [1, 2, 3] #$ source=Member[Array].MethodBracket.ReturnValue +Const.each do |c| #$ source=Member[Const] + puts c #$ reachableFromSource=Member[Const].Method[each].Argument[block].Parameter[0] reachableFromSource=Member[Const].Element[any] +end #$ source=Member[Const].Method[each].ReturnValue sink=Member[Const].Method[each].Argument[block] -foo = Foo #$ use=getMember("Foo") -foo::Bar::Baz #$ use=getMember("Foo").getMember("Bar").getMember("Baz") +foo = Foo #$ source=Member[Foo] +foo::Bar::Baz #$ source=Member[Foo].Member[Bar].Member[Baz] -FooAlias = Foo #$ use=getMember("Foo") -FooAlias::Bar::Baz #$ use=getMember("Foo").getMember("Bar").getMember("Baz") +FooAlias = Foo #$ source=Member[Foo] +FooAlias::Bar::Baz #$ source=Member[Foo].Member[Bar].Member[Baz] source=Member[FooAlias].Member[Bar].Member[Baz] module Outer module Inner end end -Outer::Inner.foo #$ use=getMember("Outer").getMember("Inner").getMethod("foo").getReturn() +Outer::Inner.foo #$ source=Member[Outer].Member[Inner].Method[foo].ReturnValue module M1 class C1 @@ -40,36 +40,36 @@ module M1 end end -class C2 < M1::C1 #$ use=getMember("M1").getMember("C1") +class C2 < M1::C1 #$ source=Member[M1].Member[C1] end module M2 - class C3 < M1::C1 #$ use=getMember("M1").getMember("C1") + class C3 < M1::C1 #$ source=Member[M1].Member[C1] end - class C4 < C2 #$ use=getMember("C2") + class C4 < C2 #$ source=Member[C2] end end -C2 #$ use=getMember("C2") use=getMember("M1").getMember("C1").getASubclass() -M2::C3 #$ use=getMember("M2").getMember("C3") use=getMember("M1").getMember("C1").getASubclass() -M2::C4 #$ use=getMember("M2").getMember("C4") use=getMember("C2").getASubclass() use=getMember("M1").getMember("C1").getASubclass().getASubclass() +C2 #$ source=Member[C2] reachableFromSource=Member[M1].Member[C1] +M2::C3 #$ source=Member[M2].Member[C3] reachableFromSource=Member[M1].Member[C1] +M2::C4 #$ source=Member[M2].Member[C4] reachableFromSource=Member[C2] reachableFromSource=Member[M1].Member[C1] -M1::C1.m #$ use=getMember("M1").getMember("C1").getMethod("m").getReturn() -M2::C3.m #$ use=getMember("M2").getMember("C3").getMethod("m").getReturn() use=getMember("M1").getMember("C1").getASubclass().getMethod("m").getReturn() +M1::C1.m #$ source=Member[M1].Member[C1].Method[m].ReturnValue +M2::C3.m #$ source=Member[M2].Member[C3].Method[m].ReturnValue source=Member[M1].Member[C1].Method[m].ReturnValue -M1::C1.new.m #$ use=getMember("M1").getMember("C1").getMethod("new").getReturn().getMethod("m").getReturn() -M2::C3.new.m #$ use=getMember("M2").getMember("C3").getMethod("new").getReturn().getMethod("m").getReturn() +M1::C1.new.m #$ source=Member[M1].Member[C1].Method[new].ReturnValue.Method[m].ReturnValue +M2::C3.new.m #$ source=Member[M2].Member[C3].Method[new].ReturnValue.Method[m].ReturnValue -Foo.foo(a,b:c) #$ use=getMember("Foo").getMethod("foo").getReturn() def=getMember("Foo").getMethod("foo").getParameter(0) def=getMember("Foo").getMethod("foo").getKeywordParameter("b") +Foo.foo(a,b:c) #$ source=Member[Foo].Method[foo].ReturnValue sink=Member[Foo].Method[foo].Argument[0] sink=Member[Foo].Method[foo].Argument[b:] def userDefinedFunction(x, y) x.noApiGraph(y) - x.customEntryPointCall(y) #$ call=entryPoint("CustomEntryPointCall") use=entryPoint("CustomEntryPointCall").getReturn() rhs=entryPoint("CustomEntryPointCall").getParameter(0) - x.customEntryPointUse(y) #$ use=entryPoint("CustomEntryPointUse") + x.customEntryPointCall(y) #$ call=EntryPoint[CustomEntryPointCall] source=EntryPoint[CustomEntryPointCall].ReturnValue sink=EntryPoint[CustomEntryPointCall].Parameter[0] + x.customEntryPointUse(y) #$ source=EntryPoint[CustomEntryPointUse] end -array = [A::B::C] #$ use=getMember("Array").getMethod("[]").getReturn() -array[0].m #$ use=getMember("A").getMember("B").getMember("C").getMethod("m").getReturn() +array = [A::B::C] #$ source=Member[Array].MethodBracket.ReturnValue +array[0].m #$ source=Member[A].Member[B].Member[C].Method[m].ReturnValue source=Member[Array].MethodBracket.ReturnValue.Element[0].Method[m].ReturnValue -A::B::C[0] #$ use=getMember("A").getMember("B").getMember("C").getContent(element_0) +A::B::C[0] #$ source=Member[A].Member[B].Member[C].Element[0] diff --git a/ruby/ql/test/library-tests/dataflow/api-graphs/use.ql b/ruby/ql/test/library-tests/dataflow/api-graphs/use.ql deleted file mode 100644 index a0c11640ce0..00000000000 --- a/ruby/ql/test/library-tests/dataflow/api-graphs/use.ql +++ /dev/null @@ -1,88 +0,0 @@ -import codeql.ruby.AST -import codeql.ruby.DataFlow -import TestUtilities.InlineExpectationsTest -import codeql.ruby.ApiGraphs - -class CustomEntryPointCall extends API::EntryPoint { - CustomEntryPointCall() { this = "CustomEntryPointCall" } - - override DataFlow::CallNode getACall() { result.getMethodName() = "customEntryPointCall" } -} - -class CustomEntryPointUse extends API::EntryPoint { - CustomEntryPointUse() { this = "CustomEntryPointUse" } - - override DataFlow::LocalSourceNode getASource() { - result.(DataFlow::CallNode).getMethodName() = "customEntryPointUse" - } -} - -module ApiUseTest implements TestSig { - string getARelevantTag() { result = ["use", "def", "call"] } - - private predicate relevantNode(API::Node a, DataFlow::Node n, Location l, string tag) { - l = n.getLocation() and - ( - tag = "use" and - n = a.getAValueReachableFromSource() - or - tag = "def" and - n = a.asSink() - or - tag = "call" and - n = a.(API::MethodAccessNode).getCallNode() - ) - } - - predicate hasActualResult(Location location, string element, string tag, string value) { - tag = "use" and // def tags are always optional - exists(DataFlow::Node n | relevantNode(_, n, location, tag) | - // Only report the longest path on this line: - value = - max(API::Node a2, Location l2, DataFlow::Node n2 | - relevantNode(a2, n2, l2, tag) and - l2.getFile() = location.getFile() and - l2.getEndLine() = location.getEndLine() - | - a2.getPath() - order by - size(n2.asExpr().getExpr()), a2.getPath().length() desc, a2.getPath() desc - ) and - element = n.toString() - ) - } - - // We also permit optional annotations for any other path on the line. - // This is used to test subclass paths, which typically have a shorter canonical path. - predicate hasOptionalResult(Location location, string element, string tag, string value) { - exists(API::Node a, DataFlow::Node n | relevantNode(a, n, location, tag) | - element = n.toString() and - value = getAPath(a, _) - ) - } -} - -import MakeTest - -private int size(AstNode n) { not n instanceof StmtSequence and result = count(n.getAChild*()) } - -/** - * Gets a path of the given `length` from the root to the given node. - * This is a copy of `API::getAPath()` without the restriction on path length, - * which would otherwise rule out paths involving `getASubclass()`. - */ -string getAPath(API::Node node, int length) { - node instanceof API::Root and - length = 0 and - result = "" - or - exists(API::Node pred, API::Label::ApiLabel lbl, string predpath | - pred.getASuccessor(lbl) = node and - predpath = getAPath(pred, length - 1) and - exists(string dot | if length = 1 then dot = "" else dot = "." | - result = predpath + dot + lbl and - // avoid producing strings longer than 1MB - result.length() < 1000 * 1000 - ) - ) -} diff --git a/ruby/ql/test/library-tests/frameworks/Twirp/Twirp.expected b/ruby/ql/test/library-tests/frameworks/Twirp/Twirp.expected index 66da43ab78b..4f1b0c30920 100644 --- a/ruby/ql/test/library-tests/frameworks/Twirp/Twirp.expected +++ b/ruby/ql/test/library-tests/frameworks/Twirp/Twirp.expected @@ -1,6 +1,8 @@ sourceTest | hello_world_server.rb:8:13:8:15 | req | +| hello_world_server.rb:32:18:32:20 | req | ssrfSinkTest | hello_world_client.rb:6:47:6:75 | "http://localhost:8080/twirp" | serviceInstantiationTest | hello_world_server.rb:24:11:24:61 | call to new | +| hello_world_server.rb:38:1:38:57 | call to new | diff --git a/ruby/ql/test/library-tests/frameworks/Twirp/Twirp.ql b/ruby/ql/test/library-tests/frameworks/Twirp/Twirp.ql index 4c0494f9100..fee49cbb48c 100644 --- a/ruby/ql/test/library-tests/frameworks/Twirp/Twirp.ql +++ b/ruby/ql/test/library-tests/frameworks/Twirp/Twirp.ql @@ -5,4 +5,4 @@ query predicate sourceTest(Twirp::UnmarshaledParameter source) { any() } query predicate ssrfSinkTest(Twirp::ServiceUrlAsSsrfSink sink) { any() } -query predicate serviceInstantiationTest(Twirp::ServiceInstantiation si) { any() } +deprecated query predicate serviceInstantiationTest(Twirp::ServiceInstantiation si) { any() } diff --git a/ruby/ql/test/library-tests/frameworks/Twirp/hello_world_server.rb b/ruby/ql/test/library-tests/frameworks/Twirp/hello_world_server.rb index 1aa0b9aa8de..7cd117a5843 100644 --- a/ruby/ql/test/library-tests/frameworks/Twirp/hello_world_server.rb +++ b/ruby/ql/test/library-tests/frameworks/Twirp/hello_world_server.rb @@ -5,7 +5,7 @@ require_relative 'hello_world/service_twirp.rb' class HelloWorldHandler # test: request - def hello(req, env) + def hello(req, env) puts ">> Hello #{req.name}" {message: "Hello #{req.name}"} end @@ -13,7 +13,7 @@ end class FakeHelloWorldHandler # test: !request - def hello(req, env) + def hello(req, env) puts ">> Hello #{req.name}" {message: "Hello #{req.name}"} end @@ -21,9 +21,18 @@ end handler = HelloWorldHandler.new() # test: serviceInstantiation -service = Example::HelloWorld::HelloWorldService.new(handler) +service = Example::HelloWorld::HelloWorldService.new(handler) path_prefix = "/twirp/" + service.full_name server = WEBrick::HTTPServer.new(Port: 8080) server.mount path_prefix, Rack::Handler::WEBrick, service server.start + +class StaticHandler + def self.hello(req, env) + puts ">> Hello #{req.name}" + {message: "Hello #{req.name}"} + end +end + +Example::HelloWorld::HelloWorldService.new(StaticHandler) diff --git a/ruby/ql/test/library-tests/frameworks/action_dispatch/ActionDispatch.expected b/ruby/ql/test/library-tests/frameworks/action_dispatch/ActionDispatch.expected index 71327350941..4eacd48bd60 100644 --- a/ruby/ql/test/library-tests/frameworks/action_dispatch/ActionDispatch.expected +++ b/ruby/ql/test/library-tests/frameworks/action_dispatch/ActionDispatch.expected @@ -55,12 +55,12 @@ underscore | LotsOfCapitalLetters | lots_of_capital_letters | | invalid | invalid | mimeTypeInstances -| mime_type.rb:2:6:2:28 | Use getMember("Mime").getContent(element_text/html) | -| mime_type.rb:3:6:3:32 | Use getMember("Mime").getMember("Type").getMethod("new").getReturn() | -| mime_type.rb:4:6:4:35 | Use getMember("Mime").getMember("Type").getMethod("lookup").getReturn() | -| mime_type.rb:5:6:5:43 | Use getMember("Mime").getMember("Type").getMethod("lookup_by_extension").getReturn() | -| mime_type.rb:6:6:6:47 | Use getMember("Mime").getMember("Type").getMethod("register").getReturn() | -| mime_type.rb:7:6:7:64 | Use getMember("Mime").getMember("Type").getMethod("register_alias").getReturn() | +| mime_type.rb:2:6:2:28 | ForwardNode(call to fetch) | +| mime_type.rb:3:6:3:32 | ForwardNode(call to new) | +| mime_type.rb:4:6:4:35 | ForwardNode(call to lookup) | +| mime_type.rb:5:6:5:43 | ForwardNode(call to lookup_by_extension) | +| mime_type.rb:6:6:6:47 | ForwardNode(call to register) | +| mime_type.rb:7:6:7:64 | ForwardNode(call to register_alias) | mimeTypeMatchRegExpInterpretations | mime_type.rb:11:11:11:19 | "foo/bar" | | mime_type.rb:12:7:12:15 | "foo/bar" | diff --git a/ruby/ql/test/library-tests/frameworks/active_record/ActiveRecord.expected b/ruby/ql/test/library-tests/frameworks/active_record/ActiveRecord.expected index 0374a54c0c1..a3ee4ebebf5 100644 --- a/ruby/ql/test/library-tests/frameworks/active_record/ActiveRecord.expected +++ b/ruby/ql/test/library-tests/frameworks/active_record/ActiveRecord.expected @@ -10,6 +10,7 @@ activeRecordInstances | ActiveRecord.rb:9:5:9:68 | call to find | | ActiveRecord.rb:13:5:13:40 | call to find_by | | ActiveRecord.rb:13:5:13:46 | call to users | +| ActiveRecord.rb:35:5:35:51 | call to authenticate | | ActiveRecord.rb:36:5:36:30 | call to find_by_name | | ActiveRecord.rb:55:5:57:7 | if ... | | ActiveRecord.rb:55:43:56:40 | then ... | @@ -107,12 +108,14 @@ activeRecordSqlExecutionRanges | ActiveRecord.rb:19:16:19:24 | condition | | ActiveRecord.rb:28:30:28:44 | ...[...] | | ActiveRecord.rb:29:20:29:42 | "id = '#{...}'" | +| ActiveRecord.rb:30:21:30:45 | call to [] | | ActiveRecord.rb:30:22:30:44 | "id = '#{...}'" | | ActiveRecord.rb:31:16:31:21 | <<-SQL | | ActiveRecord.rb:34:20:34:47 | "user.id = '#{...}'" | | ActiveRecord.rb:46:20:46:32 | ... + ... | | ActiveRecord.rb:52:16:52:28 | "name #{...}" | | ActiveRecord.rb:56:20:56:39 | "username = #{...}" | +| ActiveRecord.rb:68:21:68:44 | ...[...] | | ActiveRecord.rb:106:27:106:76 | "this is an unsafe annotation:..." | activeRecordModelClassMethodCalls | ActiveRecord.rb:2:3:2:17 | call to has_many | @@ -127,7 +130,6 @@ activeRecordModelClassMethodCalls | ActiveRecord.rb:31:5:31:35 | call to where | | ActiveRecord.rb:34:5:34:14 | call to where | | ActiveRecord.rb:34:5:34:48 | call to not | -| ActiveRecord.rb:35:5:35:51 | call to authenticate | | ActiveRecord.rb:36:5:36:30 | call to find_by_name | | ActiveRecord.rb:37:5:37:36 | call to not_a_find_by_method | | ActiveRecord.rb:46:5:46:33 | call to delete_by | @@ -135,7 +137,6 @@ activeRecordModelClassMethodCalls | ActiveRecord.rb:56:7:56:40 | call to find_by | | ActiveRecord.rb:60:5:60:33 | call to find_by | | ActiveRecord.rb:62:5:62:34 | call to find | -| ActiveRecord.rb:68:5:68:45 | call to delete_by | | ActiveRecord.rb:72:5:72:24 | call to create | | ActiveRecord.rb:76:5:76:66 | call to create | | ActiveRecord.rb:80:5:80:68 | call to create | @@ -152,6 +153,96 @@ activeRecordModelClassMethodCalls | associations.rb:12:3:12:32 | call to has_and_belongs_to_many | | associations.rb:16:3:16:18 | call to belongs_to | | associations.rb:19:11:19:20 | call to new | +| associations.rb:21:9:21:21 | call to posts | +| associations.rb:21:9:21:28 | call to create | +| associations.rb:23:12:23:25 | call to comments | +| associations.rb:23:12:23:32 | call to create | +| associations.rb:25:11:25:22 | call to author | +| associations.rb:27:9:27:21 | call to posts | +| associations.rb:27:9:27:28 | call to create | +| associations.rb:29:1:29:13 | call to posts | +| associations.rb:29:1:29:22 | ... << ... | +| associations.rb:31:1:31:12 | call to author= | +| associations.rb:35:1:35:14 | call to comments | +| associations.rb:35:1:35:21 | call to create | +| associations.rb:35:1:35:28 | call to create | +| associations.rb:37:1:37:13 | call to posts | +| associations.rb:37:1:37:20 | call to reload | +| associations.rb:37:1:37:27 | call to create | +| associations.rb:39:1:39:15 | call to build_tag | +| associations.rb:40:1:40:15 | call to build_tag | +| associations.rb:42:1:42:13 | call to posts | +| associations.rb:42:1:42:25 | call to push | +| associations.rb:43:1:43:13 | call to posts | +| associations.rb:43:1:43:27 | call to concat | +| associations.rb:44:1:44:13 | call to posts | +| associations.rb:44:1:44:19 | call to build | +| associations.rb:45:1:45:13 | call to posts | +| associations.rb:45:1:45:20 | call to create | +| associations.rb:46:1:46:13 | call to posts | +| associations.rb:46:1:46:21 | call to create! | +| associations.rb:47:1:47:13 | call to posts | +| associations.rb:47:1:47:20 | call to delete | +| associations.rb:48:1:48:13 | call to posts | +| associations.rb:48:1:48:24 | call to delete_all | +| associations.rb:49:1:49:13 | call to posts | +| associations.rb:49:1:49:21 | call to destroy | +| associations.rb:50:1:50:13 | call to posts | +| associations.rb:50:1:50:25 | call to destroy_all | +| associations.rb:51:1:51:13 | call to posts | +| associations.rb:51:1:51:22 | call to distinct | +| associations.rb:51:1:51:36 | call to find | +| associations.rb:52:1:52:13 | call to posts | +| associations.rb:52:1:52:19 | call to reset | +| associations.rb:52:1:52:33 | call to find | +| associations.rb:53:1:53:13 | call to posts | +| associations.rb:53:1:53:20 | call to reload | +| associations.rb:53:1:53:34 | call to find | +activeRecordModelClassMethodCallsReplacement +| ActiveRecord.rb:1:1:3:3 | UserGroup | ActiveRecord.rb:2:3:2:17 | call to has_many | +| ActiveRecord.rb:1:1:3:3 | UserGroup | ActiveRecord.rb:13:5:13:40 | call to find_by | +| ActiveRecord.rb:5:1:15:3 | User | ActiveRecord.rb:6:3:6:24 | call to belongs_to | +| ActiveRecord.rb:5:1:15:3 | User | ActiveRecord.rb:9:5:9:68 | call to find | +| ActiveRecord.rb:5:1:15:3 | User | ActiveRecord.rb:19:5:19:25 | call to destroy_by | +| ActiveRecord.rb:5:1:15:3 | User | ActiveRecord.rb:28:5:28:45 | call to calculate | +| ActiveRecord.rb:5:1:15:3 | User | ActiveRecord.rb:29:5:29:43 | call to delete_by | +| ActiveRecord.rb:5:1:15:3 | User | ActiveRecord.rb:30:5:30:46 | call to destroy_by | +| ActiveRecord.rb:5:1:15:3 | User | ActiveRecord.rb:31:5:31:35 | call to where | +| ActiveRecord.rb:5:1:15:3 | User | ActiveRecord.rb:34:5:34:14 | call to where | +| ActiveRecord.rb:5:1:15:3 | User | ActiveRecord.rb:35:5:35:51 | call to authenticate | +| ActiveRecord.rb:5:1:15:3 | User | ActiveRecord.rb:36:5:36:30 | call to find_by_name | +| ActiveRecord.rb:5:1:15:3 | User | ActiveRecord.rb:37:5:37:36 | call to not_a_find_by_method | +| ActiveRecord.rb:5:1:15:3 | User | ActiveRecord.rb:46:5:46:33 | call to delete_by | +| ActiveRecord.rb:5:1:15:3 | User | ActiveRecord.rb:52:5:52:29 | call to order | +| ActiveRecord.rb:5:1:15:3 | User | ActiveRecord.rb:56:7:56:40 | call to find_by | +| ActiveRecord.rb:5:1:15:3 | User | ActiveRecord.rb:60:5:60:33 | call to find_by | +| ActiveRecord.rb:5:1:15:3 | User | ActiveRecord.rb:62:5:62:34 | call to find | +| ActiveRecord.rb:5:1:15:3 | User | ActiveRecord.rb:68:5:68:45 | call to delete_by | +| ActiveRecord.rb:5:1:15:3 | User | ActiveRecord.rb:72:5:72:24 | call to create | +| ActiveRecord.rb:5:1:15:3 | User | ActiveRecord.rb:76:5:76:66 | call to create | +| ActiveRecord.rb:5:1:15:3 | User | ActiveRecord.rb:80:5:80:68 | call to create | +| ActiveRecord.rb:5:1:15:3 | User | ActiveRecord.rb:84:5:84:16 | call to create | +| ActiveRecord.rb:5:1:15:3 | User | ActiveRecord.rb:88:5:88:27 | call to update | +| ActiveRecord.rb:5:1:15:3 | User | ActiveRecord.rb:92:5:92:69 | call to update | +| ActiveRecord.rb:5:1:15:3 | User | ActiveRecord.rb:96:5:96:71 | call to update | +| ActiveRecord.rb:5:1:15:3 | User | ActiveRecord.rb:102:13:102:54 | call to annotate | +| ActiveRecord.rb:5:1:15:3 | User | ActiveRecord.rb:106:13:106:77 | call to annotate | +| ActiveRecord.rb:17:1:21:3 | Admin | ActiveRecord.rb:19:5:19:25 | call to destroy_by | +| ActiveRecord.rb:17:1:21:3 | Admin | ActiveRecord.rb:68:5:68:45 | call to delete_by | +| ActiveRecord.rb:17:1:21:3 | Admin | ActiveRecord.rb:72:5:72:24 | call to create | +| ActiveRecord.rb:17:1:21:3 | Admin | ActiveRecord.rb:76:5:76:66 | call to create | +| ActiveRecord.rb:17:1:21:3 | Admin | ActiveRecord.rb:80:5:80:68 | call to create | +| ActiveRecord.rb:17:1:21:3 | Admin | ActiveRecord.rb:84:5:84:16 | call to create | +| ActiveRecord.rb:17:1:21:3 | Admin | ActiveRecord.rb:88:5:88:27 | call to update | +| ActiveRecord.rb:17:1:21:3 | Admin | ActiveRecord.rb:92:5:92:69 | call to update | +| ActiveRecord.rb:17:1:21:3 | Admin | ActiveRecord.rb:96:5:96:71 | call to update | +| associations.rb:1:1:3:3 | Author | associations.rb:2:3:2:17 | call to has_many | +| associations.rb:1:1:3:3 | Author | associations.rb:19:11:19:20 | call to new | +| associations.rb:5:1:9:3 | Post | associations.rb:6:3:6:20 | call to belongs_to | +| associations.rb:5:1:9:3 | Post | associations.rb:7:3:7:20 | call to has_many | +| associations.rb:5:1:9:3 | Post | associations.rb:8:3:8:31 | call to has_and_belongs_to_many | +| associations.rb:11:1:13:3 | Tag | associations.rb:12:3:12:32 | call to has_and_belongs_to_many | +| associations.rb:15:1:17:3 | Comment | associations.rb:16:3:16:18 | call to belongs_to | potentiallyUnsafeSqlExecutingMethodCall | ActiveRecord.rb:9:5:9:68 | call to find | | ActiveRecord.rb:19:5:19:25 | call to destroy_by | diff --git a/ruby/ql/test/library-tests/frameworks/active_record/ActiveRecord.ql b/ruby/ql/test/library-tests/frameworks/active_record/ActiveRecord.ql index 731679e437b..348ca1456e2 100644 --- a/ruby/ql/test/library-tests/frameworks/active_record/ActiveRecord.ql +++ b/ruby/ql/test/library-tests/frameworks/active_record/ActiveRecord.ql @@ -9,9 +9,19 @@ query predicate activeRecordInstances(ActiveRecordInstance i) { any() } query predicate activeRecordSqlExecutionRanges(ActiveRecordSqlExecutionRange range) { any() } -query predicate activeRecordModelClassMethodCalls(ActiveRecordModelClassMethodCall call) { any() } +deprecated query predicate activeRecordModelClassMethodCalls(ActiveRecordModelClassMethodCall call) { + any() +} -query predicate potentiallyUnsafeSqlExecutingMethodCall(PotentiallyUnsafeSqlExecutingMethodCall call) { +query predicate activeRecordModelClassMethodCallsReplacement( + ActiveRecordModelClass cls, DataFlow::CallNode call +) { + call = cls.getClassNode().trackModule().getAMethodCall(_) +} + +deprecated query predicate potentiallyUnsafeSqlExecutingMethodCall( + PotentiallyUnsafeSqlExecutingMethodCall call +) { any() } diff --git a/ruby/ql/test/library-tests/frameworks/active_resource/ActiveResource.expected b/ruby/ql/test/library-tests/frameworks/active_resource/ActiveResource.expected index a55946c1852..e6d3b056971 100644 --- a/ruby/ql/test/library-tests/frameworks/active_resource/ActiveResource.expected +++ b/ruby/ql/test/library-tests/frameworks/active_resource/ActiveResource.expected @@ -33,6 +33,13 @@ modelInstances | active_resource.rb:26:9:26:14 | people | | active_resource.rb:26:9:26:20 | call to first | | active_resource.rb:27:1:27:5 | alice | +modelInstancesAsSource +| active_resource.rb:1:1:3:3 | Person | active_resource.rb:5:9:5:33 | call to new | +| active_resource.rb:1:1:3:3 | Person | active_resource.rb:8:9:8:22 | call to find | +| active_resource.rb:1:1:3:3 | Person | active_resource.rb:16:1:16:23 | call to new | +| active_resource.rb:1:1:3:3 | Person | active_resource.rb:18:1:18:22 | call to get | +| active_resource.rb:1:1:3:3 | Person | active_resource.rb:24:10:24:26 | call to find | +| active_resource.rb:1:1:3:3 | Person | active_resource.rb:26:9:26:20 | call to first | modelInstanceMethodCalls | active_resource.rb:6:1:6:10 | call to save | | active_resource.rb:9:1:9:13 | call to address= | @@ -50,3 +57,6 @@ collections | active_resource.rb:24:1:24:26 | ... = ... | | active_resource.rb:24:10:24:26 | call to find | | active_resource.rb:26:9:26:14 | people | +collectionSources +| active_resource.rb:23:10:23:19 | call to all | +| active_resource.rb:24:10:24:26 | call to find | diff --git a/ruby/ql/test/library-tests/frameworks/active_resource/ActiveResource.ql b/ruby/ql/test/library-tests/frameworks/active_resource/ActiveResource.ql index 1f2fd1efcf1..f1898ddbc98 100644 --- a/ruby/ql/test/library-tests/frameworks/active_resource/ActiveResource.ql +++ b/ruby/ql/test/library-tests/frameworks/active_resource/ActiveResource.ql @@ -3,7 +3,8 @@ import codeql.ruby.DataFlow import codeql.ruby.frameworks.ActiveResource query predicate modelClasses( - ActiveResource::ModelClass c, DataFlow::Node siteAssignCall, boolean disablesCertificateValidation + ActiveResource::ModelClassNode c, DataFlow::Node siteAssignCall, + boolean disablesCertificateValidation ) { c.getASiteAssignment() = siteAssignCall and if c.disablesCertificateValidation(siteAssignCall) @@ -13,8 +14,16 @@ query predicate modelClasses( query predicate modelClassMethodCalls(ActiveResource::ModelClassMethodCall c) { any() } -query predicate modelInstances(ActiveResource::ModelInstance c) { any() } +deprecated query predicate modelInstances(ActiveResource::ModelInstance c) { any() } + +query predicate modelInstancesAsSource( + ActiveResource::ModelClassNode cls, DataFlow::LocalSourceNode node +) { + node = cls.getAnInstanceReference().asSource() +} query predicate modelInstanceMethodCalls(ActiveResource::ModelInstanceMethodCall c) { any() } -query predicate collections(ActiveResource::Collection c) { any() } +deprecated query predicate collections(ActiveResource::Collection c) { any() } + +query predicate collectionSources(ActiveResource::CollectionSource c) { any() } diff --git a/ruby/ql/test/query-tests/security/cwe-079/StoredXSS.expected b/ruby/ql/test/query-tests/security/cwe-079/StoredXSS.expected index 0eaf24029ef..460c7da3145 100644 --- a/ruby/ql/test/query-tests/security/cwe-079/StoredXSS.expected +++ b/ruby/ql/test/query-tests/security/cwe-079/StoredXSS.expected @@ -3,7 +3,6 @@ edges | app/controllers/foo/stores_controller.rb:8:5:8:6 | dt | app/controllers/foo/stores_controller.rb:13:55:13:56 | dt | | app/controllers/foo/stores_controller.rb:8:10:8:29 | call to read | app/controllers/foo/stores_controller.rb:8:5:8:6 | dt | | app/controllers/foo/stores_controller.rb:9:22:9:23 | dt | app/views/foo/stores/show.html.erb:37:3:37:16 | @instance_text | -| app/controllers/foo/stores_controller.rb:12:28:12:48 | call to raw_name | app/views/foo/stores/show.html.erb:82:5:82:24 | @other_user_raw_name | | app/controllers/foo/stores_controller.rb:13:55:13:56 | dt | app/views/foo/stores/show.html.erb:2:9:2:20 | call to display_text | | app/controllers/foo/stores_controller.rb:13:55:13:56 | dt | app/views/foo/stores/show.html.erb:5:9:5:21 | call to local_assigns [element :display_text] | | app/controllers/foo/stores_controller.rb:13:55:13:56 | dt | app/views/foo/stores/show.html.erb:9:9:9:21 | call to local_assigns [element :display_text] | @@ -22,7 +21,6 @@ nodes | app/controllers/foo/stores_controller.rb:8:5:8:6 | dt | semmle.label | dt | | app/controllers/foo/stores_controller.rb:8:10:8:29 | call to read | semmle.label | call to read | | app/controllers/foo/stores_controller.rb:9:22:9:23 | dt | semmle.label | dt | -| app/controllers/foo/stores_controller.rb:12:28:12:48 | call to raw_name | semmle.label | call to raw_name | | app/controllers/foo/stores_controller.rb:13:55:13:56 | dt | semmle.label | dt | | app/views/foo/bars/_widget.html.erb:5:9:5:20 | call to display_text | semmle.label | call to display_text | | app/views/foo/bars/_widget.html.erb:8:9:8:21 | call to local_assigns [element :display_text] | semmle.label | call to local_assigns [element :display_text] | @@ -39,11 +37,7 @@ nodes | app/views/foo/stores/show.html.erb:40:64:40:87 | ... + ... | semmle.label | ... + ... | | app/views/foo/stores/show.html.erb:40:76:40:87 | call to display_text | semmle.label | call to display_text | | app/views/foo/stores/show.html.erb:46:5:46:16 | call to handle | semmle.label | call to handle | -| app/views/foo/stores/show.html.erb:49:5:49:18 | call to raw_name | semmle.label | call to raw_name | | app/views/foo/stores/show.html.erb:63:3:63:18 | call to handle | semmle.label | call to handle | -| app/views/foo/stores/show.html.erb:69:3:69:20 | call to raw_name | semmle.label | call to raw_name | -| app/views/foo/stores/show.html.erb:79:5:79:22 | call to display_name | semmle.label | call to display_name | -| app/views/foo/stores/show.html.erb:82:5:82:24 | @other_user_raw_name | semmle.label | @other_user_raw_name | | app/views/foo/stores/show.html.erb:86:3:86:29 | call to sprintf | semmle.label | call to sprintf | | app/views/foo/stores/show.html.erb:86:17:86:28 | call to handle | semmle.label | call to handle | subpaths @@ -57,9 +51,5 @@ subpaths | app/views/foo/stores/show.html.erb:32:3:32:14 | call to display_text | app/controllers/foo/stores_controller.rb:8:10:8:29 | call to read | app/views/foo/stores/show.html.erb:32:3:32:14 | call to display_text | Stored cross-site scripting vulnerability due to $@. | app/controllers/foo/stores_controller.rb:8:10:8:29 | call to read | stored value | | app/views/foo/stores/show.html.erb:37:3:37:16 | @instance_text | app/controllers/foo/stores_controller.rb:8:10:8:29 | call to read | app/views/foo/stores/show.html.erb:37:3:37:16 | @instance_text | Stored cross-site scripting vulnerability due to $@. | app/controllers/foo/stores_controller.rb:8:10:8:29 | call to read | stored value | | app/views/foo/stores/show.html.erb:46:5:46:16 | call to handle | app/views/foo/stores/show.html.erb:46:5:46:16 | call to handle | app/views/foo/stores/show.html.erb:46:5:46:16 | call to handle | Stored cross-site scripting vulnerability due to $@. | app/views/foo/stores/show.html.erb:46:5:46:16 | call to handle | stored value | -| app/views/foo/stores/show.html.erb:49:5:49:18 | call to raw_name | app/views/foo/stores/show.html.erb:49:5:49:18 | call to raw_name | app/views/foo/stores/show.html.erb:49:5:49:18 | call to raw_name | Stored cross-site scripting vulnerability due to $@. | app/views/foo/stores/show.html.erb:49:5:49:18 | call to raw_name | stored value | | app/views/foo/stores/show.html.erb:63:3:63:18 | call to handle | app/views/foo/stores/show.html.erb:63:3:63:18 | call to handle | app/views/foo/stores/show.html.erb:63:3:63:18 | call to handle | Stored cross-site scripting vulnerability due to $@. | app/views/foo/stores/show.html.erb:63:3:63:18 | call to handle | stored value | -| app/views/foo/stores/show.html.erb:69:3:69:20 | call to raw_name | app/views/foo/stores/show.html.erb:69:3:69:20 | call to raw_name | app/views/foo/stores/show.html.erb:69:3:69:20 | call to raw_name | Stored cross-site scripting vulnerability due to $@. | app/views/foo/stores/show.html.erb:69:3:69:20 | call to raw_name | stored value | -| app/views/foo/stores/show.html.erb:79:5:79:22 | call to display_name | app/views/foo/stores/show.html.erb:79:5:79:22 | call to display_name | app/views/foo/stores/show.html.erb:79:5:79:22 | call to display_name | Stored cross-site scripting vulnerability due to $@. | app/views/foo/stores/show.html.erb:79:5:79:22 | call to display_name | stored value | -| app/views/foo/stores/show.html.erb:82:5:82:24 | @other_user_raw_name | app/controllers/foo/stores_controller.rb:12:28:12:48 | call to raw_name | app/views/foo/stores/show.html.erb:82:5:82:24 | @other_user_raw_name | Stored cross-site scripting vulnerability due to $@. | app/controllers/foo/stores_controller.rb:12:28:12:48 | call to raw_name | stored value | | app/views/foo/stores/show.html.erb:86:3:86:29 | call to sprintf | app/views/foo/stores/show.html.erb:86:17:86:28 | call to handle | app/views/foo/stores/show.html.erb:86:3:86:29 | call to sprintf | Stored cross-site scripting vulnerability due to $@. | app/views/foo/stores/show.html.erb:86:17:86:28 | call to handle | stored value | diff --git a/ruby/ql/test/query-tests/security/cwe-079/app/views/foo/stores/show.html.erb b/ruby/ql/test/query-tests/security/cwe-079/app/views/foo/stores/show.html.erb index 29656a15a3d..d8afec1c432 100644 --- a/ruby/ql/test/query-tests/security/cwe-079/app/views/foo/stores/show.html.erb +++ b/ruby/ql/test/query-tests/security/cwe-079/app/views/foo/stores/show.html.erb @@ -63,7 +63,7 @@ some_user.handle.html_safe %> -<%# BAD: Indirect to a database value without escaping %> +<%# BAD: Indirect to a database value without escaping (currently missed due to lack of 'self' handling in ORM tracking) %> <%= some_user = User.find 1 some_user.raw_name.html_safe @@ -75,10 +75,10 @@ some_user.handle %> -<%# BAD: Indirect to a database value without escaping %> +<%# BAD: Indirect to a database value without escaping (currently missed due to lack of 'self' handling in ORM tracking) %> <%= @user.display_name.html_safe %> -<%# BAD: Indirect to a database value without escaping %> +<%# BAD: Indirect to a database value without escaping (currently missed due to lack of 'self' handling in ORM tracking) %> <%= @other_user_raw_name.html_safe %> <%# BAD: Kernel.sprintf is a taint-step %> diff --git a/ruby/ql/test/query-tests/security/cwe-089/SqlInjection.expected b/ruby/ql/test/query-tests/security/cwe-089/SqlInjection.expected index 0cc0d213dcc..161cdcc7751 100644 --- a/ruby/ql/test/query-tests/security/cwe-089/SqlInjection.expected +++ b/ruby/ql/test/query-tests/security/cwe-089/SqlInjection.expected @@ -22,6 +22,7 @@ edges | ActiveRecordInjection.rb:70:38:70:50 | ...[...] | ActiveRecordInjection.rb:8:31:8:34 | pass | | ActiveRecordInjection.rb:74:41:74:46 | call to params | ActiveRecordInjection.rb:74:41:74:51 | ...[...] | | ActiveRecordInjection.rb:74:41:74:51 | ...[...] | ActiveRecordInjection.rb:74:32:74:54 | "id = '#{...}'" | +| ActiveRecordInjection.rb:79:23:79:28 | call to params | ActiveRecordInjection.rb:79:23:79:35 | ...[...] | | ActiveRecordInjection.rb:83:17:83:22 | call to params | ActiveRecordInjection.rb:83:17:83:31 | ...[...] | | ActiveRecordInjection.rb:84:19:84:24 | call to params | ActiveRecordInjection.rb:84:19:84:33 | ...[...] | | ActiveRecordInjection.rb:88:18:88:23 | call to params | ActiveRecordInjection.rb:88:18:88:35 | ...[...] | @@ -35,6 +36,7 @@ edges | ActiveRecordInjection.rb:103:11:103:17 | ...[...] | ActiveRecordInjection.rb:103:5:103:7 | uid | | ActiveRecordInjection.rb:104:5:104:9 | uidEq | ActiveRecordInjection.rb:108:20:108:32 | ... + ... | | ActiveRecordInjection.rb:141:21:141:26 | call to params | ActiveRecordInjection.rb:141:21:141:44 | ...[...] | +| ActiveRecordInjection.rb:141:21:141:26 | call to params | ActiveRecordInjection.rb:141:21:141:44 | ...[...] | | ActiveRecordInjection.rb:141:21:141:44 | ...[...] | ActiveRecordInjection.rb:20:22:20:30 | condition | | ActiveRecordInjection.rb:155:59:155:64 | call to params | ActiveRecordInjection.rb:155:59:155:74 | ...[...] | | ActiveRecordInjection.rb:155:59:155:74 | ...[...] | ActiveRecordInjection.rb:155:27:155:76 | "this is an unsafe annotation:..." | @@ -102,6 +104,8 @@ nodes | ActiveRecordInjection.rb:74:32:74:54 | "id = '#{...}'" | semmle.label | "id = '#{...}'" | | ActiveRecordInjection.rb:74:41:74:46 | call to params | semmle.label | call to params | | ActiveRecordInjection.rb:74:41:74:51 | ...[...] | semmle.label | ...[...] | +| ActiveRecordInjection.rb:79:23:79:28 | call to params | semmle.label | call to params | +| ActiveRecordInjection.rb:79:23:79:35 | ...[...] | semmle.label | ...[...] | | ActiveRecordInjection.rb:83:17:83:22 | call to params | semmle.label | call to params | | ActiveRecordInjection.rb:83:17:83:31 | ...[...] | semmle.label | ...[...] | | ActiveRecordInjection.rb:84:19:84:24 | call to params | semmle.label | call to params | @@ -123,6 +127,7 @@ nodes | ActiveRecordInjection.rb:108:20:108:32 | ... + ... | semmle.label | ... + ... | | ActiveRecordInjection.rb:141:21:141:26 | call to params | semmle.label | call to params | | ActiveRecordInjection.rb:141:21:141:44 | ...[...] | semmle.label | ...[...] | +| ActiveRecordInjection.rb:141:21:141:44 | ...[...] | semmle.label | ...[...] | | ActiveRecordInjection.rb:155:27:155:76 | "this is an unsafe annotation:..." | semmle.label | "this is an unsafe annotation:..." | | ActiveRecordInjection.rb:155:59:155:64 | call to params | semmle.label | call to params | | ActiveRecordInjection.rb:155:59:155:74 | ...[...] | semmle.label | ...[...] | @@ -172,6 +177,7 @@ subpaths | ActiveRecordInjection.rb:61:16:61:21 | <<-SQL | ActiveRecordInjection.rb:62:21:62:26 | call to params | ActiveRecordInjection.rb:61:16:61:21 | <<-SQL | This SQL query depends on a $@. | ActiveRecordInjection.rb:62:21:62:26 | call to params | user-provided value | | ActiveRecordInjection.rb:68:20:68:47 | "user.id = '#{...}'" | ActiveRecordInjection.rb:68:34:68:39 | call to params | ActiveRecordInjection.rb:68:20:68:47 | "user.id = '#{...}'" | This SQL query depends on a $@. | ActiveRecordInjection.rb:68:34:68:39 | call to params | user-provided value | | ActiveRecordInjection.rb:74:32:74:54 | "id = '#{...}'" | ActiveRecordInjection.rb:74:41:74:46 | call to params | ActiveRecordInjection.rb:74:32:74:54 | "id = '#{...}'" | This SQL query depends on a $@. | ActiveRecordInjection.rb:74:41:74:46 | call to params | user-provided value | +| ActiveRecordInjection.rb:79:23:79:35 | ...[...] | ActiveRecordInjection.rb:79:23:79:28 | call to params | ActiveRecordInjection.rb:79:23:79:35 | ...[...] | This SQL query depends on a $@. | ActiveRecordInjection.rb:79:23:79:28 | call to params | user-provided value | | ActiveRecordInjection.rb:83:17:83:31 | ...[...] | ActiveRecordInjection.rb:83:17:83:22 | call to params | ActiveRecordInjection.rb:83:17:83:31 | ...[...] | This SQL query depends on a $@. | ActiveRecordInjection.rb:83:17:83:22 | call to params | user-provided value | | ActiveRecordInjection.rb:84:19:84:33 | ...[...] | ActiveRecordInjection.rb:84:19:84:24 | call to params | ActiveRecordInjection.rb:84:19:84:33 | ...[...] | This SQL query depends on a $@. | ActiveRecordInjection.rb:84:19:84:24 | call to params | user-provided value | | ActiveRecordInjection.rb:88:18:88:35 | ...[...] | ActiveRecordInjection.rb:88:18:88:23 | call to params | ActiveRecordInjection.rb:88:18:88:35 | ...[...] | This SQL query depends on a $@. | ActiveRecordInjection.rb:88:18:88:23 | call to params | user-provided value | @@ -179,6 +185,7 @@ subpaths | ActiveRecordInjection.rb:94:18:94:35 | ...[...] | ActiveRecordInjection.rb:94:18:94:23 | call to params | ActiveRecordInjection.rb:94:18:94:35 | ...[...] | This SQL query depends on a $@. | ActiveRecordInjection.rb:94:18:94:23 | call to params | user-provided value | | ActiveRecordInjection.rb:96:23:96:47 | ...[...] | ActiveRecordInjection.rb:96:23:96:28 | call to params | ActiveRecordInjection.rb:96:23:96:47 | ...[...] | This SQL query depends on a $@. | ActiveRecordInjection.rb:96:23:96:28 | call to params | user-provided value | | ActiveRecordInjection.rb:108:20:108:32 | ... + ... | ActiveRecordInjection.rb:102:10:102:15 | call to params | ActiveRecordInjection.rb:108:20:108:32 | ... + ... | This SQL query depends on a $@. | ActiveRecordInjection.rb:102:10:102:15 | call to params | user-provided value | +| ActiveRecordInjection.rb:141:21:141:44 | ...[...] | ActiveRecordInjection.rb:141:21:141:26 | call to params | ActiveRecordInjection.rb:141:21:141:44 | ...[...] | This SQL query depends on a $@. | ActiveRecordInjection.rb:141:21:141:26 | call to params | user-provided value | | ActiveRecordInjection.rb:155:27:155:76 | "this is an unsafe annotation:..." | ActiveRecordInjection.rb:155:59:155:64 | call to params | ActiveRecordInjection.rb:155:27:155:76 | "this is an unsafe annotation:..." | This SQL query depends on a $@. | ActiveRecordInjection.rb:155:59:155:64 | call to params | user-provided value | | ActiveRecordInjection.rb:168:37:168:41 | query | ActiveRecordInjection.rb:173:5:173:10 | call to params | ActiveRecordInjection.rb:168:37:168:41 | query | This SQL query depends on a $@. | ActiveRecordInjection.rb:173:5:173:10 | call to params | user-provided value | | ActiveRecordInjection.rb:177:43:177:104 | "SELECT * FROM users WHERE id ..." | ActiveRecordInjection.rb:173:5:173:10 | call to params | ActiveRecordInjection.rb:177:43:177:104 | "SELECT * FROM users WHERE id ..." | This SQL query depends on a $@. | ActiveRecordInjection.rb:173:5:173:10 | call to params | user-provided value | @@ -189,4 +196,4 @@ subpaths | PgInjection.rb:20:22:20:25 | qry2 | PgInjection.rb:6:12:6:17 | call to params | PgInjection.rb:20:22:20:25 | qry2 | This SQL query depends on a $@. | PgInjection.rb:6:12:6:17 | call to params | user-provided value | | PgInjection.rb:21:28:21:31 | qry2 | PgInjection.rb:6:12:6:17 | call to params | PgInjection.rb:21:28:21:31 | qry2 | This SQL query depends on a $@. | PgInjection.rb:6:12:6:17 | call to params | user-provided value | | PgInjection.rb:32:29:32:32 | qry3 | PgInjection.rb:6:12:6:17 | call to params | PgInjection.rb:32:29:32:32 | qry3 | This SQL query depends on a $@. | PgInjection.rb:6:12:6:17 | call to params | user-provided value | -| PgInjection.rb:44:29:44:32 | qry3 | PgInjection.rb:6:12:6:17 | call to params | PgInjection.rb:44:29:44:32 | qry3 | This SQL query depends on a $@. | PgInjection.rb:6:12:6:17 | call to params | user-provided value | \ No newline at end of file +| PgInjection.rb:44:29:44:32 | qry3 | PgInjection.rb:6:12:6:17 | call to params | PgInjection.rb:44:29:44:32 | qry3 | This SQL query depends on a $@. | PgInjection.rb:6:12:6:17 | call to params | user-provided value | diff --git a/shared/util/codeql/util/test/InlineExpectationsTest.qll b/shared/util/codeql/util/test/InlineExpectationsTest.qll index 83c6a851f7d..ec2f30c4119 100644 --- a/shared/util/codeql/util/test/InlineExpectationsTest.qll +++ b/shared/util/codeql/util/test/InlineExpectationsTest.qll @@ -501,6 +501,19 @@ module Make { class FalseNegativeExpectation = LegacyTest::FalseNegativeTestExpectation; class InvalidExpectation = LegacyTest::InvalidTestExpectation; + + /** + * Holds if the expectation `tag=value` is found in one or more expectation comments. + * + * This can be used when writing tests where the set of possible values must be known in advance, + * for example, when testing a predicate for which `value` is part of the binding set. + */ + predicate hasExpectationWithValue(string tag, string value) { + exists(string tags | + getAnExpectation(_, _, _, tags, value) and + tag = tags.splitAt(",") + ) + } } /** From d8604ff390e2519a1936b9e4c5c3c49533883f21 Mon Sep 17 00:00:00 2001 From: Asger F Date: Fri, 7 Jul 2023 09:49:21 +0200 Subject: [PATCH 2/2] Ruby: exclude Object class from API graph --- ruby/ql/lib/codeql/ruby/ApiGraphs.qll | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ruby/ql/lib/codeql/ruby/ApiGraphs.qll b/ruby/ql/lib/codeql/ruby/ApiGraphs.qll index 0d6e669b1de..f002f327ba9 100644 --- a/ruby/ql/lib/codeql/ruby/ApiGraphs.qll +++ b/ruby/ql/lib/codeql/ruby/ApiGraphs.qll @@ -1096,13 +1096,13 @@ module API { /** The method accessed at `call`, synthetically treated as a separate object. */ MkMethodAccessNode(DataFlow::CallNode call) or /** The module object `mod` with epsilon edges to its ancestors. */ - MkModuleObjectUp(DataFlow::ModuleNode mod) or + MkModuleObjectUp(DataFlow::ModuleNode mod) { not mod.getQualifiedName() = "Object" } or /** The module object `mod` with epsilon edges to its descendents. */ - MkModuleObjectDown(DataFlow::ModuleNode mod) or + MkModuleObjectDown(DataFlow::ModuleNode mod) { not mod.getQualifiedName() = "Object" } or /** Instances of `mod` with epsilon edges to its ancestors. */ - MkModuleInstanceUp(DataFlow::ModuleNode mod) or + MkModuleInstanceUp(DataFlow::ModuleNode mod) { not mod.getQualifiedName() = "Object" } or /** Instances of `mod` with epsilon edges to its descendents, and to its upward node. */ - MkModuleInstanceDown(DataFlow::ModuleNode mod) or + MkModuleInstanceDown(DataFlow::ModuleNode mod) { not mod.getQualifiedName() = "Object" } or /** Intermediate node for following forward data flow. */ MkForwardNode(DataFlow::LocalSourceNode node, TypeTracker t) { isReachable(node, t) } or /** Intermediate node for following backward data flow. */