From f2403e2610c3f853644ea5e6debdc4f5ebb3fe01 Mon Sep 17 00:00:00 2001 From: Asger F Date: Wed, 18 May 2022 15:19:13 +0200 Subject: [PATCH 1/7] Ruby: port API graph doc comment --- ruby/ql/lib/codeql/ruby/ApiGraphs.qll | 72 ++++++++++++++++++++++++++- 1 file changed, 70 insertions(+), 2 deletions(-) diff --git a/ruby/ql/lib/codeql/ruby/ApiGraphs.qll b/ruby/ql/lib/codeql/ruby/ApiGraphs.qll index 02d9bc18132..04e5aa1e66b 100644 --- a/ruby/ql/lib/codeql/ruby/ApiGraphs.qll +++ b/ruby/ql/lib/codeql/ruby/ApiGraphs.qll @@ -19,8 +19,76 @@ private import codeql.ruby.dataflow.internal.DataFlowDispatch as DataFlowDispatc */ module API { /** - * An abstract representation of a definition or use of an API component such as a Ruby module, - * or the result of a method call. + * A node in the API graph, representing a value that has crossed the boundary between this + * codebase and an external library (or in general, any external codebase). + * + * ### Basic usage + * + * API graphs are typically used to identify "API calls", that is, calls to an external function + * whose implementation is not necessarily part of the current codebase. + * + * The most basic use of API graphs is typically as follows: + * 1. Start with `API::getTopLevelMember` for the relevant library. + * 2. Follow up with a chain of accessors such as `getMethod` describing how to get to the relevant API function. + * 3. Map the resulting API graph nodes to data-flow nodes, using `asSource` or `asSink`. + * + * For example, a simplified way to get arguments to `Foo.bar` would be + * ```codeql + * API::getTopLevelMember("Foo").getMethod("bar").getParameter(0).asSink() + * ``` + * + * The most commonly used accessors are `getMember`, `getMethod`, `getParameter`, and `getReturn`. + * + * ### API graph nodes + * + * There are two kinds of nodes in the API graphs, distinguished by who is "holding" the value: + * - **Use-nodes** represent values held by the current codebase, which came from an external library. + * (The current codebase is "using" a value that came from the library). + * - **Def-nodes** represent values held by the external library, which came from this codebase. + * (The current codebase "defines" the value seen by the library). + * + * API graph nodes are associated with data-flow nodes in the current codebase. + * (Since external libraries are not part of the database, there is no way to associate with concrete + * data-flow nodes from the external library). + * - **Use-nodes** are associated with data-flow nodes where a value enters the current codebase, + * such as the return value of a call to an external function. + * - **Def-nodes** are associated with data-flow nodes where a value leaves the current codebase, + * such as an argument passed in a call to an external function. + * + * + * ### Access paths and edge labels + * + * Nodes in the API graph are associated with a set of access paths, describing a series of operations + * that may be performed to obtain that value. + * + * For example, the access path `API::getTopLevelMember("Foo").getMethod("bar")` represents the action of + * reading the top-level constant `Foo` and then accessing the method `bar` on the resulting object. + * It would be associated with a call such as `Foo.bar()`. + * + * Each edge in the graph is labelled by such an "operation". For an edge `A->B`, the type of the `A` node + * determines who is performing the operation, and the type of the `B` node determines who ends up holding + * the result: + * - An edge starting from a use-node describes what the current codebase is doing to a value that + * came from a library. + * - An edge starting from a def-node describes what the external library might do to a value that + * came from the current codebase. + * - An edge ending in a use-node means the result ends up in the current codebase (at its associated data-flow node). + * - An edge ending in a def-node means the result ends up in external code (its associated data-flow node is + * the place where it was "last seen" in the current codebase before flowing out) + * + * Because the implementation of the external library is not visible, it is not known exactly what operations + * it will perform on values that flow there. Instead, the edges starting from a def-node are operations that would + * lead to an observable effect within the current codebase; without knowing for certain if the library will actually perform + * those operations. (When constructing these edges, we assume the library is somewhat well-behaved). + * + * For example, given this snippet: + * ```ruby + * Foo.bar(->(x) { doSomething(x) }) + * ``` + * A callback is passed to the external function `Foo.bar`. We can't know if `Foo.bar` will actually invoke this callback. + * But _if_ the library should decide to invoke the callback, then a value will flow into the current codebase via the `x` parameter. + * For that reason, an edge is generated representing the argument-passing operation that might be performed by `Foo.bar`. + * This edge is going from the def-node associated with the callback to the use-node associated with the parameter `x` of the lambda. */ class Node extends Impl::TApiNode { /** From 573c5c5efebd2b97f7ad5a2bedc83fd30a06e1ad Mon Sep 17 00:00:00 2001 From: Asger F Date: Mon, 30 May 2022 13:25:50 +0200 Subject: [PATCH 2/7] Ruby: Rename getAnImmediateUse -> asSource --- ruby/ql/lib/codeql/ruby/ApiGraphs.qll | 8 +++----- ruby/ql/lib/codeql/ruby/frameworks/Rails.qll | 6 +----- ruby/ql/lib/codeql/ruby/frameworks/data/ModelsAsData.qll | 2 +- ruby/ql/lib/codeql/ruby/frameworks/http_clients/Excon.qll | 4 ++-- .../lib/codeql/ruby/frameworks/http_clients/Faraday.qll | 4 ++-- .../codeql/ruby/frameworks/http_clients/HttpClient.qll | 5 ++--- .../lib/codeql/ruby/frameworks/http_clients/Httparty.qll | 2 +- .../lib/codeql/ruby/frameworks/http_clients/NetHttp.qll | 4 ++-- .../lib/codeql/ruby/frameworks/http_clients/OpenURI.qll | 2 +- .../codeql/ruby/frameworks/http_clients/RestClient.qll | 2 +- .../lib/codeql/ruby/frameworks/http_clients/Typhoeus.qll | 2 +- 11 files changed, 17 insertions(+), 24 deletions(-) diff --git a/ruby/ql/lib/codeql/ruby/ApiGraphs.qll b/ruby/ql/lib/codeql/ruby/ApiGraphs.qll index 04e5aa1e66b..9c34a82aaab 100644 --- a/ruby/ql/lib/codeql/ruby/ApiGraphs.qll +++ b/ruby/ql/lib/codeql/ruby/ApiGraphs.qll @@ -111,7 +111,7 @@ module API { * Unlike `getAUse()`, this predicate only gets the immediate references, not the indirect uses * found via data flow. */ - DataFlow::LocalSourceNode getAnImmediateUse() { Impl::use(this, result) } + DataFlow::LocalSourceNode asSource() { Impl::use(this, result) } /** * Gets a data-flow node corresponding the value flowing into this API component. @@ -126,9 +126,7 @@ module API { /** * Gets a call to a method on the receiver represented by this API component. */ - DataFlow::CallNode getAMethodCall(string method) { - result = this.getReturn(method).getAnImmediateUse() - } + DataFlow::CallNode getAMethodCall(string method) { result = this.getReturn(method).asSource() } /** * Gets a node representing member `m` of this API component. @@ -203,7 +201,7 @@ module API { /** * Gets a `new` call to the function represented by this API component. */ - DataFlow::ExprNode getAnInstantiation() { result = this.getInstance().getAnImmediateUse() } + DataFlow::ExprNode getAnInstantiation() { result = this.getInstance().asSource() } /** * Gets a node representing a (direct or indirect) subclass of the class represented by this node. diff --git a/ruby/ql/lib/codeql/ruby/frameworks/Rails.qll b/ruby/ql/lib/codeql/ruby/frameworks/Rails.qll index 54c27fc994d..9b45d220486 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/Rails.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/Rails.qll @@ -63,11 +63,7 @@ private module Config { ) or // `Rails.application.config` - this = - API::getTopLevelMember("Rails") - .getReturn("application") - .getReturn("config") - .getAnImmediateUse() + this = API::getTopLevelMember("Rails").getReturn("application").getReturn("config").asSource() or // `Rails.application.configure { ... config ... }` // `Rails::Application.configure { ... config ... }` diff --git a/ruby/ql/lib/codeql/ruby/frameworks/data/ModelsAsData.qll b/ruby/ql/lib/codeql/ruby/frameworks/data/ModelsAsData.qll index a3d92b486c3..615568fec6a 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/data/ModelsAsData.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/data/ModelsAsData.qll @@ -25,7 +25,7 @@ private import codeql.ruby.dataflow.RemoteFlowSources * A remote flow source originating from a CSV source row. */ private class RemoteFlowSourceFromCsv extends RemoteFlowSource::Range { - RemoteFlowSourceFromCsv() { this = ModelOutput::getASourceNode("remote").getAnImmediateUse() } + RemoteFlowSourceFromCsv() { this = ModelOutput::getASourceNode("remote").asSource() } override string getSourceType() { result = "Remote flow (from model)" } } diff --git a/ruby/ql/lib/codeql/ruby/frameworks/http_clients/Excon.qll b/ruby/ql/lib/codeql/ruby/frameworks/http_clients/Excon.qll index 83d62d196ee..ddd66ecf6a0 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/http_clients/Excon.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/http_clients/Excon.qll @@ -30,8 +30,8 @@ class ExconHttpRequest extends HTTP::Client::Request::Range { DataFlow::Node connectionUse; ExconHttpRequest() { - requestUse = requestNode.getAnImmediateUse() and - connectionUse = connectionNode.getAnImmediateUse() and + requestUse = requestNode.asSource() and + connectionUse = connectionNode.asSource() and connectionNode = [ // one-off requests diff --git a/ruby/ql/lib/codeql/ruby/frameworks/http_clients/Faraday.qll b/ruby/ql/lib/codeql/ruby/frameworks/http_clients/Faraday.qll index f74c9059e70..b9367ee4765 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/http_clients/Faraday.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/http_clients/Faraday.qll @@ -38,8 +38,8 @@ class FaradayHttpRequest extends HTTP::Client::Request::Range { ] and requestNode = connectionNode.getReturn(["get", "head", "delete", "post", "put", "patch", "trace"]) and - requestUse = requestNode.getAnImmediateUse() and - connectionUse = connectionNode.getAnImmediateUse() and + requestUse = requestNode.asSource() and + connectionUse = connectionNode.asSource() and this = requestUse.asExpr().getExpr() } diff --git a/ruby/ql/lib/codeql/ruby/frameworks/http_clients/HttpClient.qll b/ruby/ql/lib/codeql/ruby/frameworks/http_clients/HttpClient.qll index 5a3ef8c355c..f2f2b7b0440 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/http_clients/HttpClient.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/http_clients/HttpClient.qll @@ -29,7 +29,7 @@ class HttpClientRequest extends HTTP::Client::Request::Range { API::getTopLevelMember("HTTPClient").getInstance() ] and requestNode = connectionNode.getReturn(method) and - requestUse = requestNode.getAnImmediateUse() and + requestUse = requestNode.asSource() and method in [ "get", "head", "delete", "options", "post", "put", "trace", "get_content", "post_content" ] and @@ -52,8 +52,7 @@ class HttpClientRequest extends HTTP::Client::Request::Range { // Look for calls to set // `c.ssl_config.verify_mode = OpenSSL::SSL::VERIFY_NONE` // on an HTTPClient connection object `c`. - disablingNode = - connectionNode.getReturn("ssl_config").getReturn("verify_mode=").getAnImmediateUse() and + disablingNode = connectionNode.getReturn("ssl_config").getReturn("verify_mode=").asSource() and disablingNode.(DataFlow::CallNode).getArgument(0) = API::getTopLevelMember("OpenSSL").getMember("SSL").getMember("VERIFY_NONE").getAUse() } diff --git a/ruby/ql/lib/codeql/ruby/frameworks/http_clients/Httparty.qll b/ruby/ql/lib/codeql/ruby/frameworks/http_clients/Httparty.qll index 29197a71e29..f92010c03cf 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/http_clients/Httparty.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/http_clients/Httparty.qll @@ -28,7 +28,7 @@ class HttpartyRequest extends HTTP::Client::Request::Range { DataFlow::CallNode requestUse; HttpartyRequest() { - requestUse = requestNode.getAnImmediateUse() and + requestUse = requestNode.asSource() and requestNode = API::getTopLevelMember("HTTParty") .getReturn(["get", "head", "delete", "options", "post", "put", "patch"]) and diff --git a/ruby/ql/lib/codeql/ruby/frameworks/http_clients/NetHttp.qll b/ruby/ql/lib/codeql/ruby/frameworks/http_clients/NetHttp.qll index 5f0e6903094..c62107f525f 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/http_clients/NetHttp.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/http_clients/NetHttp.qll @@ -25,7 +25,7 @@ class NetHttpRequest extends HTTP::Client::Request::Range { NetHttpRequest() { exists(string method | - request = requestNode.getAnImmediateUse() and + request = requestNode.asSource() and this = request.asExpr().getExpr() | // Net::HTTP.get(...) @@ -59,7 +59,7 @@ class NetHttpRequest extends HTTP::Client::Request::Range { new = API::getTopLevelMember("Net").getMember("HTTP").getInstance() and requestNode = new.getReturn(_) | - result = new.getAnImmediateUse().(DataFlow::CallNode).getArgument(0) + result = new.asSource().(DataFlow::CallNode).getArgument(0) ) } diff --git a/ruby/ql/lib/codeql/ruby/frameworks/http_clients/OpenURI.qll b/ruby/ql/lib/codeql/ruby/frameworks/http_clients/OpenURI.qll index c76310bf150..aa93083f6a4 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/http_clients/OpenURI.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/http_clients/OpenURI.qll @@ -28,7 +28,7 @@ class OpenUriRequest extends HTTP::Client::Request::Range { [API::getTopLevelMember("URI"), API::getTopLevelMember("URI").getReturn("parse")] .getReturn("open"), API::getTopLevelMember("OpenURI").getReturn("open_uri") ] and - requestUse = requestNode.getAnImmediateUse() and + requestUse = requestNode.asSource() and this = requestUse.asExpr().getExpr() } diff --git a/ruby/ql/lib/codeql/ruby/frameworks/http_clients/RestClient.qll b/ruby/ql/lib/codeql/ruby/frameworks/http_clients/RestClient.qll index 19bd89d8b23..9e55aee25b5 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/http_clients/RestClient.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/http_clients/RestClient.qll @@ -22,7 +22,7 @@ class RestClientHttpRequest extends HTTP::Client::Request::Range { API::Node connectionNode; RestClientHttpRequest() { - requestUse = requestNode.getAnImmediateUse() and + requestUse = requestNode.asSource() and this = requestUse.asExpr().getExpr() and ( connectionNode = diff --git a/ruby/ql/lib/codeql/ruby/frameworks/http_clients/Typhoeus.qll b/ruby/ql/lib/codeql/ruby/frameworks/http_clients/Typhoeus.qll index d3de66a6758..2102e255ce9 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/http_clients/Typhoeus.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/http_clients/Typhoeus.qll @@ -19,7 +19,7 @@ class TyphoeusHttpRequest extends HTTP::Client::Request::Range { API::Node requestNode; TyphoeusHttpRequest() { - requestUse = requestNode.getAnImmediateUse() and + requestUse = requestNode.asSource() and requestNode = API::getTopLevelMember("Typhoeus") .getReturn(["get", "head", "delete", "options", "post", "put", "patch"]) and From 2f8086bb57d00398ec5125e1f84e095e1b74c0b7 Mon Sep 17 00:00:00 2001 From: Asger F Date: Mon, 30 May 2022 13:27:20 +0200 Subject: [PATCH 3/7] Ruby: Rename getAUse -> getAValueReachableFromSource --- ruby/ql/lib/codeql/ruby/ApiGraphs.qll | 4 ++-- .../ruby/frameworks/ActionController.qll | 2 +- .../codeql/ruby/frameworks/ActiveRecord.qll | 2 +- .../ql/lib/codeql/ruby/frameworks/GraphQL.qll | 21 ++++++++++++++++--- .../lib/codeql/ruby/frameworks/XmlParsing.qll | 2 +- .../lib/codeql/ruby/frameworks/core/Hash.qll | 6 ++++-- .../ruby/frameworks/http_clients/Excon.qll | 6 ++++-- .../ruby/frameworks/http_clients/Faraday.qll | 9 ++++++-- .../frameworks/http_clients/HttpClient.qll | 5 ++++- .../ruby/frameworks/http_clients/NetHttp.qll | 5 ++++- .../ruby/frameworks/http_clients/OpenURI.qll | 6 +++++- .../frameworks/http_clients/RestClient.qll | 9 ++++++-- .../library-tests/dataflow/api-graphs/use.ql | 2 +- 13 files changed, 59 insertions(+), 20 deletions(-) diff --git a/ruby/ql/lib/codeql/ruby/ApiGraphs.qll b/ruby/ql/lib/codeql/ruby/ApiGraphs.qll index 9c34a82aaab..c55795946a9 100644 --- a/ruby/ql/lib/codeql/ruby/ApiGraphs.qll +++ b/ruby/ql/lib/codeql/ruby/ApiGraphs.qll @@ -99,7 +99,7 @@ module API { * * This includes indirect uses found via data flow. */ - DataFlow::Node getAUse() { + DataFlow::Node getAValueReachableFromSource() { exists(DataFlow::LocalSourceNode src | Impl::use(this, src) | Impl::trackUseNode(src).flowsTo(result) ) @@ -108,7 +108,7 @@ module API { /** * Gets an immediate use of the API component represented by this node. * - * Unlike `getAUse()`, this predicate only gets the immediate references, not the indirect uses + * Unlike `getAValueReachableFromSource()`, this predicate only gets the immediate references, not the indirect uses * found via data flow. */ DataFlow::LocalSourceNode asSource() { Impl::use(this, result) } diff --git a/ruby/ql/lib/codeql/ruby/frameworks/ActionController.qll b/ruby/ql/lib/codeql/ruby/frameworks/ActionController.qll index f034e229e32..36cfcb230f3 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/ActionController.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/ActionController.qll @@ -33,7 +33,7 @@ class ActionControllerControllerClass extends ClassDeclaration { // In Rails applications `ApplicationController` typically extends `ActionController::Base`, but we // treat it separately in case the `ApplicationController` definition is not in the database. API::getTopLevelMember("ApplicationController") - ].getASubclass().getAUse().asExpr().getExpr() + ].getASubclass().getAValueReachableFromSource().asExpr().getExpr() } /** diff --git a/ruby/ql/lib/codeql/ruby/frameworks/ActiveRecord.qll b/ruby/ql/lib/codeql/ruby/frameworks/ActiveRecord.qll index 0846e30d047..a394a828ac7 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/ActiveRecord.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/ActiveRecord.qll @@ -54,7 +54,7 @@ class ActiveRecordModelClass extends ClassDeclaration { // In Rails applications `ApplicationRecord` typically extends `ActiveRecord::Base`, but we // treat it separately in case the `ApplicationRecord` definition is not in the database. API::getTopLevelMember("ApplicationRecord") - ].getASubclass().getAUse().asExpr().getExpr() + ].getASubclass().getAValueReachableFromSource().asExpr().getExpr() } // Gets the class declaration for this class and all of its super classes diff --git a/ruby/ql/lib/codeql/ruby/frameworks/GraphQL.qll b/ruby/ql/lib/codeql/ruby/frameworks/GraphQL.qll index a7ac5cab6cd..3dacd89b25e 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/GraphQL.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/GraphQL.qll @@ -41,7 +41,12 @@ private API::Node graphQlSchema() { result = API::getTopLevelMember("GraphQL").g private class GraphqlRelayClassicMutationClass extends ClassDeclaration { GraphqlRelayClassicMutationClass() { this.getSuperclassExpr() = - graphQlSchema().getMember("RelayClassicMutation").getASubclass*().getAUse().asExpr().getExpr() + graphQlSchema() + .getMember("RelayClassicMutation") + .getASubclass*() + .getAValueReachableFromSource() + .asExpr() + .getExpr() } } @@ -71,7 +76,12 @@ private class GraphqlRelayClassicMutationClass extends ClassDeclaration { private class GraphqlSchemaResolverClass extends ClassDeclaration { GraphqlSchemaResolverClass() { this.getSuperclassExpr() = - graphQlSchema().getMember("Resolver").getASubclass().getAUse().asExpr().getExpr() + graphQlSchema() + .getMember("Resolver") + .getASubclass() + .getAValueReachableFromSource() + .asExpr() + .getExpr() } } @@ -92,7 +102,12 @@ private class GraphqlSchemaResolverClass extends ClassDeclaration { class GraphqlSchemaObjectClass extends ClassDeclaration { GraphqlSchemaObjectClass() { this.getSuperclassExpr() = - graphQlSchema().getMember("Object").getASubclass().getAUse().asExpr().getExpr() + graphQlSchema() + .getMember("Object") + .getASubclass() + .getAValueReachableFromSource() + .asExpr() + .getExpr() } /** Gets a `GraphqlFieldDefinitionMethodCall` called in this class. */ diff --git a/ruby/ql/lib/codeql/ruby/frameworks/XmlParsing.qll b/ruby/ql/lib/codeql/ruby/frameworks/XmlParsing.qll index cd03951304b..73cefe8d255 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/XmlParsing.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/XmlParsing.qll @@ -143,7 +143,7 @@ private DataFlow::LocalSourceNode trackFeature(Feature f, boolean enable, TypeTr or // Use of a constant f enable = true and - result = parseOptionsModule().getMember(f.getConstantName()).getAUse() + result = parseOptionsModule().getMember(f.getConstantName()).getAValueReachableFromSource() or // Treat `&`, `&=`, `|` and `|=` operators as if they preserve the on/off states // of their operands. This is an overapproximation but likely to work well in practice diff --git a/ruby/ql/lib/codeql/ruby/frameworks/core/Hash.qll b/ruby/ql/lib/codeql/ruby/frameworks/core/Hash.qll index dd628d4929d..87733c0af9b 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/core/Hash.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/core/Hash.qll @@ -99,7 +99,8 @@ module Hash { HashNewSummary() { this = "Hash[]" } final override ElementReference getACall() { - result.getReceiver() = API::getTopLevelMember("Hash").getAUse().asExpr().getExpr() and + result.getReceiver() = + API::getTopLevelMember("Hash").getAValueReachableFromSource().asExpr().getExpr() and result.getNumberOfArguments() = 1 } @@ -138,7 +139,8 @@ module Hash { } final override ElementReference getACall() { - result.getReceiver() = API::getTopLevelMember("Hash").getAUse().asExpr().getExpr() and + result.getReceiver() = + API::getTopLevelMember("Hash").getAValueReachableFromSource().asExpr().getExpr() and key = result.getArgument(i - 1).getConstantValue() and exists(result.getArgument(i)) } diff --git a/ruby/ql/lib/codeql/ruby/frameworks/http_clients/Excon.qll b/ruby/ql/lib/codeql/ruby/frameworks/http_clients/Excon.qll index ddd66ecf6a0..fdfd1f92096 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/http_clients/Excon.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/http_clients/Excon.qll @@ -66,7 +66,8 @@ class ExconHttpRequest extends HTTP::Client::Request::Range { override predicate disablesCertificateValidation(DataFlow::Node disablingNode) { // Check for `ssl_verify_peer: false` in the options hash. exists(DataFlow::Node arg, int i | - i > 0 and arg = connectionNode.getAUse().(DataFlow::CallNode).getArgument(i) + i > 0 and + arg = connectionNode.getAValueReachableFromSource().(DataFlow::CallNode).getArgument(i) | argSetsVerifyPeer(arg, false, disablingNode) ) @@ -79,7 +80,8 @@ class ExconHttpRequest extends HTTP::Client::Request::Range { disableCall.asExpr().getASuccessor+() = requestUse.asExpr() and disablingNode = disableCall and not exists(DataFlow::Node arg, int i | - i > 0 and arg = connectionNode.getAUse().(DataFlow::CallNode).getArgument(i) + i > 0 and + arg = connectionNode.getAValueReachableFromSource().(DataFlow::CallNode).getArgument(i) | argSetsVerifyPeer(arg, true, _) ) diff --git a/ruby/ql/lib/codeql/ruby/frameworks/http_clients/Faraday.qll b/ruby/ql/lib/codeql/ruby/frameworks/http_clients/Faraday.qll index b9367ee4765..59d2e33dad5 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/http_clients/Faraday.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/http_clients/Faraday.qll @@ -58,7 +58,8 @@ class FaradayHttpRequest extends HTTP::Client::Request::Range { // or // `{ ssl: { verify_mode: OpenSSL::SSL::VERIFY_NONE } }` exists(DataFlow::Node arg, int i | - i > 0 and arg = connectionNode.getAUse().(DataFlow::CallNode).getArgument(i) + i > 0 and + arg = connectionNode.getAValueReachableFromSource().(DataFlow::CallNode).getArgument(i) | // Either passed as an individual key:value argument, e.g.: // Faraday.new(..., ssl: {...}) @@ -132,7 +133,11 @@ private predicate isVerifyModeNonePair(CfgNodes::ExprNodes::PairCfgNode p) { key.asExpr() = p.getKey() and value.asExpr() = p.getValue() and isSymbolLiteral(key, "verify_mode") and - value = API::getTopLevelMember("OpenSSL").getMember("SSL").getMember("VERIFY_NONE").getAUse() + value = + API::getTopLevelMember("OpenSSL") + .getMember("SSL") + .getMember("VERIFY_NONE") + .getAValueReachableFromSource() ) } diff --git a/ruby/ql/lib/codeql/ruby/frameworks/http_clients/HttpClient.qll b/ruby/ql/lib/codeql/ruby/frameworks/http_clients/HttpClient.qll index f2f2b7b0440..2f63fd96005 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/http_clients/HttpClient.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/http_clients/HttpClient.qll @@ -54,7 +54,10 @@ class HttpClientRequest extends HTTP::Client::Request::Range { // on an HTTPClient connection object `c`. disablingNode = connectionNode.getReturn("ssl_config").getReturn("verify_mode=").asSource() and disablingNode.(DataFlow::CallNode).getArgument(0) = - API::getTopLevelMember("OpenSSL").getMember("SSL").getMember("VERIFY_NONE").getAUse() + API::getTopLevelMember("OpenSSL") + .getMember("SSL") + .getMember("VERIFY_NONE") + .getAValueReachableFromSource() } override string getFramework() { result = "HTTPClient" } diff --git a/ruby/ql/lib/codeql/ruby/frameworks/http_clients/NetHttp.qll b/ruby/ql/lib/codeql/ruby/frameworks/http_clients/NetHttp.qll index c62107f525f..3396ede11da 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/http_clients/NetHttp.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/http_clients/NetHttp.qll @@ -73,7 +73,10 @@ class NetHttpRequest extends HTTP::Client::Request::Range { // foo.request(...) exists(DataFlow::CallNode setter | disablingNode = - API::getTopLevelMember("OpenSSL").getMember("SSL").getMember("VERIFY_NONE").getAUse() and + API::getTopLevelMember("OpenSSL") + .getMember("SSL") + .getMember("VERIFY_NONE") + .getAValueReachableFromSource() and setter.asExpr().getExpr().(SetterMethodCall).getMethodName() = "verify_mode=" and disablingNode = setter.getArgument(0) and localFlow(setter.getReceiver(), request.getReceiver()) diff --git a/ruby/ql/lib/codeql/ruby/frameworks/http_clients/OpenURI.qll b/ruby/ql/lib/codeql/ruby/frameworks/http_clients/OpenURI.qll index aa93083f6a4..565ac2e85ca 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/http_clients/OpenURI.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/http_clients/OpenURI.qll @@ -110,7 +110,11 @@ private predicate isSslVerifyModeNonePair(CfgNodes::ExprNodes::PairCfgNode p) { key.asExpr() = p.getKey() and value.asExpr() = p.getValue() and isSslVerifyModeLiteral(key) and - value = API::getTopLevelMember("OpenSSL").getMember("SSL").getMember("VERIFY_NONE").getAUse() + value = + API::getTopLevelMember("OpenSSL") + .getMember("SSL") + .getMember("VERIFY_NONE") + .getAValueReachableFromSource() ) } diff --git a/ruby/ql/lib/codeql/ruby/frameworks/http_clients/RestClient.qll b/ruby/ql/lib/codeql/ruby/frameworks/http_clients/RestClient.qll index 9e55aee25b5..41e4c1fbcd2 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/http_clients/RestClient.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/http_clients/RestClient.qll @@ -52,7 +52,8 @@ class RestClientHttpRequest extends HTTP::Client::Request::Range { // `RestClient::Resource::new` takes an options hash argument, and we're // looking for `{ verify_ssl: OpenSSL::SSL::VERIFY_NONE }`. exists(DataFlow::Node arg, int i | - i > 0 and arg = connectionNode.getAUse().(DataFlow::CallNode).getArgument(i) + i > 0 and + arg = connectionNode.getAValueReachableFromSource().(DataFlow::CallNode).getArgument(i) | // Either passed as an individual key:value argument, e.g.: // RestClient::Resource.new(..., verify_ssl: OpenSSL::SSL::VERIFY_NONE) @@ -79,7 +80,11 @@ private predicate isVerifySslNonePair(CfgNodes::ExprNodes::PairCfgNode p) { key.asExpr() = p.getKey() and value.asExpr() = p.getValue() and isSslVerifyModeLiteral(key) and - value = API::getTopLevelMember("OpenSSL").getMember("SSL").getMember("VERIFY_NONE").getAUse() + value = + API::getTopLevelMember("OpenSSL") + .getMember("SSL") + .getMember("VERIFY_NONE") + .getAValueReachableFromSource() ) } diff --git a/ruby/ql/test/library-tests/dataflow/api-graphs/use.ql b/ruby/ql/test/library-tests/dataflow/api-graphs/use.ql index 63728b0a14b..4c96b0cd789 100644 --- a/ruby/ql/test/library-tests/dataflow/api-graphs/use.ql +++ b/ruby/ql/test/library-tests/dataflow/api-graphs/use.ql @@ -26,7 +26,7 @@ class ApiUseTest extends InlineExpectationsTest { l = n.getLocation() and ( tag = "use" and - n = a.getAUse() + n = a.getAValueReachableFromSource() or tag = "def" and n = a.getARhs() From 7c877c7861f122ca24fc78995cba1b5b135f3913 Mon Sep 17 00:00:00 2001 From: Asger F Date: Mon, 30 May 2022 13:27:50 +0200 Subject: [PATCH 4/7] Ruby: Rename getARhs -> asSink --- ruby/ql/lib/codeql/ruby/ApiGraphs.qll | 4 ++-- ruby/ql/test/library-tests/dataflow/api-graphs/use.ql | 2 +- ruby/ql/test/library-tests/dataflow/summaries/Summaries.ql | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/ruby/ql/lib/codeql/ruby/ApiGraphs.qll b/ruby/ql/lib/codeql/ruby/ApiGraphs.qll index c55795946a9..b129142ab8f 100644 --- a/ruby/ql/lib/codeql/ruby/ApiGraphs.qll +++ b/ruby/ql/lib/codeql/ruby/ApiGraphs.qll @@ -116,12 +116,12 @@ module API { /** * Gets a data-flow node corresponding the value flowing into this API component. */ - DataFlow::Node getARhs() { Impl::def(this, result) } + DataFlow::Node asSink() { Impl::def(this, result) } /** * Gets a data-flow node that may interprocedurally flow to the value escaping into this API component. */ - DataFlow::Node getAValueReachingRhs() { result = Impl::trackDefNode(this.getARhs()) } + DataFlow::Node getAValueReachingRhs() { result = Impl::trackDefNode(this.asSink()) } /** * Gets a call to a method on the receiver represented by this API component. diff --git a/ruby/ql/test/library-tests/dataflow/api-graphs/use.ql b/ruby/ql/test/library-tests/dataflow/api-graphs/use.ql index 4c96b0cd789..c47939a8405 100644 --- a/ruby/ql/test/library-tests/dataflow/api-graphs/use.ql +++ b/ruby/ql/test/library-tests/dataflow/api-graphs/use.ql @@ -29,7 +29,7 @@ class ApiUseTest extends InlineExpectationsTest { n = a.getAValueReachableFromSource() or tag = "def" and - n = a.getARhs() + n = a.asSink() or tag = "call" and n = a.(API::MethodAccessNode).getCallNode() diff --git a/ruby/ql/test/library-tests/dataflow/summaries/Summaries.ql b/ruby/ql/test/library-tests/dataflow/summaries/Summaries.ql index a1ee8a05332..c81af2546e7 100644 --- a/ruby/ql/test/library-tests/dataflow/summaries/Summaries.ql +++ b/ruby/ql/test/library-tests/dataflow/summaries/Summaries.ql @@ -132,7 +132,7 @@ class CustomValueSink extends DefaultValueFlowConf { override predicate isSink(DataFlow::Node sink) { super.isSink(sink) or - sink = ModelOutput::getASinkNode("test-sink").getARhs() + sink = ModelOutput::getASinkNode("test-sink").asSink() } } @@ -140,7 +140,7 @@ class CustomTaintSink extends DefaultTaintFlowConf { override predicate isSink(DataFlow::Node sink) { super.isSink(sink) or - sink = ModelOutput::getASinkNode("test-sink").getARhs() + sink = ModelOutput::getASinkNode("test-sink").asSink() } } From 9838e2e10164dd571c13d8b592ea30560918cbcf Mon Sep 17 00:00:00 2001 From: Asger F Date: Mon, 30 May 2022 13:28:18 +0200 Subject: [PATCH 5/7] Ruby: Rename getAValueReachingRhs -> getAValueReachingSink --- ruby/ql/lib/codeql/ruby/ApiGraphs.qll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ruby/ql/lib/codeql/ruby/ApiGraphs.qll b/ruby/ql/lib/codeql/ruby/ApiGraphs.qll index b129142ab8f..eaf6ecb009b 100644 --- a/ruby/ql/lib/codeql/ruby/ApiGraphs.qll +++ b/ruby/ql/lib/codeql/ruby/ApiGraphs.qll @@ -121,7 +121,7 @@ module API { /** * Gets a data-flow node that may interprocedurally flow to the value escaping into this API component. */ - DataFlow::Node getAValueReachingRhs() { result = Impl::trackDefNode(this.asSink()) } + DataFlow::Node getAValueReachingSink() { result = Impl::trackDefNode(this.asSink()) } /** * Gets a call to a method on the receiver represented by this API component. From d15b90e21acdfebb75074756e278c4d5d38f90a1 Mon Sep 17 00:00:00 2001 From: Asger F Date: Mon, 30 May 2022 13:31:31 +0200 Subject: [PATCH 6/7] Ruby: Add deprecation --- ruby/ql/lib/codeql/ruby/ApiGraphs.qll | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/ruby/ql/lib/codeql/ruby/ApiGraphs.qll b/ruby/ql/lib/codeql/ruby/ApiGraphs.qll index eaf6ecb009b..1759f42ad94 100644 --- a/ruby/ql/lib/codeql/ruby/ApiGraphs.qll +++ b/ruby/ql/lib/codeql/ruby/ApiGraphs.qll @@ -123,6 +123,18 @@ module API { */ DataFlow::Node getAValueReachingSink() { result = Impl::trackDefNode(this.asSink()) } + /** DEPRECATED. This predicate has been renamed to `getAValueReachableFromSource()`. */ + deprecated DataFlow::Node getAUse() { result = this.getAValueReachableFromSource() } + + /** DEPRECATED. This predicate has been renamed to `asSource()`. */ + deprecated DataFlow::LocalSourceNode getAnImmediateUse() { result = this.asSource() } + + /** DEPRECATED. This predicate has been renamed to `asSink()`. */ + deprecated DataFlow::Node getARhs() { result = this.asSink() } + + /** DEPRECATED. This predicate has been renamed to `getAValueReachingSink()`. */ + deprecated DataFlow::Node getAValueReachingRhs() { result = this.getAValueReachingSink() } + /** * Gets a call to a method on the receiver represented by this API component. */ From a1af9c3d7d23c82dfb5e2bfe6c0bae341aaca53a Mon Sep 17 00:00:00 2001 From: Asger F Date: Tue, 14 Jun 2022 11:02:17 +0200 Subject: [PATCH 7/7] Ruby: update predicate docs --- ruby/ql/lib/codeql/ruby/ApiGraphs.qll | 48 +++++++++++++++++++++------ 1 file changed, 38 insertions(+), 10 deletions(-) diff --git a/ruby/ql/lib/codeql/ruby/ApiGraphs.qll b/ruby/ql/lib/codeql/ruby/ApiGraphs.qll index 1759f42ad94..3be0e036b74 100644 --- a/ruby/ql/lib/codeql/ruby/ApiGraphs.qll +++ b/ruby/ql/lib/codeql/ruby/ApiGraphs.qll @@ -92,12 +92,10 @@ module API { */ class Node extends Impl::TApiNode { /** - * Gets a data-flow node corresponding to a use of the API component represented by this node. + * Gets a data-flow node where this value may flow after entering the current codebase. * - * For example, `Kernel.format "%s world!", "Hello"` is a use of the return of the `format` function of - * the `Kernel` module. - * - * This includes indirect uses found via data flow. + * This is similar to `asSource()` but additionally includes nodes that are transitively reachable by data flow. + * See `asSource()` for examples. */ DataFlow::Node getAValueReachableFromSource() { exists(DataFlow::LocalSourceNode src | Impl::use(this, src) | @@ -106,20 +104,50 @@ module API { } /** - * Gets an immediate use of the API component represented by this node. + * Gets a data-flow node where this value enters the current codebase. * - * Unlike `getAValueReachableFromSource()`, this predicate only gets the immediate references, not the indirect uses - * found via data flow. + * For example: + * ```ruby + * # API::getTopLevelMember("Foo").asSource() + * Foo + * + * # API::getTopLevelMember("Foo").getMethod("bar").getReturn().asSource() + * Foo.bar + * + * # 'x' is found by: + * # API::getTopLevelMember("Foo").getMethod("bar").getBlock().getParameter(0).asSource() + * Foo.bar do |x| + * end + * ``` */ DataFlow::LocalSourceNode asSource() { Impl::use(this, result) } /** - * Gets a data-flow node corresponding the value flowing into this API component. + * Gets a data-flow node where this value leaves the current codebase and flows into an + * external library (or in general, any external codebase). + * + * Concretely, this corresponds to an argument passed to a call to external code. + * + * For example: + * ```ruby + * # 'x' is found by: + * # API::getTopLevelMember("Foo").getMethod("bar").getParameter(0).asSink() + * Foo.bar(x) + * + * Foo.bar(-> { + * # 'x' is found by: + * # API::getTopLevelMember("Foo").getMethod("bar").getParameter(0).getReturn().asSink() + * x + * }) + * ``` */ DataFlow::Node asSink() { Impl::def(this, result) } /** - * Gets a data-flow node that may interprocedurally flow to the value escaping into this API component. + * Get a data-flow node that transitively flows to an external library (or in general, any external codebase). + * + * This is similar to `asSink()` but additionally includes nodes that transitively reach a sink by data flow. + * See `asSink()` for examples. */ DataFlow::Node getAValueReachingSink() { result = Impl::trackDefNode(this.asSink()) }