Ruby: overhaul API graphs

This commit is contained in:
Asger F
2023-06-19 12:01:42 +02:00
parent b572974536
commit 0110610c6a
16 changed files with 1940 additions and 912 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -6,12 +6,17 @@ private import codeql.ruby.typetracking.TypeTracker
private import codeql.ruby.dataflow.SSA
private import FlowSummaryImpl as FlowSummaryImpl
private import SsaImpl as SsaImpl
private import codeql.ruby.ApiGraphs
/**
* An element, viewed as a node in a data flow graph. Either an expression
* (`ExprNode`) or a parameter (`ParameterNode`).
*/
class Node extends TNode {
/** Starts backtracking from this node using API graphs. */
pragma[inline]
API::Node backtrack() { result = API::Internal::getNodeForBacktracking(this) }
/** Gets the expression corresponding to this node, if any. */
CfgNodes::ExprCfgNode asExpr() { result = this.(ExprNode).getExprNode() }
@@ -76,6 +81,11 @@ class Node extends TNode {
result.asCallableAstNode() = c.asCallable()
)
}
/** Gets the enclosing method, if any. */
MethodNode getEnclosingMethod() {
result.asCallableAstNode() = this.asExpr().getExpr().getEnclosingMethod()
}
}
/** A data-flow node corresponding to a call in the control-flow graph. */
@@ -144,6 +154,18 @@ class CallNode extends LocalSourceNode, ExprNode {
result.asExpr() = pair.getValue()
)
}
/**
* Gets a potential target of this call, if any.
*/
final CallableNode getATarget() {
result.asCallableAstNode() = this.asExpr().getExpr().(Call).getATarget()
}
/**
* Holds if this is a `super` call.
*/
final predicate isSuperCall() { this.asExpr().getExpr() instanceof SuperCall }
}
/**
@@ -217,6 +239,10 @@ class SelfParameterNode extends ParameterNode instanceof SelfParameterNodeImpl {
class LocalSourceNode extends Node {
LocalSourceNode() { isLocalSourceNode(this) }
/** Starts tracking this node forward using API graphs. */
pragma[inline]
API::Node track() { result = API::Internal::getNodeForForwardTracking(this) }
/** Holds if this `LocalSourceNode` can flow to `nodeTo` in one or more local flow steps. */
pragma[inline]
predicate flowsTo(Node nodeTo) { hasLocalSource(nodeTo, this) }
@@ -359,6 +385,11 @@ private module Cached {
)
}
cached
predicate methodHasSuperCall(MethodNode method, CallNode call) {
call.isSuperCall() and method = call.getEnclosingMethod()
}
/**
* A place in which a named constant can be looked up during constant lookup.
*/
@@ -387,6 +418,40 @@ private module Cached {
result.asExpr().getExpr() = access
}
/**
* Gets a module for which `constRef` is the reference to an ancestor module.
*
* For example, `M` is the ancestry target of `C` in the following examples:
* ```rb
* class M < C; end
*
* module M
* include C
* end
*
* module M
* prepend C
* end
* ```
*/
private ModuleNode getAncestryTarget(ConstRef constRef) { result.getAnAncestorExpr() = constRef }
/**
* Gets a scope in which a constant lookup may access the contents of the module referenced by `constRef`.
*
* The result is one of the `TConstLookupScope` branches constructed below.
*/
cached
TConstLookupScope getATargetScope(ConstRef constRef) {
forceCachingInSameStage() and
// an ancestry target of `constRef`, or any module reached from it via zero or more `getAnImmediateDescendent` steps
result = MkAncestorLookup(getAncestryTarget(constRef).getAnImmediateDescendent*())
or
// the constant access of `constRef` is the scope expression of some other constant access
constRef.asConstantAccess() = any(ConstantAccess ac).getScopeExpr() and
result = MkQualifiedLookup(constRef.asConstantAccess())
or
// an ancestry target of `constRef` itself (without descendants)
result = MkNestedLookup(getAncestryTarget(constRef))
or
// the constant access of `constRef` is a `Namespace`; use the module it gives
result = MkExactLookup(constRef.asConstantAccess().(Namespace).getModule())
}
cached
predicate forceCachingInSameStage() { any() }
@@ -1028,6 +1093,33 @@ class ModuleNode instanceof Module {
* this predicate.
*/
ModuleNode getNestedModule(string name) { result = super.getNestedModule(name) }
/**
* Starts tracking the module object using API graphs.
*
* Concretely, this tracks forward from the following starting points:
* - A constant access that resolves to this module.
* - `self` in the module scope or in a singleton method of the module.
* - A call to `self.class` in an instance method of this module or an ancestor module.
*/
bindingset[this]
pragma[inline]
API::Node trackModule() { result = API::Internal::getModuleNode(this) }
/**
* Starts tracking instances of this module forward using API graphs.
*
* Concretely, this tracks forward from the following starting points:
* - `self` in instance methods of this module and ancestor modules
* - Calls to `new` on the module object
*
* Note that this includes references to `self` in ancestor modules, but not in descendent modules.
* This is usually the desired behavior, particularly if this module was itself found using
* a call to `getADescendentModule()`.
*/
bindingset[this]
pragma[inline]
API::Node trackInstance() { result = API::Internal::getModuleInstance(this) }
}
/**
@@ -1216,6 +1308,9 @@ class MethodNode extends CallableNode {
/** Holds if this method is protected. */
predicate isProtected() { this.asCallableAstNode().isProtected() }
/** Gets a `super` call in this method. */
CallNode getASuperCall() { methodHasSuperCall(this, result) }
}
/**
@@ -1291,6 +1386,11 @@ class ArrayLiteralNode extends LocalSourceNode, ExprNode {
* Gets an element of the array.
*/
Node getAnElement() { result = this.(CallNode).getPositionalArgument(_) }
/**
* Gets the `n`th element of the array.
*/
Node getElement(int n) { result = this.(CallNode).getPositionalArgument(n) }
}
/**
@@ -1331,24 +1431,6 @@ class ConstRef extends LocalSourceNode {
not exists(access.getScopeExpr())
}
/**
* Gets a module for which this constant is the reference to an ancestor module.
*
* For example, `M` is the ancestry target of `C` in the following examples:
* ```rb
* class M < C {}
*
* module M
* include C
* end
*
* module M
* prepend C
* end
* ```
*/
private ModuleNode getAncestryTarget() { result.getAnAncestorExpr() = this }
/**
* Gets the known target module.
*
@@ -1356,22 +1438,6 @@ class ConstRef extends LocalSourceNode {
*/
private Module getExactTarget() { result.getAnImmediateReference() = access }
/**
* Gets a scope in which a constant lookup may access the contents of the module referenced by this constant.
*/
cached
private TConstLookupScope getATargetScope() {
forceCachingInSameStage() and
result = MkAncestorLookup(this.getAncestryTarget().getAnImmediateDescendent*())
or
access = any(ConstantAccess ac).getScopeExpr() and
result = MkQualifiedLookup(access)
or
result = MkNestedLookup(this.getAncestryTarget())
or
result = MkExactLookup(access.(Namespace).getModule())
}
/**
* Gets the scope expression, or the immediately enclosing `Namespace` (skipping over singleton classes).
*
@@ -1433,7 +1499,7 @@ class ConstRef extends LocalSourceNode {
pragma[inline]
ConstRef getConstant(string name) {
exists(TConstLookupScope scope |
pragma[only_bind_into](scope) = pragma[only_bind_out](this).getATargetScope() and
pragma[only_bind_into](scope) = getATargetScope(pragma[only_bind_out](this)) and
result.accesses(pragma[only_bind_out](scope), name)
)
}
@@ -1455,7 +1521,14 @@ class ConstRef extends LocalSourceNode {
* end
* ```
*/
ModuleNode getADescendentModule() { MkAncestorLookup(result) = this.getATargetScope() }
pragma[inline]
ModuleNode getADescendentModule() { result = getADescendentModuleInline(this) }
}
bindingset[ref]
pragma[inline_late]
private ModuleNode getADescendentModuleInline(ConstRef ref) {
MkAncestorLookup(result) = getATargetScope(ref)
}
/**

View File

@@ -44,7 +44,7 @@ private class SummarizedCallableFromModel extends SummarizedCallable {
override Call getACall() {
exists(API::MethodAccessNode base |
ModelOutput::resolvedSummaryBase(type, path, base) and
result = base.getCallNode().asExpr().getExpr()
result = base.asCall().asExpr().getExpr()
)
}

View File

@@ -99,9 +99,10 @@ API::Node getExtraNodeFromPath(string type, AccessPath path, int n) {
// A row of form `any;Method[foo]` should match any method named `foo`.
type = "any" and
n = 1 and
exists(EntryPointFromAnyType entry |
methodMatchedByName(path, entry.getName()) and
result = entry.getANode()
exists(string methodName, DataFlow::CallNode call |
methodMatchedByName(path, methodName) and
call.getMethodName() = methodName and
result.(API::MethodAccessNode).asCall() = call
)
}
@@ -112,20 +113,10 @@ API::Node getExtraNodeFromType(string type) {
constRef = getConstantFromConstPath(consts)
|
suffix = "!" and
(
result.(API::Node::Internal).asSourceInternal() = constRef
or
result.(API::Node::Internal).asSourceInternal() =
constRef.getADescendentModule().getAnOwnModuleSelf()
)
result = constRef.track()
or
suffix = "" and
(
result.(API::Node::Internal).asSourceInternal() = constRef.getAMethodCall("new")
or
result.(API::Node::Internal).asSourceInternal() =
constRef.getADescendentModule().getAnInstanceSelf()
)
result = constRef.track().getInstance()
)
or
type = "" and
@@ -145,21 +136,6 @@ private predicate methodMatchedByName(AccessPath path, string methodName) {
)
}
/**
* An API graph entry point corresponding to a method name such as `foo` in `;any;Method[foo]`.
*
* This ensures that the API graph rooted in that method call is materialized.
*/
private class EntryPointFromAnyType extends API::EntryPoint {
string name;
EntryPointFromAnyType() { this = "AnyMethod[" + name + "]" and methodMatchedByName(_, name) }
override DataFlow::CallNode getACall() { result.getMethodName() = name }
string getName() { result = name }
}
/**
* Gets a Ruby-specific API graph successor of `node` reachable by resolving `token`.
*/
@@ -175,9 +151,11 @@ API::Node getExtraSuccessorFromNode(API::Node node, AccessPathToken token) {
result = node.getInstance()
or
token.getName() = "Parameter" and
result =
node.getASuccessor(API::Label::getLabelFromParameterPosition(FlowSummaryImplSpecific::parseArgBody(token
.getAnArgument())))
exists(DataFlowDispatch::ArgumentPosition argPos, DataFlowDispatch::ParameterPosition paramPos |
argPos = FlowSummaryImplSpecific::parseParamBody(token.getAnArgument()) and
DataFlowDispatch::parameterMatch(paramPos, argPos) and
result = node.getParameterAtPosition(paramPos)
)
or
exists(DataFlow::ContentSet contents |
SummaryComponent::content(contents) = FlowSummaryImplSpecific::interpretComponentSpecific(token) and
@@ -191,9 +169,11 @@ API::Node getExtraSuccessorFromNode(API::Node node, AccessPathToken token) {
bindingset[token]
API::Node getExtraSuccessorFromInvoke(InvokeNode node, AccessPathToken token) {
token.getName() = "Argument" and
result =
node.getASuccessor(API::Label::getLabelFromArgumentPosition(FlowSummaryImplSpecific::parseParamBody(token
.getAnArgument())))
exists(DataFlowDispatch::ArgumentPosition argPos, DataFlowDispatch::ParameterPosition paramPos |
paramPos = FlowSummaryImplSpecific::parseArgBody(token.getAnArgument()) and
DataFlowDispatch::parameterMatch(paramPos, argPos) and
result = node.getArgumentAtPosition(argPos)
)
}
/**
@@ -211,7 +191,7 @@ predicate invocationMatchesExtraCallSiteFilter(InvokeNode invoke, AccessPathToke
/** An API graph node representing a method call. */
class InvokeNode extends API::MethodAccessNode {
/** Gets the number of arguments to the call. */
int getNumArgument() { result = this.getCallNode().getNumberOfArguments() }
int getNumArgument() { result = this.asCall().getNumberOfArguments() }
}
/** Gets the `InvokeNode` corresponding to a specific invocation of `node`. */

View File

@@ -0,0 +1,329 @@
/**
* Parts of API graphs that can be shared with other dynamic languages.
*
* Depends on TypeTrackerSpecific for the corresponding language.
*/
private import codeql.Locations
private import codeql.ruby.typetracking.TypeTracker
private import TypeTrackerSpecific
/**
* The signature to use when instantiating `ApiGraphShared`.
*
* The implementor should define a newtype with at least three branches as follows:
* ```ql
* newtype TApiNode =
* MkForwardNode(LocalSourceNode node, TypeTracker t) { isReachable(node, t) } or
* MkBackwardNode(LocalSourceNode node, TypeTracker t) { isReachable(node, t) } or
* MkSinkNode(Node node) { ... } or
* ...
* ```
*
* The three branches should be exposed through `getForwardNode`, `getBackwardNode`, and `getSinkNode`, respectively.
*/
signature module ApiGraphSharedSig {
/** A node in the API graph. */
class ApiNode {
/** Gets a string representation of this API node. */
string toString();
/** Gets the location associated with this API node, if any. */
Location getLocation();
}
/**
* Gets the forward node with the given type-tracking state.
*
* This node will have outgoing epsilon edges to its type-tracking successors.
*/
ApiNode getForwardNode(TypeTrackingNode node, TypeTracker t);
/**
* Gets the backward node with the given type-tracking state.
*
* This node will have outgoing epsilon edges to its type-tracking predecessors.
*/
ApiNode getBackwardNode(TypeTrackingNode node, TypeTracker t);
/**
* Gets the sink node corresponding to `node`.
*
* Since sinks are not generally `LocalSourceNode`s, such nodes are materialised separately in order for
* the API graph to include representatives for sinks. Note that there is no corresponding case for "source"
* nodes as these are represented as forward nodes with initial-state type-trackers.
*
* Sink nodes have outgoing epsilon edges to the backward nodes corresponding to their local sources.
*/
ApiNode getSinkNode(Node node);
/**
* Holds if a language-specific epsilon edge `pred -> succ` should be generated.
*/
predicate specificEpsilonEdge(ApiNode pred, ApiNode succ);
}
/**
* Parts of API graphs that can be shared between language implementations.
*/
module ApiGraphShared<ApiGraphSharedSig S> {
private import S
/** Gets a local source of `node`. */
bindingset[node]
pragma[inline_late]
TypeTrackingNode getALocalSourceStrict(Node node) { result = node.getALocalSource() }
cached
private module Cached {
/**
* Holds if there is an epsilon edge `pred -> succ`.
*
* Epsilon edges arise from type-tracking steps (between forward nodes and, reversed,
* between backward nodes), from sink nodes to the backward start node of their local sources,
* and from language-specific edges provided by `specificEpsilonEdge`.
*
* That relation is reflexive, so `fastTC` produces the equivalent of a reflexive, transitive closure.
*/
pragma[noopt]
cached
predicate epsilonEdge(ApiNode pred, ApiNode succ) {
// type-tracking steps; each step yields a forward edge and a reversed backward edge
exists(
StepSummary summary, TypeTrackingNode predNode, TypeTracker predState,
TypeTrackingNode succNode, TypeTracker succState
|
StepSummary::stepCall(predNode, succNode, summary)
or
StepSummary::stepNoCall(predNode, succNode, summary)
|
// forward: the edge follows the direction of the type-tracking step
pred = getForwardNode(predNode, predState) and
succState = StepSummary::append(predState, summary) and
succ = getForwardNode(succNode, succState)
or
// backward: the edge goes against the direction of the type-tracking step
succ = getBackwardNode(predNode, predState) and // swap order for backward flow
succState = StepSummary::append(predState, summary) and
pred = getBackwardNode(succNode, succState) // swap order for backward flow
)
or
// a sink node steps to the backward start node of each of its local sources
exists(Node sink, TypeTrackingNode localSource |
pred = getSinkNode(sink) and
localSource = getALocalSourceStrict(sink) and
succ = getBackwardStartNode(localSource)
)
or
// language-specific edges supplied through the signature module
specificEpsilonEdge(pred, succ)
or
// reflexive edge; NOTE(review): the `instanceof` check presumably exists to bind `succ` under `pragma[noopt]` - confirm
succ instanceof ApiNode and
succ = pred
}
/**
* Holds if `pred` can reach `succ` by zero or more epsilon edges.
*/
cached
predicate epsilonStar(ApiNode pred, ApiNode succ) = fastTC(epsilonEdge/2)(pred, succ)
/** Gets the API node to use when starting forward flow from `source` */
cached
ApiNode forwardStartNode(TypeTrackingNode source) {
result = getForwardNode(source, TypeTracker::end(false))
}
/** Gets the API node to use when starting backward flow from `sink` */
cached
ApiNode backwardStartNode(TypeTrackingNode sink) {
// There is backward flow A->B iff there is forward flow B->A.
// The starting point of backward flow corresponds to the end of a forward flow, and vice versa.
result = getBackwardNode(sink, TypeTracker::end(_))
}
/** Gets `node` as a data flow source. */
cached
TypeTrackingNode asSourceCached(ApiNode node) { node = forwardEndNode(result) }
/** Gets `node` as a data flow sink. */
cached
Node asSinkCached(ApiNode node) { node = getSinkNode(result) }
}
private import Cached
/** Gets an API node corresponding to the end of forward-tracking to `localSource`. */
pragma[nomagic]
private ApiNode forwardEndNode(TypeTrackingNode localSource) {
result = getForwardNode(localSource, TypeTracker::end(_))
}
/** Gets an API node corresponding to the end of backtracking to `localSource`. */
pragma[nomagic]
private ApiNode backwardEndNode(TypeTrackingNode localSource) {
result = getBackwardNode(localSource, TypeTracker::end(false))
}
/** Gets a node reachable from `node` by zero or more epsilon edges, including `node` itself. */
bindingset[node]
pragma[inline_late]
ApiNode getAnEpsilonSuccessorInline(ApiNode node) { epsilonStar(node, result) }
/** Gets `node` as a data flow sink. */
bindingset[node]
pragma[inline_late]
Node asSinkInline(ApiNode node) { result = asSinkCached(node) }
/** Gets `node` as a data flow source. */
bindingset[node]
pragma[inline_late]
TypeTrackingNode asSourceInline(ApiNode node) { result = asSourceCached(node) }
/** Gets a value reachable from `source`. */
bindingset[source]
pragma[inline_late]
Node getAValueReachableFromSourceInline(ApiNode source) {
exists(TypeTrackingNode src |
src = asSourceInline(getAnEpsilonSuccessorInline(source)) and
src.flowsTo(pragma[only_bind_into](result))
)
}
/** Gets a value that can reach `sink`. */
bindingset[sink]
pragma[inline_late]
Node getAValueReachingSinkInline(ApiNode sink) {
result = asSinkInline(getAnEpsilonSuccessorInline(sink))
}
/**
* Gets the starting point for forward-tracking at `node`.
*
* Should be used to obtain the successor of an edge when constructing labelled edges.
*/
bindingset[node]
pragma[inline_late]
ApiNode getForwardStartNode(Node node) { result = forwardStartNode(node) }
/**
* Gets the starting point of backtracking from `node`.
*
* Should be used to obtain the successor of an edge when constructing labelled edges.
*/
bindingset[node]
pragma[inline_late]
ApiNode getBackwardStartNode(Node node) { result = backwardStartNode(node) }
/**
* Gets a possible ending point of forward-tracking at `node`.
*
* Should be used to obtain the predecessor of an edge when constructing labelled edges.
*
* This is not backed by a `cached` predicate, and should only be used for materialising `cached`
* predicates in the API graph implementation - it should not be called in later stages.
*/
bindingset[node]
pragma[inline_late]
ApiNode getForwardEndNode(Node node) { result = forwardEndNode(node) }
/**
* Gets a possible ending point of backtracking to `node`.
*
* Should be used to obtain the predecessor of an edge when constructing labelled edges.
*
* This is not backed by a `cached` predicate, and should only be used for materialising `cached`
* predicates in the API graph implementation - it should not be called in later stages.
*/
bindingset[node]
pragma[inline_late]
ApiNode getBackwardEndNode(Node node) { result = backwardEndNode(node) }
/**
* Gets a possible ending point of forward-tracking or backtracking at `node`.
*
* Intended for obtaining the predecessor of an edge generated from a store or load edge.
*/
bindingset[node]
pragma[inline_late]
ApiNode getForwardOrBackwardEndNode(Node node) {
result = [getForwardEndNode(node), getBackwardEndNode(node)]
}
/** Gets an API node for tracking forward starting at `node`. The implementation of `DataFlow::LocalSourceNode.track()`. */
bindingset[node]
pragma[inline_late]
ApiNode getNodeForForwardTracking(Node node) { result = forwardStartNode(node) }
/** Gets an API node for backtracking starting at `node`. The implementation of `DataFlow::Node.backtrack()`. */
bindingset[node]
pragma[inline_late]
ApiNode getNodeForBacktracking(Node node) {
result = getBackwardStartNode(getALocalSourceStrict(node))
}
/** Parts of the shared module to be re-exported by the user-facing `API` module. */
module Public {
/**
* The signature to use when instantiating the `ExplainFlow` module.
*/
signature module ExplainFlowSig {
/** Holds if `node` should be a source. */
predicate isSource(ApiNode node);
/** Holds if `node` should be a sink. */
default predicate isSink(ApiNode node) { any() }
/** Holds if `node` should be skipped in the generated paths. */
default predicate isHidden(ApiNode node) { none() }
}
/**
* Module to help debug and visualize the data flows underlying API graphs.
*
* This module exports the query predicates for a path-problem query, and should be imported
* into the top-level of such a query.
*
* The module argument should specify source and sink API nodes, and the resulting query
* will show paths of epsilon edges that go from a source to a sink. Only epsilon edges are visualized.
*
* To condense the output a bit, paths in which the source and sink are the same node are omitted.
*/
module ExplainFlow<ExplainFlowSig T> {
private import T
/**
* Gets a node that lies on a path of epsilon edges ending in a sink that is
* reachable from some source.
*
* The base case selects sinks that are epsilon-reachable from a source; the recursive
* case adds every node with an epsilon edge to an already relevant node.
*/
private ApiNode relevantNode() {
isSink(result) and
result = getAnEpsilonSuccessorInline(any(ApiNode node | isSource(node)))
or
epsilonEdge(result, relevantNode())
}
/** Holds if `node` is part of the graph to visualize. */
query predicate nodes(ApiNode node) { node = relevantNode() and not isHidden(node) }
/**
* Holds if `pred -> succ` is an epsilon edge between relevant nodes where `succ` is hidden.
*
* Used by `edges` to step over hidden nodes.
*/
private predicate edgeToHiddenNode(ApiNode pred, ApiNode succ) {
epsilonEdge(pred, succ) and
isHidden(succ) and
pred = relevantNode() and
succ = relevantNode()
}
/**
* Holds if `pred -> succ` is an edge in the graph to visualize.
*
* Both end points must be visible nodes. The edge may pass through hidden nodes:
* `pred` reaches `mid` via zero or more edges into hidden nodes, and `mid` has an
* epsilon edge to `succ`.
*/
query predicate edges(ApiNode pred, ApiNode succ) {
nodes(pred) and
nodes(succ) and
exists(ApiNode mid |
edgeToHiddenNode*(pred, mid) and
epsilonEdge(mid, succ)
)
}
/**
* Holds for each source/sink pair to visualize in the graph.
*
* `location` is the sink node and `message` is a fixed string. Pairs where the
* source and sink are the same node are excluded.
*/
query predicate problems(
ApiNode location, ApiNode sourceNode, ApiNode sinkNode, string message
) {
nodes(sourceNode) and
nodes(sinkNode) and
isSource(sourceNode) and
isSink(sinkNode) and
sinkNode = getAnEpsilonSuccessorInline(sourceNode) and
sourceNode != sinkNode and
location = sinkNode and
message = "Node flows here"
}
}
}
}

View File

@@ -55,10 +55,9 @@ private module Cached {
)
}
pragma[nomagic]
private TypeTracker noContentTypeTracker(boolean hasCall) {
result = MkTypeTracker(hasCall, noContent())
}
/** Gets a type tracker with no content and the call bit set to the given value. */
cached
TypeTracker noContentTypeTracker(boolean hasCall) { result = MkTypeTracker(hasCall, noContent()) }
/** Gets the summary resulting from appending `step` to type-tracking summary `tt`. */
cached
@@ -340,6 +339,8 @@ class StepSummary extends TStepSummary {
/** Provides predicates for updating step summaries (`StepSummary`s). */
module StepSummary {
predicate append = Cached::append/2;
/**
* Gets the summary that corresponds to having taken a forwards
* inter-procedural step from `nodeFrom` to `nodeTo`.
@@ -400,6 +401,35 @@ module StepSummary {
}
deprecated predicate localSourceStoreStep = flowsToStoreStep/3;
/** Gets the step summary for a level step. */
StepSummary levelStep() { result = LevelStep() }
/** Gets the step summary for a call step. */
StepSummary callStep() { result = CallStep() }
/** Gets the step summary for a return step. */
StepSummary returnStep() { result = ReturnStep() }
/** Gets the step summary for storing into `content`. */
StepSummary storeStep(TypeTrackerContent content) { result = StoreStep(content) }
/** Gets the step summary for loading from `content`. */
StepSummary loadStep(TypeTrackerContent content) { result = LoadStep(content) }
/** Gets the step summary for loading from `load` and then storing into `store`. */
StepSummary loadStoreStep(TypeTrackerContent load, TypeTrackerContent store) {
result = LoadStoreStep(load, store)
}
/** Gets the step summary for a step that only permits contents matched by `filter`. */
StepSummary withContent(ContentFilter filter) { result = WithContent(filter) }
/** Gets the step summary for a step that blocks contents matched by `filter`. */
StepSummary withoutContent(ContentFilter filter) { result = WithoutContent(filter) }
/** Gets the step summary for a jump step. */
StepSummary jumpStep() { result = JumpStep() }
}
/**
@@ -545,6 +575,13 @@ module TypeTracker {
* Gets a valid end point of type tracking.
*/
TypeTracker end() { result.end() }
/**
* INTERNAL USE ONLY.
*
* Gets a valid end point of type tracking with the call bit set to the given value.
*/
predicate end = Cached::noContentTypeTracker/1;
}
pragma[nomagic]

View File

@@ -1,8 +1,8 @@
classMethodCalls
| test1.rb:58:1:58:8 | Use getMember("M1").getMember("C1").getMethod("m").getReturn() |
| test1.rb:59:1:59:8 | Use getMember("M2").getMember("C3").getMethod("m").getReturn() |
| test1.rb:58:1:58:8 | ForwardNode(call to m) |
| test1.rb:59:1:59:8 | ForwardNode(call to m) |
instanceMethodCalls
| test1.rb:61:1:61:12 | Use getMember("M1").getMember("C1").getMethod("new").getReturn().getMethod("m").getReturn() |
| test1.rb:62:1:62:12 | Use getMember("M2").getMember("C3").getMethod("new").getReturn().getMethod("m").getReturn() |
| test1.rb:61:1:61:12 | ForwardNode(call to m) |
| test1.rb:62:1:62:12 | ForwardNode(call to m) |
flowThroughArray
| test1.rb:73:1:73:10 | call to m |

View File

@@ -0,0 +1,77 @@
import ruby
import codeql.ruby.ast.internal.TreeSitter
import codeql.ruby.dataflow.internal.AccessPathSyntax
import codeql.ruby.frameworks.data.internal.ApiGraphModels
import codeql.ruby.ApiGraphs
import TestUtilities.InlineExpectationsTest
class AccessPathFromExpectation extends AccessPath::Range {
AccessPathFromExpectation() { hasExpectationWithValue(_, this) }
}
/**
* Gets an API node reached by evaluating the first `n` tokens of `path`.
*
* Only access paths that appear as the value of an expectation are evaluated.
* The first token must be `Member`, `Method` or `EntryPoint`; later tokens are resolved
* with `getSuccessorFromNode` and `getSuccessorFromInvoke`, plus a special case for `MethodBracket`.
*/
API::Node evaluatePath(AccessPath path, int n) {
path instanceof AccessPathFromExpectation and
n = 1 and
// base case: resolve the first token against the top-level members, calls and entry points
exists(AccessPathToken token | token = path.getToken(0) |
token.getName() = "Member" and
result = API::getTopLevelMember(token.getAnArgument())
or
token.getName() = "Method" and
result = API::getTopLevelCall(token.getAnArgument())
or
token.getName() = "EntryPoint" and
result = token.getAnArgument().(API::EntryPoint).getANode()
)
or
// recursive cases: extend the result for the first `n - 1` tokens with token `n - 1`
result = getSuccessorFromNode(evaluatePath(path, n - 1), path.getToken(n - 1))
or
result = getSuccessorFromInvoke(evaluatePath(path, n - 1), path.getToken(n - 1))
or
// TODO this is a workaround, support parsing of Method['[]'] instead
path.getToken(n - 1).getName() = "MethodBracket" and
result = evaluatePath(path, n - 1).getMethod("[]")
}
/** Gets an API node reached by evaluating all tokens of `path`. */
API::Node evaluatePath(AccessPath path) { result = evaluatePath(path, path.getNumToken()) }
/**
* An inline-expectations test that evaluates the access path given as the value of each
* expectation and reports the data flow nodes associated with the resulting API node.
*
* The tag selects which view of the API node is reported: `source`, `reachableFromSource`,
* `sink` or `call`.
*/
module ApiUseTest implements TestSig {
string getARelevantTag() { result = ["source", "sink", "call", "reachableFromSource"] }
predicate hasActualResult(Location location, string element, string tag, string value) {
// All results are considered optional
none()
}
predicate hasOptionalResult(Location location, string element, string tag, string value) {
exists(API::Node apiNode, DataFlow::Node dataflowNode |
// `value` is interpreted as an access path and evaluated to an API node
apiNode = evaluatePath(value) and
(
tag = "source" and dataflowNode = apiNode.asSource()
or
tag = "reachableFromSource" and dataflowNode = apiNode.getAValueReachableFromSource()
or
tag = "sink" and dataflowNode = apiNode.asSink()
or
tag = "call" and dataflowNode = apiNode.asCall()
) and
location = dataflowNode.getLocation() and
element = dataflowNode.toString()
)
}
}
import MakeTest<ApiUseTest>
/** A test entry point whose calls are all calls to a method named `customEntryPointCall`. */
class CustomEntryPointCall extends API::EntryPoint {
CustomEntryPointCall() { this = "CustomEntryPointCall" }
override DataFlow::CallNode getACall() { result.getMethodName() = "customEntryPointCall" }
}
/** A test entry point whose sources are all calls to a method named `customEntryPointUse`. */
class CustomEntryPointUse extends API::EntryPoint {
CustomEntryPointUse() { this = "CustomEntryPointUse" }
override DataFlow::LocalSourceNode getASource() {
result.(DataFlow::CallNode).getMethodName() = "customEntryPointUse"
}
}

View File

@@ -1,39 +1,39 @@
Something.foo.withCallback do |a, b| #$ use=getMember("Something").getMethod("foo").getReturn()
a.something #$ use=getMember("Something").getMethod("foo").getReturn().getMethod("withCallback").getBlock().getParameter(0).getMethod("something").getReturn()
b.somethingElse #$ use=getMember("Something").getMethod("foo").getReturn().getMethod("withCallback").getBlock().getParameter(1).getMethod("somethingElse").getReturn()
end #$ use=getMember("Something").getMethod("foo").getReturn().getMethod("withCallback").getReturn()
Something.foo.withCallback do |a, b| #$ source=Member[Something].Method[foo].ReturnValue
a.something #$ source=Member[Something].Method[foo].ReturnValue.Method[withCallback].Argument[block].Argument[0].Method[something].ReturnValue
b.somethingElse #$ source=Member[Something].Method[foo].ReturnValue.Method[withCallback].Argument[block].Argument[1].Method[somethingElse].ReturnValue
end #$ source=Member[Something].Method[foo].ReturnValue.Method[withCallback].ReturnValue
Something.withNamedArg do |a:, b: nil| #$ use=getMember("Something")
a.something #$ use=getMember("Something").getMethod("withNamedArg").getBlock().getKeywordParameter("a").getMethod("something").getReturn()
b.somethingElse #$ use=getMember("Something").getMethod("withNamedArg").getBlock().getKeywordParameter("b").getMethod("somethingElse").getReturn()
end #$ use=getMember("Something").getMethod("withNamedArg").getReturn()
Something.withNamedArg do |a:, b: nil| #$ source=Member[Something]
a.something #$ source=Member[Something].Method[withNamedArg].Argument[block].Parameter[a:].Method[something].ReturnValue
b.somethingElse #$ source=Member[Something].Method[withNamedArg].Argument[block].Parameter[b:].Method[somethingElse].ReturnValue
end #$ source=Member[Something].Method[withNamedArg].ReturnValue
Something.withLambda ->(a, b) { #$ use=getMember("Something")
a.something #$ use=getMember("Something").getMethod("withLambda").getParameter(0).getParameter(0).getMethod("something").getReturn()
b.something #$ use=getMember("Something").getMethod("withLambda").getParameter(0).getParameter(1).getMethod("something").getReturn()
} #$ use=getMember("Something").getMethod("withLambda").getReturn()
Something.withLambda ->(a, b) { #$ source=Member[Something]
a.something #$ source=Member[Something].Method[withLambda].Argument[0].Parameter[0].Method[something].ReturnValue
b.something #$ source=Member[Something].Method[withLambda].Argument[0].Parameter[1].Method[something].ReturnValue
} #$ source=Member[Something].Method[withLambda].ReturnValue
Something.namedCallback( #$ use=getMember("Something")
Something.namedCallback( #$ source=Member[Something]
onEvent: ->(a, b) {
a.something #$ use=getMember("Something").getMethod("namedCallback").getKeywordParameter("onEvent").getParameter(0).getMethod("something").getReturn()
b.something #$ use=getMember("Something").getMethod("namedCallback").getKeywordParameter("onEvent").getParameter(1).getMethod("something").getReturn()
a.something #$ source=Member[Something].Method[namedCallback].Argument[onEvent:].Parameter[0].Method[something].ReturnValue
b.something #$ source=Member[Something].Method[namedCallback].Argument[onEvent:].Parameter[1].Method[something].ReturnValue
}
) #$ use=getMember("Something").getMethod("namedCallback").getReturn()
) #$ source=Member[Something].Method[namedCallback].ReturnValue
Something.nestedCall1 do |a| #$ use=getMember("Something")
a.nestedCall2 do |b:| #$ use=getMember("Something").getMethod("nestedCall1").getBlock().getParameter(0)
b.something #$ use=getMember("Something").getMethod("nestedCall1").getBlock().getParameter(0).getMethod("nestedCall2").getBlock().getKeywordParameter("b").getMethod("something").getReturn()
end #$ use=getMember("Something").getMethod("nestedCall1").getBlock().getParameter(0).getMethod("nestedCall2").getReturn()
end #$ use=getMember("Something").getMethod("nestedCall1").getReturn()
Something.nestedCall1 do |a| #$ source=Member[Something]
a.nestedCall2 do |b:| #$ reachableFromSource=Member[Something].Method[nestedCall1].Argument[block].Parameter[0]
b.something #$ source=Member[Something].Method[nestedCall1].Argument[block].Parameter[0].Method[nestedCall2].Argument[block].Parameter[b:].Method[something].ReturnValue
end #$ source=Member[Something].Method[nestedCall1].Argument[block].Parameter[0].Method[nestedCall2].ReturnValue
end #$ source=Member[Something].Method[nestedCall1].ReturnValue
def getCallback()
->(x) {
x.something #$ use=getMember("Something").getMethod("indirectCallback").getParameter(0).getParameter(0).getMethod("something").getReturn()
x.something #$ source=Member[Something].Method[indirectCallback].Argument[0].Parameter[0].Method[something].ReturnValue
}
end
Something.indirectCallback(getCallback()) #$ use=getMember("Something").getMethod("indirectCallback").getReturn()
Something.indirectCallback(getCallback()) #$ source=Member[Something].Method[indirectCallback].ReturnValue
Something.withMixed do |a, *args, b| #$ use=getMember("Something")
a.something #$ use=getMember("Something").getMethod("withMixed").getBlock().getParameter(0).getMethod("something").getReturn()
Something.withMixed do |a, *args, b| #$ source=Member[Something]
a.something #$ source=Member[Something].Method[withMixed].Argument[block].Parameter[0].Method[something].ReturnValue
# b.something # not currently handled correctly
end #$ use=getMember("Something").getMethod("withMixed").getReturn()
end #$ source=Member[Something].Method[withMixed].ReturnValue

View File

@@ -0,0 +1,31 @@
# Fixture: passes a triply nested array literal containing 'sink' directly as
# the argument of Something.foo. The inline annotation expects the string to be
# a sink three Element[0] steps below Argument[0] of that call.
def chained_access1
Something.foo [[[
'sink' # $ sink=Member[Something].Method[foo].Argument[0].Element[0].Element[0].Element[0]
]]]
end
# Fixture: same nesting as chained_access1, but the outermost array is created
# empty, its element 0 is assigned a doubly nested array holding 'sink', and
# only then is the array passed to Something.foo.
def chained_access2
array = []
# The outer level is written through an index assignment rather than a literal.
array[0] = [[
'sink' # $ sink=Member[Something].Method[foo].Argument[0].Element[0].Element[0].Element[0]
]]
Something.foo array
end
# Fixture: the two outer array levels exist up front (`[[]]`); the innermost
# array holding 'sink' is stored with a chained index assignment before the
# array is passed to Something.foo.
def chained_access3
array = [[]]
# Two chained index operations select the slot that receives the inner array.
array[0][0] = [
'sink' # $ sink=Member[Something].Method[foo].Argument[0].Element[0].Element[0].Element[0]
]
Something.foo array
end
# Fixture: passes a three-level nested hash literal to Something.foo, with
# 'sink' stored under :one -> :two -> :three. The inline annotation expects the
# string to be a sink at Argument[0].Element[:one].Element[:two].Element[:three].
#
# The hash is wrapped in parentheses on purpose: in `Something.foo { ... }` Ruby
# reads the braces as a block rather than as a hash argument, so `:one => ...`
# would not be a valid hash entry and the call would have no Argument[0].
def chained_access4
    Something.foo({
        :one => {
            :two => {
                :three => 'sink' # $ sink=Member[Something].Method[foo].Argument[0].Element[:one].Element[:two].Element[:three]
            }
        }
    })
end

View File

@@ -0,0 +1,64 @@
# Fixture base class. Both methods yield the string "taint" to the caller's
# block; the inline annotations expect the yielded value to be a sink on a path
# that starts at the block return value of Something.foo (instance method) or
# Something.bar (singleton method).
class BaseClass
# Instance method: yields "taint" as argument 0 of the given block.
def inheritedInstanceMethod
yield "taint" # $ sink=Member[Something].Method[foo].Argument[block].ReturnValue.Method[inheritedInstanceMethod].Parameter[block].Argument[0]
end
# Singleton method: yields "taint" as argument 0 of the given block.
def self.inheritedSingletonMethod
yield "taint" # $ sink=Member[Something].Method[bar].Argument[block].ReturnValue.Method[inheritedSingletonMethod].Parameter[block].Argument[0]
end
end
# Fixture class derived from BaseClass. It defines its own yielding methods,
# two methods that return `self` from a block passed to Something.baz, and two
# one-parameter methods (`self.foo` and `bar`) whose parameter is annotated as
# reachable from BaseClass, ClassWithCallbacks and Subclass.
class ClassWithCallbacks < BaseClass
# Yields "taint" as argument 0 of the given block.
def instanceMethod
yield "taint" # $ sink=Member[Something].Method[foo].Argument[block].ReturnValue.Method[instanceMethod].Parameter[block].Argument[0]
end
# Singleton method: yields "bar" as argument 0 of the given block.
def self.singletonMethod
yield "bar" # $ sink=Member[Something].Method[bar].Argument[block].ReturnValue.Method[singletonMethod].Parameter[block].Argument[0]
end
# Passes Something.baz a block whose return value is the receiver instance.
def escapeSelf
Something.baz { self }
end
# Singleton variant: the block passed to Something.baz returns the class object.
def self.escapeSingletonSelf
Something.baz { self }
end
# Singleton method taking one parameter. `x` is repeated on three lines so that
# each line can carry one expectation, one per class named in the annotations.
def self.foo x
x # $ reachableFromSource=Member[BaseClass].Method[foo].Parameter[0]
x # $ reachableFromSource=Member[ClassWithCallbacks].Method[foo].Parameter[0]
x # $ reachableFromSource=Member[Subclass].Method[foo].Parameter[0]
end
# Instance method taking one parameter; same three-line layout as `self.foo`,
# with the paths going through `Instance`.
def bar x
x # $ reachableFromSource=Member[BaseClass].Instance.Method[bar].Parameter[0]
x # $ reachableFromSource=Member[ClassWithCallbacks].Instance.Method[bar].Parameter[0]
x # $ reachableFromSource=Member[Subclass].Instance.Method[bar].Parameter[0]
end
end
# Fixture class derived from ClassWithCallbacks. Both methods yield "bar"; the
# annotations expect the yielded value to be a sink on a path starting at the
# block return value of Something.baz.
# NOTE(review): presumably this relies on the `Something.baz { self }` calls in
# ClassWithCallbacks, where `self` may be a Subclass (instance) - confirm.
class Subclass < ClassWithCallbacks
# Yields "bar" as argument 0 of the given block.
def instanceMethodInSubclass
yield "bar" # $ sink=Member[Something].Method[baz].Argument[block].ReturnValue.Method[instanceMethodInSubclass].Parameter[block].Argument[0]
end
# Singleton method: yields "bar" as argument 0 of the given block.
def self.singletonMethodInSubclass
yield "bar" # $ sink=Member[Something].Method[baz].Argument[block].ReturnValue.Method[singletonMethodInSubclass].Parameter[block].Argument[0]
end
end
# Return a ClassWithCallbacks instance from the block given to Something.foo,
# and the ClassWithCallbacks class object from the block given to Something.bar.
# These are the `Method[foo].Argument[block].ReturnValue` and
# `Method[bar].Argument[block].ReturnValue` prefixes used by the sink
# annotations in BaseClass and ClassWithCallbacks.
Something.foo { ClassWithCallbacks.new }
Something.bar { ClassWithCallbacks }
# Fixture class with a `call` method. The annotations describe `call` in terms
# of argument 0 of `topLevelMethod`: its parameter `x` is that argument's
# Parameter[0] and its result "bar" is that argument's ReturnValue.
class ClassWithCallMethod
# Evaluates `x` (annotated line) and returns the string "bar".
def call x
x # $ reachableFromSource=Method[topLevelMethod].Argument[0].Parameter[0]
"bar" # $ sink=Method[topLevelMethod].Argument[0].ReturnValue
end
end
# Call the receiver-less method `topLevelMethod` with a ClassWithCallMethod
# instance as argument 0.
topLevelMethod ClassWithCallMethod.new
# Call it again without arguments and read the stored result; the annotation
# expects `blah` to be reachable from the method's return value.
blah = topLevelMethod
blah # $ reachableFromSource=Method[topLevelMethod].ReturnValue

View File

@@ -1,34 +1,34 @@
MyModule #$ use=getMember("MyModule")
print MyModule.foo #$ use=getMember("MyModule").getMethod("foo").getReturn()
Kernel.print(e) #$ use=getMember("Kernel").getMethod("print").getReturn() def=getMember("Kernel").getMethod("print").getParameter(0)
Object::Kernel #$ use=getMember("Kernel")
Object::Kernel.print(e) #$ use=getMember("Kernel").getMethod("print").getReturn()
MyModule #$ source=Member[MyModule]
print MyModule.foo #$ source=Member[MyModule].Method[foo].ReturnValue
Kernel.print(e) #$ source=Member[Kernel].Method[print].ReturnValue sink=Member[Kernel].Method[print].Argument[0]
Object::Kernel #$ source=Member[Kernel]
Object::Kernel.print(e) #$ source=Member[Kernel].Method[print].ReturnValue
begin
print MyModule.bar #$ use=getMember("MyModule").getMethod("bar").getReturn()
raise AttributeError #$ use=getMember("AttributeError")
rescue AttributeError => e #$ use=getMember("AttributeError")
Kernel.print(e) #$ use=getMember("Kernel").getMethod("print").getReturn()
print MyModule.bar #$ source=Member[MyModule].Method[bar].ReturnValue
raise AttributeError #$ source=Member[AttributeError]
rescue AttributeError => e #$ source=Member[AttributeError]
Kernel.print(e) #$ source=Member[Kernel].Method[print].ReturnValue
end
Unknown.new.run #$ use=getMember("Unknown").getMethod("new").getReturn().getMethod("run").getReturn()
Foo::Bar::Baz #$ use=getMember("Foo").getMember("Bar").getMember("Baz")
Unknown.new.run #$ source=Member[Unknown].Method[new].ReturnValue.Method[run].ReturnValue
Foo::Bar::Baz #$ source=Member[Foo].Member[Bar].Member[Baz]
Const = [1, 2, 3] #$ use=getMember("Array").getMethod("[]").getReturn()
Const.each do |c| #$ use=getMember("Const")
puts c #$ use=getMember("Const").getMethod("each").getBlock().getParameter(0) use=getMember("Const").getContent(element)
end #$ use=getMember("Const").getMethod("each").getReturn() def=getMember("Const").getMethod("each").getBlock()
Const = [1, 2, 3] #$ source=Member[Array].MethodBracket.ReturnValue
Const.each do |c| #$ source=Member[Const]
puts c #$ reachableFromSource=Member[Const].Method[each].Argument[block].Parameter[0] reachableFromSource=Member[Const].Element[any]
end #$ source=Member[Const].Method[each].ReturnValue sink=Member[Const].Method[each].Argument[block]
foo = Foo #$ use=getMember("Foo")
foo::Bar::Baz #$ use=getMember("Foo").getMember("Bar").getMember("Baz")
foo = Foo #$ source=Member[Foo]
foo::Bar::Baz #$ source=Member[Foo].Member[Bar].Member[Baz]
FooAlias = Foo #$ use=getMember("Foo")
FooAlias::Bar::Baz #$ use=getMember("Foo").getMember("Bar").getMember("Baz")
FooAlias = Foo #$ source=Member[Foo]
FooAlias::Bar::Baz #$ source=Member[Foo].Member[Bar].Member[Baz] source=Member[FooAlias].Member[Bar].Member[Baz]
module Outer
module Inner
end
end
Outer::Inner.foo #$ use=getMember("Outer").getMember("Inner").getMethod("foo").getReturn()
Outer::Inner.foo #$ source=Member[Outer].Member[Inner].Method[foo].ReturnValue
module M1
class C1
@@ -40,36 +40,36 @@ module M1
end
end
class C2 < M1::C1 #$ use=getMember("M1").getMember("C1")
class C2 < M1::C1 #$ source=Member[M1].Member[C1]
end
module M2
class C3 < M1::C1 #$ use=getMember("M1").getMember("C1")
class C3 < M1::C1 #$ source=Member[M1].Member[C1]
end
class C4 < C2 #$ use=getMember("C2")
class C4 < C2 #$ source=Member[C2]
end
end
C2 #$ use=getMember("C2") use=getMember("M1").getMember("C1").getASubclass()
M2::C3 #$ use=getMember("M2").getMember("C3") use=getMember("M1").getMember("C1").getASubclass()
M2::C4 #$ use=getMember("M2").getMember("C4") use=getMember("C2").getASubclass() use=getMember("M1").getMember("C1").getASubclass().getASubclass()
C2 #$ source=Member[C2] reachableFromSource=Member[M1].Member[C1]
M2::C3 #$ source=Member[M2].Member[C3] reachableFromSource=Member[M1].Member[C1]
M2::C4 #$ source=Member[M2].Member[C4] reachableFromSource=Member[C2] reachableFromSource=Member[M1].Member[C1]
M1::C1.m #$ use=getMember("M1").getMember("C1").getMethod("m").getReturn()
M2::C3.m #$ use=getMember("M2").getMember("C3").getMethod("m").getReturn() use=getMember("M1").getMember("C1").getASubclass().getMethod("m").getReturn()
M1::C1.m #$ source=Member[M1].Member[C1].Method[m].ReturnValue
M2::C3.m #$ source=Member[M2].Member[C3].Method[m].ReturnValue source=Member[M1].Member[C1].Method[m].ReturnValue
M1::C1.new.m #$ use=getMember("M1").getMember("C1").getMethod("new").getReturn().getMethod("m").getReturn()
M2::C3.new.m #$ use=getMember("M2").getMember("C3").getMethod("new").getReturn().getMethod("m").getReturn()
M1::C1.new.m #$ source=Member[M1].Member[C1].Method[new].ReturnValue.Method[m].ReturnValue
M2::C3.new.m #$ source=Member[M2].Member[C3].Method[new].ReturnValue.Method[m].ReturnValue
Foo.foo(a,b:c) #$ use=getMember("Foo").getMethod("foo").getReturn() def=getMember("Foo").getMethod("foo").getParameter(0) def=getMember("Foo").getMethod("foo").getKeywordParameter("b")
Foo.foo(a,b:c) #$ source=Member[Foo].Method[foo].ReturnValue sink=Member[Foo].Method[foo].Argument[0] sink=Member[Foo].Method[foo].Argument[b:]
def userDefinedFunction(x, y)
x.noApiGraph(y)
x.customEntryPointCall(y) #$ call=entryPoint("CustomEntryPointCall") use=entryPoint("CustomEntryPointCall").getReturn() rhs=entryPoint("CustomEntryPointCall").getParameter(0)
x.customEntryPointUse(y) #$ use=entryPoint("CustomEntryPointUse")
x.customEntryPointCall(y) #$ call=EntryPoint[CustomEntryPointCall] source=EntryPoint[CustomEntryPointCall].ReturnValue sink=EntryPoint[CustomEntryPointCall].Parameter[0]
x.customEntryPointUse(y) #$ source=EntryPoint[CustomEntryPointUse]
end
array = [A::B::C] #$ use=getMember("Array").getMethod("[]").getReturn()
array[0].m #$ use=getMember("A").getMember("B").getMember("C").getMethod("m").getReturn()
array = [A::B::C] #$ source=Member[Array].MethodBracket.ReturnValue
array[0].m #$ source=Member[A].Member[B].Member[C].Method[m].ReturnValue source=Member[Array].MethodBracket.ReturnValue.Element[0].Method[m].ReturnValue
A::B::C[0] #$ use=getMember("A").getMember("B").getMember("C").getContent(element_0)
A::B::C[0] #$ source=Member[A].Member[B].Member[C].Element[0]

View File

@@ -1,88 +0,0 @@
import codeql.ruby.AST
import codeql.ruby.DataFlow
import TestUtilities.InlineExpectationsTest
import codeql.ruby.ApiGraphs
/**
 * A custom API graph entry point named `CustomEntryPointCall`.
 * Its calls are the call nodes whose method name is `customEntryPointCall`.
 */
class CustomEntryPointCall extends API::EntryPoint {
  CustomEntryPointCall() { this = "CustomEntryPointCall" }

  /** Gets a call node whose method name is `customEntryPointCall`. */
  override DataFlow::CallNode getACall() {
    exists(string methodName |
      methodName = result.getMethodName() and
      methodName = "customEntryPointCall"
    )
  }
}
/**
 * A custom API graph entry point named `CustomEntryPointUse`.
 * Its sources are the call nodes whose method name is `customEntryPointUse`.
 */
class CustomEntryPointUse extends API::EntryPoint {
  CustomEntryPointUse() { this = "CustomEntryPointUse" }

  /** Gets a call node whose method name is `customEntryPointUse`. */
  override DataFlow::LocalSourceNode getASource() {
    exists(DataFlow::CallNode call |
      call.getMethodName() = "customEntryPointUse" and
      result = call
    )
  }
}
/**
 * Inline-expectations test configuration for API graph nodes.
 * It handles the tags `use`, `def` and `call`; only `use` yields mandatory
 * results, while every tag may appear as an optional annotation.
 */
module ApiUseTest implements TestSig {
/** Gets a tag handled by this test. */
string getARelevantTag() { result = ["use", "def", "call"] }
/**
 * Holds if data-flow node `n`, located at `l`, is associated with API node `a`
 * under `tag`: `use` when `n` is a value reachable from a source of `a`,
 * `def` when `n` is the sink of `a`, and `call` when `a` is a method-access
 * node and `n` is its call node.
 */
private predicate relevantNode(API::Node a, DataFlow::Node n, Location l, string tag) {
l = n.getLocation() and
(
tag = "use" and
n = a.getAValueReachableFromSource()
or
tag = "def" and
n = a.asSink()
or
tag = "call" and
n = a.(API::MethodAccessNode).getCallNode()
)
}
/**
 * Holds if a mandatory result with the given `tag` and `value` is reported at
 * `location` for `element`. Only the `use` tag is reported here. `value` is
 * the one path selected by the `max` aggregate among all relevant nodes in
 * the same file that end on the same line as `location`.
 */
predicate hasActualResult(Location location, string element, string tag, string value) {
tag = "use" and // def tags are always optional
exists(DataFlow::Node n | relevantNode(_, n, location, tag) |
// Only report the longest path on this line:
value =
max(API::Node a2, Location l2, DataFlow::Node n2 |
relevantNode(a2, n2, l2, tag) and
l2.getFile() = location.getFile() and
l2.getEndLine() = location.getEndLine()
|
a2.getPath()
order by
// Ordering keys: AST size of the node's expression, then path length
// (descending), then the path string itself (descending).
size(n2.asExpr().getExpr()), a2.getPath().length() desc, a2.getPath() desc
) and
element = n.toString()
)
}
// We also permit optional annotations for any other path on the line.
// This is used to test subclass paths, which typically have a shorter canonical path.
/**
 * Holds if `value` is any path (of any length, via `getAPath`) of an API node
 * that has a relevant node at `location` under `tag`.
 */
predicate hasOptionalResult(Location location, string element, string tag, string value) {
exists(API::Node a, DataFlow::Node n | relevantNode(a, n, location, tag) |
element = n.toString() and
value = getAPath(a, _)
)
}
}
import MakeTest<ApiUseTest>
/**
 * Gets the number of AST nodes reachable from `n` through zero or more
 * `getAChild` steps (so `n` itself is counted).
 * Has no result when `n` is a `StmtSequence`.
 */
private int size(AstNode n) {
  result = count(n.getAChild*()) and
  not n instanceof StmtSequence
}
/**
 * Gets a path of the given `length` from the root to the given node.
 * This is a copy of `API::getAPath()` without the restriction on path length,
 * which would otherwise rule out paths involving `getASubclass()`.
 */
string getAPath(API::Node node, int length) {
  // Base case: the root is reached by the empty path of length 0.
  length = 0 and
  result = "" and
  node instanceof API::Root
  or
  // Recursive case: extend a path to a predecessor by the label of the edge
  // leading to `node`.
  exists(API::Node parent, API::Label::ApiLabel edge, string prefix, string separator |
    node = parent.getASuccessor(edge) and
    prefix = getAPath(parent, length - 1) and
    (
      // The first label is appended directly; later labels are dot-separated.
      length = 1 and separator = ""
      or
      not length = 1 and separator = "."
    ) and
    result = prefix + separator + edge and
    // avoid producing strings longer than 1MB
    result.length() < 1000 * 1000
  )
}