Ruby: Rework call graph implementation

This commit is contained in:
Tom Hvitved
2022-09-07 10:41:33 +02:00
parent 41c45c26bc
commit ac4d4ff613
11 changed files with 549 additions and 171 deletions

View File

@@ -1,3 +1,4 @@
import codeql.ruby.AST
import codeql.ruby.DataFlow::DataFlow
import codeql.ruby.dataflow.internal.DataFlowPrivate
import codeql.ruby.dataflow.internal.DataFlowImplConsistency::Consistency
@@ -11,5 +12,7 @@ private class MyConsistencyConfiguration extends ConsistencyConfiguration {
n instanceof SummaryNode
or
n instanceof SynthHashSplatArgumentNode
or
not isNonConstantExpr(n.asExpr())
}
}

View File

@@ -137,14 +137,7 @@ private module Cached {
}
cached
Method lookupMethod(Module m, string name) {
// The syntax_suggest library redefines Kernel.require/require_relative.
// Somehow this causes performance issues on ruby/ruby. As a workaround
// we exclude 'require' and 'require_relative'.
// TODO: find the actual cause of the slowdown and fix things properly.
not name = ["require", "require_relative"] and
TMethod(result) = lookupMethodOrConst(m, name)
}
Method lookupMethod(Module m, string name) { TMethod(result) = lookupMethodOrConst(m, name) }
cached
Expr lookupConst(Module m, string name) {

View File

@@ -311,13 +311,13 @@ private module Cached {
}
cached
predicate immediatelyControls(ConditionBlock cb, BasicBlock succ, BooleanSuccessor s) {
predicate immediatelyControls(ConditionBlock cb, BasicBlock succ, ConditionalSuccessor s) {
succ = cb.getASuccessor(s) and
forall(BasicBlock pred | pred = succ.getAPredecessor() and pred != cb | succ.dominates(pred))
}
cached
predicate controls(ConditionBlock cb, BasicBlock controlled, BooleanSuccessor s) {
predicate controls(ConditionBlock cb, BasicBlock controlled, ConditionalSuccessor s) {
exists(BasicBlock succ | cb.immediatelyControls(succ, s) | succ.dominates(controlled))
}
}
@@ -406,7 +406,7 @@ class ConditionBlock extends BasicBlock {
* successor of this block, and `succ` can only be reached from
* the callable entry point by going via the `s` edge out of this basic block.
*/
predicate immediatelyControls(BasicBlock succ, BooleanSuccessor s) {
predicate immediatelyControls(BasicBlock succ, ConditionalSuccessor s) {
immediatelyControls(this, succ, s)
}
@@ -415,5 +415,7 @@ class ConditionBlock extends BasicBlock {
* conditional value `s`. That is, `controlled` can only be reached from
* the callable entry point by going via the `s` edge out of this basic block.
*/
predicate controls(BasicBlock controlled, BooleanSuccessor s) { controls(this, controlled, s) }
predicate controls(BasicBlock controlled, ConditionalSuccessor s) {
controls(this, controlled, s)
}
}

View File

@@ -433,7 +433,7 @@ module ExprNodes {
}
private class WhenClauseChildMapping extends NonExprChildMapping, WhenClause {
override predicate relevantChild(AstNode e) { e = this.getBody() }
override predicate relevantChild(AstNode e) { e = [this.getBody(), this.getAPattern()] }
}
/** A control-flow node that wraps a `WhenClause` AST expression. */
@@ -444,6 +444,9 @@ module ExprNodes {
/** Gets the body of this `when`-clause. */
final ExprCfgNode getBody() { e.hasCfgChild(e.getBody(), this, result) }
/** Gets the `i`th pattern of this `when`-clause. */
final ExprCfgNode getPattern(int i) { e.hasCfgChild(e.getPattern(i), this, result) }
}
/** A control-flow node that wraps a `CasePattern`. */

View File

@@ -44,7 +44,7 @@ class CfgNode extends TCfgNode {
final File getFile() { result = this.getLocation().getFile() }
/** Holds if this control flow node has conditional successors. */
final predicate isCondition() { exists(this.getASuccessor(any(BooleanSuccessor bs))) }
final predicate isCondition() { exists(this.getASuccessor(any(ConditionalSuccessor bs))) }
/** Gets the scope of this node. */
final CfgScope getScope() { result = getNodeCfgScope(this) }

View File

@@ -2,10 +2,12 @@ private import ruby
private import codeql.ruby.CFG
private import DataFlowPrivate
private import codeql.ruby.typetracking.TypeTracker
private import codeql.ruby.typetracking.TypeTrackerSpecific as TypeTrackerSpecific
private import codeql.ruby.ast.internal.Module
private import FlowSummaryImpl as FlowSummaryImpl
private import FlowSummaryImplSpecific as FlowSummaryImplSpecific
private import codeql.ruby.dataflow.FlowSummary
private import codeql.ruby.dataflow.SSA
newtype TReturnKind =
TNormalReturnKind() or
@@ -148,12 +150,19 @@ private class NormalCall extends DataFlowCall, TNormalCall {
pragma[nomagic]
private predicate methodCall(
CfgNodes::ExprNodes::CallCfgNode call, DataFlow::Node receiver, string method
) {
method = call.getExpr().(MethodCall).getMethodName() and
receiver.asExpr() = call.getReceiver()
}
pragma[nomagic]
private predicate flowsToMethodCall(
CfgNodes::ExprNodes::CallCfgNode call, DataFlow::LocalSourceNode sourceNode, string method
) {
exists(DataFlow::Node nodeTo |
method = call.getExpr().(MethodCall).getMethodName() and
nodeTo.asExpr() = call.getReceiver() and
sourceNode.flowsTo(nodeTo)
exists(DataFlow::Node receiver |
methodCall(call, receiver, method) and
sourceNode.flowsTo(receiver)
)
}
@@ -177,9 +186,94 @@ private predicate superCall(CfgNodes::ExprNodes::CallCfgNode call, Module superC
pragma[nomagic]
private predicate instanceMethodCall(CfgNodes::ExprNodes::CallCfgNode call, Module tp, string method) {
exists(DataFlow::LocalSourceNode sourceNode |
methodCall(call, sourceNode, method) and
sourceNode = trackInstance(tp)
exists(DataFlow::LocalSourceNode sourceNode, Module m, boolean exact |
flowsToMethodCall(call, sourceNode, method) and
sourceNode = trackInstance(m, exact)
|
tp = m
or
// When we don't know the exact type, it could be any sub class
exact = false and
tp.getSuperClass+() = m
)
}
/**
 * Holds if the `self` variable `self` is declared directly by a module-like
 * scope (other than the top-level) whose module is `m`.
 */
pragma[nomagic]
private predicate selfInModule(SelfVariable self, Module m) {
  exists(Scope declScope | declScope = self.getDeclaringScope() |
    not declScope instanceof Toplevel and
    m = declScope.(ModuleBase).getModule()
  )
}
/**
 * Holds if the `self` variable `self` is declared by a method whose enclosing
 * module is `m`.
 *
 * When the method is enclosed by a singleton class, `m` is instead the module
 * of the scope that encloses that singleton class.
 */
pragma[nomagic]
private predicate selfInMethod(SelfVariable self, Module m) {
  exists(ModuleBase enclosing |
    enclosing = self.getDeclaringScope().(MethodBase).getEnclosingModule()
  |
    // method declared inside a singleton class: look one level further out
    enclosing instanceof SingletonClass and
    m = enclosing.getEnclosingModule().getModule()
    or
    // method declared directly inside a module or class
    not enclosing instanceof SingletonClass and
    m = enclosing.getModule()
  )
}
/**
 * Holds if the `self` variable `self` is declared by the top-level scope,
 * in which case `m` is the resolved `Object` module.
 */
pragma[nomagic]
private predicate selfInToplevel(SelfVariable self, Module m) {
  m = TResolved("Object") and
  self.getDeclaringScope() instanceof Toplevel
}
/**
 * Holds if the SSA definition node `def` is the write of a variable bound by
 * an `as` pattern whose pattern is a constant resolving to module `m`.
 *
 * For example, in
 *
 * ```rb
 * case object
 *   in C => c then c.foo
 * end
 * ```
 *
 * the SSA definition for `c` is introduced by matching on `C`.
 */
predicate asModulePattern(SsaDefinitionNode def, Module m) {
  exists(AsPattern ap, Ssa::WriteDefinition write |
    write = def.getDefinition() and
    write.getWriteAccess() = ap.getVariableAccess() and
    m = resolveConstantReadAccess(ap.getPattern())
  )
}
/**
* Holds if `read1` and `read2` are adjacent reads of SSA definition `def`,
* and `read2` is checked to have type `m`. For example, in
*
* ```rb
* case object
* when C then object.foo
* end
* ```
*
* the two reads of `object` are adjacent, and the second is checked to have type `C`.
*
* More precisely, `read1` must be the value matched by a `case` expression,
* and `read2` must be located in a basic block that is only reachable via the
* successful-match edge of a `when`/`in` pattern that resolves to module `m`.
*/
predicate hasAdjacentTypeCheckedReads(
Ssa::Definition def, CfgNodes::ExprCfgNode read1, CfgNodes::ExprCfgNode read2, Module m
) {
exists(
CfgNodes::ExprCfgNode pattern, ConditionBlock cb, CfgNodes::ExprNodes::CaseExprCfgNode case
|
// the pattern is a constant access that resolves to module `m`
m = resolveConstantReadAccess(pattern.getExpr()) and
// the pattern ends a condition block whose `match = true` edge controls `read2`
cb.getLastNode() = pattern and
cb.controls(read2.getBasicBlock(),
any(SuccessorTypes::MatchingSuccessor match | match.getValue() = true)) and
def.hasAdjacentReads(read1, read2) and
// `read1` is the value being matched by the `case` expression
case.getValue() = read1
|
// the pattern belongs to a `when` clause of that `case` expression ...
pattern = case.getBranch(_).(CfgNodes::ExprNodes::WhenClauseCfgNode).getPattern(_)
or
// ... or to one of its `in` clauses
pattern = case.getBranch(_).(CfgNodes::ExprNodes::InClauseCfgNode).getPattern()
)
}
@@ -222,9 +316,47 @@ private module Cached {
else any()
)
or
exists(DataFlow::LocalSourceNode sourceNode |
methodCall(call, sourceNode, method) and
sourceNode = trackSingletonMethod(result, method)
// singleton method defined on an instance, e.g.
// ```rb
// c = C.new
// def c.singleton; end # <- result
// c.singleton # <- call
// ```
exists(DataFlow::Node sourceNode |
methodCall(call, sourceNode, method) or
flowsToMethodCall(call, sourceNode, method)
|
sourceNode = trackSingletonMethodOnInstance(result, method)
)
or
// singleton method defined on a module
exists(DataFlow::Node sourceNode, Module m |
flowsToMethodCall(call, sourceNode, method) and
singletonMethodOnModule(result, method, m)
|
// ```rb
// def C.singleton; end # <- result
// C.singleton # <- call
// ```
sourceNode = trackModuleAccess(m)
or
// ```rb
// class C
// def self.singleton; end # <- result
// self.singleton # <- call
// end
// ```
selfInModule(sourceNode.(SsaSelfDefinitionNode).getVariable(), m)
or
// ```rb
// class C
// def self.singleton; end # <- result
// def self.other
// self.singleton # <- call
// end
// end
// ```
selfInMethod(sourceNode.(SsaSelfDefinitionNode).getVariable(), m)
)
)
or
@@ -293,73 +425,156 @@ private module Cached {
import Cached
private DataFlow::LocalSourceNode trackInstance(Module tp, TypeTracker t) {
t.start() and
(
result.asExpr().getExpr() instanceof NilLiteral and tp = TResolved("NilClass")
or
result.asExpr().getExpr().(BooleanLiteral).isFalse() and tp = TResolved("FalseClass")
or
result.asExpr().getExpr().(BooleanLiteral).isTrue() and tp = TResolved("TrueClass")
or
result.asExpr().getExpr() instanceof IntegerLiteral and tp = TResolved("Integer")
or
result.asExpr().getExpr() instanceof FloatLiteral and tp = TResolved("Float")
or
result.asExpr().getExpr() instanceof RationalLiteral and tp = TResolved("Rational")
or
result.asExpr().getExpr() instanceof ComplexLiteral and tp = TResolved("Complex")
or
result.asExpr().getExpr() instanceof StringlikeLiteral and tp = TResolved("String")
or
result.asExpr() instanceof CfgNodes::ExprNodes::ArrayLiteralCfgNode and tp = TResolved("Array")
or
result.asExpr() instanceof CfgNodes::ExprNodes::HashLiteralCfgNode and tp = TResolved("Hash")
or
result.asExpr().getExpr() instanceof MethodBase and tp = TResolved("Symbol")
or
result.asParameter() instanceof BlockParameter and tp = TResolved("Proc")
or
result.asExpr().getExpr() instanceof Lambda and tp = TResolved("Proc")
or
exists(CfgNodes::ExprNodes::CallCfgNode call, DataFlow::Node nodeTo |
call.getExpr().(MethodCall).getMethodName() = "new" and
nodeTo.asExpr() = call.getReceiver() and
trackModule(tp).flowsTo(nodeTo) and
result.asExpr() = call
)
or
// `self` in method
tp = result.(SsaSelfDefinitionNode).getSelfScope().(Method).getEnclosingModule().getModule()
or
// `self` in singleton method
flowsToSingletonMethodObject(trackInstance(tp), result.(SsaSelfDefinitionNode).getSelfScope())
or
// `self` in top-level
result.(SsaSelfDefinitionNode).getSelfScope() instanceof Toplevel and
tp = TResolved("Object")
or
// a module or class
exists(Module m |
result = trackModule(m) and
if m.isClass() then tp = TResolved("Class") else tp = TResolved("Module")
)
)
pragma[nomagic]
private DataFlow::LocalSourceNode trackModuleAccess(Module m, TypeTracker t) {
t.start() and m = resolveConstantReadAccess(result.asExpr().getExpr())
or
exists(TypeTracker t2, StepSummary summary |
result = trackInstanceRec(tp, t2, summary) and t = t2.append(summary)
result = trackModuleAccessRec(m, t2, summary) and t = t2.append(summary)
)
}
pragma[nomagic]
private DataFlow::LocalSourceNode trackInstanceRec(Module tp, TypeTracker t, StepSummary summary) {
StepSummary::step(trackInstance(tp, t), result, summary)
private DataFlow::LocalSourceNode trackModuleAccessRec(Module m, TypeTracker t, StepSummary summary) {
StepSummary::step(trackModuleAccess(m, t), result, summary)
}
private DataFlow::LocalSourceNode trackInstance(Module tp) {
result = trackInstance(tp, TypeTracker::end())
pragma[nomagic]
private DataFlow::LocalSourceNode trackModuleAccess(Module m) {
result = trackModuleAccess(m, TypeTracker::end())
}
/**
* Gets a local source node that may refer to an instance of module `tp`,
* tracked using type-tracker state `t`.
*
* `exact` is `true` when the node is taken to be an instance of exactly `tp`
* (literals, `new` calls, top-level `self`, references to modules/classes),
* and `false` when `tp` is only an upper bound, in which case
* `instanceMethodCall` also considers sub classes of `tp` (`self` inside a
* method, variables bound by `in C => c`, and type checked reads).
*/
pragma[nomagic]
private DataFlow::LocalSourceNode trackInstance(Module tp, boolean exact, TypeTracker t) {
t.start() and
(
// syntactic forms that are mapped to a fixed built-in class
result.asExpr().getExpr() instanceof NilLiteral and
tp = TResolved("NilClass") and
exact = true
or
result.asExpr().getExpr().(BooleanLiteral).isFalse() and
tp = TResolved("FalseClass") and
exact = true
or
result.asExpr().getExpr().(BooleanLiteral).isTrue() and
tp = TResolved("TrueClass") and
exact = true
or
result.asExpr().getExpr() instanceof IntegerLiteral and
tp = TResolved("Integer") and
exact = true
or
result.asExpr().getExpr() instanceof FloatLiteral and
tp = TResolved("Float") and
exact = true
or
result.asExpr().getExpr() instanceof RationalLiteral and
tp = TResolved("Rational") and
exact = true
or
result.asExpr().getExpr() instanceof ComplexLiteral and
tp = TResolved("Complex") and
exact = true
or
result.asExpr().getExpr() instanceof StringlikeLiteral and
tp = TResolved("String") and
exact = true
or
result.asExpr() instanceof CfgNodes::ExprNodes::ArrayLiteralCfgNode and
tp = TResolved("Array") and
exact = true
or
result.asExpr() instanceof CfgNodes::ExprNodes::HashLiteralCfgNode and
tp = TResolved("Hash") and
exact = true
or
// a method definition used as an expression is typed as `Symbol`
result.asExpr().getExpr() instanceof MethodBase and
tp = TResolved("Symbol") and
exact = true
or
result.asParameter() instanceof BlockParameter and
tp = TResolved("Proc") and
exact = true
or
result.asExpr().getExpr() instanceof Lambda and
tp = TResolved("Proc") and
exact = true
or
// a call to `new` yields an exact instance of the receiver module
exists(CfgNodes::ExprNodes::CallCfgNode call, DataFlow::LocalSourceNode sourceNode |
flowsToMethodCall(call, sourceNode, "new") and
exact = true and
result.asExpr() = call
|
// `C.new`
sourceNode = trackModuleAccess(tp)
or
// `self.new` inside a module
selfInModule(sourceNode.(SsaSelfDefinitionNode).getVariable(), tp)
or
// `self.new` inside a (singleton) method
selfInMethod(sourceNode.(SsaSelfDefinitionNode).getVariable(), tp)
)
or
// `self` reference in method or top-level (but not in module, where instance
// methods cannot be called; only singleton methods)
result =
any(SsaSelfDefinitionNode self |
selfInMethod(self.getVariable(), tp) and
exact = false
or
selfInToplevel(self.getVariable(), tp) and
exact = true
)
or
// a reference to a module `m` is itself an instance of `Class` or `Module`
exists(Module m |
(if m.isClass() then tp = TResolved("Class") else tp = TResolved("Module")) and
exact = true
|
// needed for e.g. `C.new`
m = resolveConstantReadAccess(result.asExpr().getExpr())
or
// needed for e.g. `self.include`
selfInModule(result.(SsaSelfDefinitionNode).getVariable(), m)
or
// needed for e.g. `self.puts`
selfInMethod(result.(SsaSelfDefinitionNode).getVariable(), m)
)
or
// `in C => c then c.foo`
asModulePattern(result, tp) and
exact = false
or
// `case object when C then object.foo`
hasAdjacentTypeCheckedReads(_, _, result.asExpr(), tp) and
exact = false
)
or
// recursive case: extend a previously tracked node by one type-tracking step
exists(TypeTracker t2, StepSummary summary |
result = trackInstanceRec(tp, t2, exact, summary) and t = t2.append(summary)
)
}
/**
 * Gets a node reachable in one type-tracking step (described by `summary`)
 * from a node that `trackInstance(tp, exact, t)` has already reached.
 *
 * Steps into `self` parameters and into type checked variables are excluded.
 * For those, the type of the enclosing module resp. the type being checked
 * against is used instead, and an open-world assumption is applied when
 * determining possible dispatch targets.
 */
pragma[nomagic]
private DataFlow::LocalSourceNode trackInstanceRec(
  Module tp, TypeTracker t, boolean exact, StepSummary summary
) {
  not result instanceof SelfParameterNode and
  not hasAdjacentTypeCheckedReads(_, _, result.asExpr(), _) and
  StepSummary::step(trackInstance(tp, exact, t), result, summary)
}
/**
 * Gets a local source node that may refer to an instance of module `tp`,
 * that is, a node reached by `trackInstance/3` in the type tracker's end state.
 * See `trackInstance/3` for the meaning of `exact`.
 */
pragma[nomagic]
private DataFlow::LocalSourceNode trackInstance(Module tp, boolean exact) {
  exists(TypeTracker t | t = TypeTracker::end() | result = trackInstance(tp, exact, t))
}
pragma[nomagic]
private DataFlow::LocalSourceNode trackBlock(Block block, TypeTracker t) {
t.start() and result.asExpr().getExpr() = block
or
@@ -373,86 +588,202 @@ private DataFlow::LocalSourceNode trackBlockRec(Block block, TypeTracker t, Step
StepSummary::step(trackBlock(block, t), result, summary)
}
pragma[nomagic]
private DataFlow::LocalSourceNode trackBlock(Block block) {
result = trackBlock(block, TypeTracker::end())
}
private predicate singletonMethod(MethodBase method, Expr object) {
object = method.(SingletonMethod).getObject()
or
exists(SingletonClass cls |
object = cls.getValue() and method instanceof Method and method = cls.getAMethod()
/** Holds if `m` is a singleton method named `name`, defined on `object`. */
private predicate singletonMethod(MethodBase m, string name, Expr object) {
name = m.getName() and
(
object = m.(SingletonMethod).getObject()
or
m = any(SingletonClass cls | object = cls.getValue()).getAMethod().(Method)
)
}
pragma[nomagic]
private predicate flowsToSingletonMethodObject(DataFlow::LocalSourceNode nodeFrom, MethodBase method) {
private predicate flowsToSingletonMethodObject(
DataFlow::LocalSourceNode nodeFrom, MethodBase m, string name
) {
exists(DataFlow::Node nodeTo |
nodeFrom.flowsTo(nodeTo) and
singletonMethod(method, nodeTo.asExpr().getExpr())
singletonMethod(m, name, nodeTo.asExpr().getExpr())
)
}
/**
* Holds if `method` is a singleton method named `name`, defined on module
* `m`:
*
* ```rb
* class C
* def self.m1; end # included
*
* class << self
* def m2; end # included
* end
* end
*
* def C.m3; end # included
*
* c_alias = C
* def c_alias.m4; end # included
*
* c = C.new
* def c.m5; end # not included
*
* class << c
* def m6; end # not included
* end
* ```
*/
pragma[nomagic]
private predicate moduleFlowsToSingletonMethodObject(Module m, MethodBase method) {
flowsToSingletonMethodObject(trackModule(m), method)
}
pragma[nomagic]
private DataFlow::LocalSourceNode trackSingletonMethod0(MethodBase method, TypeTracker t) {
t.start() and
(
flowsToSingletonMethodObject(result, method)
or
exists(Module m | result = trackModule(m) and moduleFlowsToSingletonMethodObject(m, method))
private predicate singletonMethodOnModule(MethodBase method, string name, Module m) {
exists(Expr object |
singletonMethod(method, name, object) and
selfInModule(object.(SelfVariableReadAccess).getVariable(), m)
)
or
exists(TypeTracker t2, StepSummary summary |
result = trackSingletonMethod0Rec(method, t2, summary) and t = t2.append(summary)
)
flowsToSingletonMethodObject(trackModuleAccess(m), method, name)
}
/**
* Holds if `method` is a singleton method named `name`, defined on expression
* `object`, where `object` is not likely to resolve to a module:
*
* ```rb
* class C
* def self.m1; end # not included
*
* class << self
* def m2; end # not included
* end
* end
*
* def C.m3; end # not included
*
* c_alias = C
* def c_alias.m4; end # included (due to negative recursion limitation)
*
* c = C.new
* def c.m5; end # included
*
* class << c
* def m6; end # included
* end
* ```
*/
pragma[nomagic]
private DataFlow::LocalSourceNode trackSingletonMethod0Rec(
MethodBase method, TypeTracker t, StepSummary summary
predicate singletonMethodOnInstance(MethodBase method, string name, Expr object) {
singletonMethod(method, name, object) and
not selfInModule(object.(SelfVariableReadAccess).getVariable(), _) and
// cannot use `trackModuleAccess` because of negative recursion
not exists(resolveConstantReadAccess(object))
}
/**
* Same as `singletonMethodOnInstance`, but where `n` is the post-update node
* of the object that is the target of the singleton method `method`.
*/
predicate singletonMethodOnInstancePostUpdate(
MethodBase method, string name, DataFlow::PostUpdateNode n
) {
StepSummary::step(trackSingletonMethod0(method, t), result, summary)
singletonMethodOnInstance(method, name, n.getPreUpdateNode().asExpr().getExpr())
}
/**
* Holds if there is reverse flow from `nodeFrom` to `nodeTo` via a parameter.
*
* This is only used for tracking singleton methods, where we want to be able
* to handle cases like
*
* ```rb
* def add_singleton x
* def x.foo; end
* end
*
* y = add_singleton C.new
* y.foo
* ```
*
* and
*
* ```rb
* class C
* def add_singleton_to_self
* def self.foo; end
* end
* end
*
* y = C.new
* y.add_singleton_to_self
* y.foo
* ```
*/
pragma[nomagic]
private predicate paramReturnFlow(DataFlow::PostUpdateNode nodeFrom, DataFlow::PostUpdateNode nodeTo) {
exists(
CfgNodes::ExprNodes::CallCfgNode call, DataFlow::Node arg, DataFlow::ParameterNode p,
Expr nodeFromPreExpr
|
// `arg` flows into parameter `p` via `call`
TypeTrackerSpecific::callStep(call, arg, p) and
// `nodeTo` is the post-update node of that argument at the call site
nodeTo.getPreUpdateNode() = arg and
// `nodeFrom` is the post-update node of some expression inside the callee ...
nodeFromPreExpr = nodeFrom.getPreUpdateNode().asExpr().getExpr()
|
// ... namely an access to the variable of the named parameter `p`
nodeFromPreExpr = p.getParameter().(NamedParameter).getVariable().getAnAccess()
or
// ... or an access to the `self` variable, when `p` is the `self` parameter
nodeFromPreExpr = p.(SelfParameterNode).getSelfVariable().getAnAccess()
)
}
// Holds if `n` is reachable from the argument post-update node `arg` (one that
// is the target of `paramReturnFlow`) via zero or more type-tracker local flow
// steps. Post-update nodes are not (and should not be) `LocalSourceNode`s in
// general, so the local flow closure is computed manually here.
pragma[nomagic]
private predicate argPostUpdateFlowsTo(DataFlow::PostUpdateNode arg, DataFlow::Node n) {
  // base case: the output argument itself
  n = arg and
  paramReturnFlow(_, arg)
  or
  // inductive case: one more local flow step
  exists(DataFlow::Node prev | localFlowStepTypeTracker(prev, n) |
    argPostUpdateFlowsTo(arg, prev)
  )
}
pragma[nomagic]
private DataFlow::LocalSourceNode trackSingletonMethod(MethodBase m, string name) {
result = trackSingletonMethod0(m, TypeTracker::end()) and
name = m.getName()
}
private DataFlow::LocalSourceNode trackModule(Module tp, TypeTracker t) {
private DataFlow::Node trackSingletonMethodOnInstance(MethodBase method, string name, TypeTracker t) {
t.start() and
(
// ConstantReadAccess to Module
resolveConstantReadAccess(result.asExpr().getExpr()) = tp
or
// `self` reference to Module
exists(Scope scope | scope = result.(SsaSelfDefinitionNode).getSelfScope() |
tp = scope.(ModuleBase).getModule() and
not scope instanceof Toplevel // handled in `trackInstance`
or
scope = result.(SsaSelfDefinitionNode).getSelfScope() and
tp = scope.(SingletonMethod).getEnclosingModule().getModule()
)
)
singletonMethodOnInstancePostUpdate(method, name, result)
or
exists(TypeTracker t2, StepSummary summary |
result = trackModuleRec(tp, t2, summary) and t = t2.append(summary)
result = trackSingletonMethodOnInstanceRec(method, name, t2, summary) and
t = t2.append(summary) and
// do not step over redefinitions
not singletonMethodOnInstancePostUpdate(_, name, result)
)
}
pragma[nomagic]
private DataFlow::LocalSourceNode trackModuleRec(Module tp, TypeTracker t, StepSummary summary) {
StepSummary::step(trackModule(tp, t), result, summary)
private DataFlow::Node trackSingletonMethodOnInstanceRec(
MethodBase method, string name, TypeTracker t, StepSummary summary
) {
exists(DataFlow::Node mid | mid = trackSingletonMethodOnInstance(method, name, t) |
StepSummary::step(mid, result, summary)
or
// include flow out through parameters
paramReturnFlow(mid, result) and
summary.toString() = "return"
or
// include flow starting from an output argument
argPostUpdateFlowsTo(mid, result) and
summary.toString() = "level"
)
}
private DataFlow::LocalSourceNode trackModule(Module tp) {
result = trackModule(tp, TypeTracker::end())
pragma[nomagic]
private DataFlow::Node trackSingletonMethodOnInstance(MethodBase method, string name) {
result = trackSingletonMethodOnInstance(method, name, TypeTracker::end())
}
/**

View File

@@ -203,6 +203,12 @@ private class Argument extends CfgNodes::ExprCfgNode {
}
}
/**
 * Holds if `n` is not a constant expression, that is, `n` has no constant
 * value and its underlying expression is not a constant access.
 */
predicate isNonConstantExpr(CfgNodes::ExprCfgNode n) {
  not (
    exists(n.getConstantValue())
    or
    n.getExpr() instanceof ConstantAccess
  )
}
/** A collection of cached types and predicates to be evaluated in the same stage. */
cached
private module Cached {
@@ -232,8 +238,13 @@ private module Cached {
isParameterNode(_, c, any(ParameterPosition p | p.isKeyword(_)))
} or
TExprPostUpdateNode(CfgNodes::ExprCfgNode n) {
n instanceof Argument or
n = any(CfgNodes::ExprNodes::InstanceVariableAccessCfgNode v).getReceiver()
// filter out nodes that clearly don't need post-update nodes
isNonConstantExpr(n) and
(
n instanceof Argument or
n = any(CfgNodes::ExprNodes::InstanceVariableAccessCfgNode v).getReceiver() or
singletonMethodOnInstance(_, _, n.getExpr())
)
} or
TSummaryNode(
FlowSummaryImpl::Public::SummarizedCallable c,
@@ -304,7 +315,40 @@ private module Cached {
nodeTo = LocalFlow::getParameterDefNode(p)
)
or
LocalFlow::localSsaFlowStepUseUse(_, nodeFrom, nodeTo)
exists(Ssa::Definition def |
LocalFlow::localSsaFlowStepUseUse(def, nodeFrom, nodeTo) and
// For nodes that are the target of a singleton method definition, such
// as `x` in `def x.foo; end`, we disallow use-use flow out of `x`, and
// instead add a type-tracker level-step from `x` to the post-update node
// of `x` (which does allow further use-use flow).
//
// This enables us to strengthen call resolution for singleton methods, since
// we can stop flow at redefinitions, which would otherwise not be possible,
// as type-tracking would step over such redefinitions.
//
// Example:
// ```rb
// def x.foo; end
// def x.foo; end
// x.foo # <- we want to resolve this call to the second definition only
// ```
not singletonMethodOnInstance(_, _, nodeFrom.asExpr().getExpr()) and
// We disallow adjacent use-use steps, where the target is type checked, and
// instead add a type-tracker level-step.
//
// This enables us to strengthen call resolution for instance methods, since
// we can use the extra type information on the target node.
//
// Example:
// ```rb
// case object
// when C then object.foo # <- we want to resolve this call as if it was a call inside `C`
// end
// ```
//
// The second access to `object` is known to have type `C` (or a sub-type thereof).
not hasAdjacentTypeCheckedReads(def, nodeFrom.asExpr(), nodeTo.asExpr(), _)
)
}
private predicate entrySsaDefinition(SsaDefinitionNode n) {
@@ -349,6 +393,15 @@ private module Cached {
or
// Needed for stores in type tracking
TypeTrackerSpecific::basicStoreStep(_, n, _)
or
// Needed to be able to track singleton methods defined on instances
singletonMethodOnInstancePostUpdate(_, _, n)
or
// Needed to be able to track instance methods on variables introduced via pattern matching
asModulePattern(n, _)
or
// Needed to be able to (better) track instance methods on variables that are type checked
hasAdjacentTypeCheckedReads(_, _, n.asExpr(), _)
}
cached
@@ -438,6 +491,9 @@ class SsaDefinitionNode extends NodeImpl, TSsaDefinitionNode {
/** Gets the underlying SSA definition. */
Ssa::Definition getDefinition() { result = def }
/** Gets the underlying variable. */
Variable getVariable() { result = def.getSourceVariable() }
override CfgScope getCfgScope() { result = def.getBasicBlock().getScope() }
override Location getLocationImpl() { result = def.getLocation() }
@@ -539,6 +595,9 @@ private module ParameterNodes {
final MethodBase getMethod() { result = method }
/** Gets the underlying `self` variable. */
final SelfVariable getSelfVariable() { result.getDeclaringScope() = method }
override Parameter getParameter() { none() }
override predicate isParameterOf(DataFlowCallable c, ParameterPosition pos) {
@@ -1036,7 +1095,8 @@ predicate readStep(Node node1, ContentSet c, Node node2) {
// (instance variable assignment or setter method call).
node2.asExpr() =
any(CfgNodes::ExprNodes::MethodCallCfgNode call |
node1.asExpr() = call.getReceiver() and
node1.asExpr() =
any(CfgNodes::ExprCfgNode e | e = call.getReceiver() and isNonConstantExpr(e)) and
call.getNumberOfArguments() = 0 and
c.isSingleton(any(Content::FieldContent ct |
ct.getName() = "@" + call.getExpr().getMethodName()

View File

@@ -35,7 +35,16 @@ private predicate summarizedLocalStep(Node nodeFrom, Node nodeTo) {
}
/** Holds if there is a level step from `nodeFrom` to `nodeTo`. */
predicate levelStep(Node nodeFrom, Node nodeTo) { summarizedLocalStep(nodeFrom, nodeTo) }
predicate levelStep(Node nodeFrom, Node nodeTo) {
  summarizedLocalStep(nodeFrom, nodeTo)
  or
  // From the target of a singleton method definition to its own post-update
  // node; see comment in `localFlowStepTypeTracker/2`
  DataFlowDispatch::singletonMethodOnInstance(_, _, nodeFrom.asExpr().getExpr()) and
  nodeFrom = nodeTo.(DataFlowPublic::PostUpdateNode).getPreUpdateNode()
  or
  // Between adjacent reads where the second read is type checked;
  // see comment in `localFlowStepTypeTracker/2`
  DataFlowDispatch::hasAdjacentTypeCheckedReads(_, nodeFrom.asExpr(), nodeTo.asExpr(), _)
}
/**
* Gets the name of a possible piece of content. This will usually include things like
@@ -67,10 +76,12 @@ private predicate viableParam(
)
}
private predicate callStep(ExprNodes::CallCfgNode call, Node nodeFrom, Node nodeTo) {
/** Holds if there is flow from `arg` to `p` via the call `call`. */
pragma[nomagic]
predicate callStep(ExprNodes::CallCfgNode call, Node arg, DataFlowPrivate::ParameterNodeImpl p) {
exists(DataFlowDispatch::ParameterPosition pos |
argumentPositionMatch(call, nodeFrom, pos) and
viableParam(call, nodeTo, pos)
argumentPositionMatch(call, arg, pos) and
viableParam(call, p, pos)
)
}

View File

@@ -55,12 +55,6 @@ edges
| instance_variables.rb:32:16:32:25 | call to source : | instance_variables.rb:32:1:32:4 | [post] foo3 [@field] : |
| instance_variables.rb:33:6:33:9 | foo3 [@field] : | instance_variables.rb:33:6:33:15 | call to field |
| instance_variables.rb:33:6:33:9 | foo3 [@field] : | instance_variables.rb:33:6:33:15 | call to field |
| instance_variables.rb:36:1:36:4 | [post] foo4 [@other] : | instance_variables.rb:37:6:37:9 | foo4 [@other] : |
| instance_variables.rb:36:1:36:4 | [post] foo4 [@other] : | instance_variables.rb:37:6:37:9 | foo4 [@other] : |
| instance_variables.rb:36:14:36:23 | call to source : | instance_variables.rb:36:1:36:4 | [post] foo4 [@other] : |
| instance_variables.rb:36:14:36:23 | call to source : | instance_variables.rb:36:1:36:4 | [post] foo4 [@other] : |
| instance_variables.rb:37:6:37:9 | foo4 [@other] : | instance_variables.rb:37:6:37:15 | call to other |
| instance_variables.rb:37:6:37:9 | foo4 [@other] : | instance_variables.rb:37:6:37:15 | call to other |
nodes
| instance_variables.rb:2:19:2:19 | x : | semmle.label | x : |
| instance_variables.rb:2:19:2:19 | x : | semmle.label | x : |
@@ -122,14 +116,6 @@ nodes
| instance_variables.rb:33:6:33:9 | foo3 [@field] : | semmle.label | foo3 [@field] : |
| instance_variables.rb:33:6:33:15 | call to field | semmle.label | call to field |
| instance_variables.rb:33:6:33:15 | call to field | semmle.label | call to field |
| instance_variables.rb:36:1:36:4 | [post] foo4 [@other] : | semmle.label | [post] foo4 [@other] : |
| instance_variables.rb:36:1:36:4 | [post] foo4 [@other] : | semmle.label | [post] foo4 [@other] : |
| instance_variables.rb:36:14:36:23 | call to source : | semmle.label | call to source : |
| instance_variables.rb:36:14:36:23 | call to source : | semmle.label | call to source : |
| instance_variables.rb:37:6:37:9 | foo4 [@other] : | semmle.label | foo4 [@other] : |
| instance_variables.rb:37:6:37:9 | foo4 [@other] : | semmle.label | foo4 [@other] : |
| instance_variables.rb:37:6:37:15 | call to other | semmle.label | call to other |
| instance_variables.rb:37:6:37:15 | call to other | semmle.label | call to other |
subpaths
| instance_variables.rb:16:15:16:24 | call to source : | instance_variables.rb:2:19:2:19 | x : | instance_variables.rb:3:9:3:14 | [post] self [@field] : | instance_variables.rb:16:1:16:3 | [post] foo [@field] : |
| instance_variables.rb:16:15:16:24 | call to source : | instance_variables.rb:2:19:2:19 | x : | instance_variables.rb:3:9:3:14 | [post] self [@field] : | instance_variables.rb:16:1:16:3 | [post] foo [@field] : |
@@ -149,4 +135,3 @@ subpaths
| instance_variables.rb:25:6:25:15 | call to field | instance_variables.rb:24:14:24:23 | call to source : | instance_variables.rb:25:6:25:15 | call to field | $@ | instance_variables.rb:24:14:24:23 | call to source : | call to source : |
| instance_variables.rb:29:6:29:19 | call to get_field | instance_variables.rb:28:14:28:23 | call to source : | instance_variables.rb:29:6:29:19 | call to get_field | $@ | instance_variables.rb:28:14:28:23 | call to source : | call to source : |
| instance_variables.rb:33:6:33:15 | call to field | instance_variables.rb:32:16:32:25 | call to source : | instance_variables.rb:33:6:33:15 | call to field | $@ | instance_variables.rb:32:16:32:25 | call to source : | call to source : |
| instance_variables.rb:37:6:37:15 | call to other | instance_variables.rb:36:14:36:23 | call to source : | instance_variables.rb:37:6:37:15 | call to other | $@ | instance_variables.rb:36:14:36:23 | call to source : | call to source : |

View File

@@ -34,4 +34,4 @@ sink(foo3.field) # $ hasValueFlow=22
foo4 = "hello"
foo4.other = source(23)
sink(foo4.other) # $ hasValueFlow=23
sink(foo4.other) # no field flow for constants

View File

@@ -4,9 +4,7 @@ getTarget
| calls.rb:5:1:5:3 | call to foo | calls.rb:77:1:81:3 | foo |
| calls.rb:8:5:8:15 | call to puts | calls.rb:94:5:94:30 | puts |
| calls.rb:11:1:11:8 | call to bar | calls.rb:7:1:9:3 | bar |
| calls.rb:11:1:11:8 | call to bar | calls.rb:13:1:15:3 | bar |
| calls.rb:14:5:14:15 | call to puts | calls.rb:94:5:94:30 | puts |
| calls.rb:17:1:17:8 | call to bar | calls.rb:7:1:9:3 | bar |
| calls.rb:17:1:17:8 | call to bar | calls.rb:13:1:15:3 | bar |
| calls.rb:19:1:19:8 | call to foo | calls.rb:1:1:3:3 | foo |
| calls.rb:19:1:19:8 | call to foo | calls.rb:77:1:81:3 | foo |
@@ -78,23 +76,19 @@ getTarget
| calls.rb:213:9:213:24 | call to singleton_g | calls.rb:225:1:227:3 | singleton_g |
| calls.rb:213:9:213:24 | call to singleton_g | calls.rb:232:1:234:3 | singleton_g |
| calls.rb:213:9:213:24 | call to singleton_g | calls.rb:240:5:242:7 | singleton_g |
| calls.rb:213:9:213:24 | call to singleton_g | calls.rb:256:1:258:3 | singleton_g |
| calls.rb:217:1:217:22 | call to singleton_a | calls.rb:180:5:183:7 | singleton_a |
| calls.rb:218:1:218:22 | call to singleton_f | calls.rb:207:9:209:11 | singleton_f |
| calls.rb:220:6:220:19 | call to new | calls.rb:106:5:106:16 | new |
| calls.rb:222:1:222:11 | call to instance | calls.rb:199:5:204:7 | instance |
| calls.rb:223:1:223:14 | call to singleton_e | calls.rb:200:9:202:11 | singleton_e |
| calls.rb:226:5:226:24 | call to puts | calls.rb:94:5:94:30 | puts |
| calls.rb:229:1:229:14 | call to singleton_g | calls.rb:225:1:227:3 | singleton_g |
| calls.rb:229:1:229:14 | call to singleton_g | calls.rb:232:1:234:3 | singleton_g |
| calls.rb:229:1:229:14 | call to singleton_g | calls.rb:240:5:242:7 | singleton_g |
| calls.rb:230:1:230:19 | call to call_singleton_g | calls.rb:212:5:214:7 | call_singleton_g |
| calls.rb:233:5:233:24 | call to puts | calls.rb:94:5:94:30 | puts |
| calls.rb:236:1:236:14 | call to singleton_g | calls.rb:225:1:227:3 | singleton_g |
| calls.rb:236:1:236:14 | call to singleton_g | calls.rb:232:1:234:3 | singleton_g |
| calls.rb:236:1:236:14 | call to singleton_g | calls.rb:240:5:242:7 | singleton_g |
| calls.rb:237:1:237:19 | call to call_singleton_g | calls.rb:212:5:214:7 | call_singleton_g |
| calls.rb:241:9:241:28 | call to puts | calls.rb:94:5:94:30 | puts |
| calls.rb:245:1:245:14 | call to singleton_g | calls.rb:225:1:227:3 | singleton_g |
| calls.rb:245:1:245:14 | call to singleton_g | calls.rb:232:1:234:3 | singleton_g |
| calls.rb:245:1:245:14 | call to singleton_g | calls.rb:240:5:242:7 | singleton_g |
| calls.rb:246:1:246:19 | call to call_singleton_g | calls.rb:212:5:214:7 | call_singleton_g |
| calls.rb:248:6:248:19 | call to new | calls.rb:106:5:106:16 | new |
@@ -102,8 +96,6 @@ getTarget
| calls.rb:254:1:254:16 | call to puts | calls.rb:94:5:94:30 | puts |
| calls.rb:257:5:257:22 | call to puts | calls.rb:94:5:94:30 | puts |
| calls.rb:260:1:260:22 | call to singleton_g | calls.rb:256:1:258:3 | singleton_g |
| calls.rb:261:1:261:14 | call to singleton_g | calls.rb:225:1:227:3 | singleton_g |
| calls.rb:261:1:261:14 | call to singleton_g | calls.rb:232:1:234:3 | singleton_g |
| calls.rb:261:1:261:14 | call to singleton_g | calls.rb:240:5:242:7 | singleton_g |
| calls.rb:262:1:262:19 | call to call_singleton_g | calls.rb:212:5:214:7 | call_singleton_g |
| calls.rb:264:6:264:19 | call to new | calls.rb:106:5:106:16 | new |
@@ -127,18 +119,18 @@ getTarget
| calls.rb:315:9:315:26 | call to puts | calls.rb:94:5:94:30 | puts |
| calls.rb:321:9:321:26 | call to puts | calls.rb:94:5:94:30 | puts |
| calls.rb:327:9:327:26 | call to puts | calls.rb:94:5:94:30 | puts |
| calls.rb:334:9:334:18 | call to instance | calls.rb:314:5:316:7 | instance |
| calls.rb:334:9:334:18 | call to instance | calls.rb:320:5:322:7 | instance |
| calls.rb:334:9:334:18 | call to instance | calls.rb:326:5:328:7 | instance |
| calls.rb:336:9:336:18 | call to instance | calls.rb:314:5:316:7 | instance |
| calls.rb:336:9:336:18 | call to instance | calls.rb:320:5:322:7 | instance |
| calls.rb:336:9:336:18 | call to instance | calls.rb:326:5:328:7 | instance |
| calls.rb:338:9:338:18 | call to instance | calls.rb:314:5:316:7 | instance |
| calls.rb:338:9:338:18 | call to instance | calls.rb:320:5:322:7 | instance |
| calls.rb:338:9:338:18 | call to instance | calls.rb:326:5:328:7 | instance |
| calls.rb:343:20:343:29 | call to instance | calls.rb:314:5:316:7 | instance |
| calls.rb:343:20:343:29 | call to instance | calls.rb:320:5:322:7 | instance |
| calls.rb:343:20:343:29 | call to instance | calls.rb:326:5:328:7 | instance |
| calls.rb:344:26:344:36 | call to instance | calls.rb:320:5:322:7 | instance |
| calls.rb:344:26:344:36 | call to instance | calls.rb:326:5:328:7 | instance |
| calls.rb:345:26:345:36 | call to instance | calls.rb:314:5:316:7 | instance |
| calls.rb:345:26:345:36 | call to instance | calls.rb:320:5:322:7 | instance |
| calls.rb:345:26:345:36 | call to instance | calls.rb:326:5:328:7 | instance |
| calls.rb:349:6:349:11 | call to new | calls.rb:106:5:106:16 | new |
| calls.rb:350:1:350:11 | call to instance | calls.rb:314:5:316:7 | instance |
| calls.rb:351:1:351:25 | call to pattern_dispatch | calls.rb:331:1:347:3 | pattern_dispatch |
@@ -151,6 +143,7 @@ getTarget
| calls.rb:361:6:361:11 | call to new | calls.rb:106:5:106:16 | new |
| calls.rb:362:1:362:16 | call to add_singleton | calls.rb:355:1:359:3 | add_singleton |
| calls.rb:363:1:363:11 | call to instance | calls.rb:314:5:316:7 | instance |
| calls.rb:363:1:363:11 | call to instance | calls.rb:356:5:358:7 | instance |
| hello.rb:12:5:12:24 | call to include | calls.rb:99:5:99:20 | include |
| hello.rb:14:16:14:20 | call to hello | hello.rb:2:5:4:7 | hello |
| hello.rb:20:16:20:20 | call to super | hello.rb:13:5:15:7 | message |
@@ -205,13 +198,10 @@ unresolvedCall
| calls.rb:145:1:145:8 | call to [] |
| calls.rb:145:4:145:5 | - ... |
| calls.rb:145:32:145:36 | call to abs |
| calls.rb:223:1:223:14 | call to singleton_e |
| calls.rb:249:1:249:14 | call to singleton_e |
| calls.rb:250:1:250:14 | call to singleton_g |
| calls.rb:263:1:263:14 | call to singleton_g |
| calls.rb:265:1:265:14 | call to singleton_g |
| calls.rb:344:26:344:36 | call to instance |
| calls.rb:345:26:345:36 | call to instance |
| hello.rb:20:16:20:26 | ... + ... |
| hello.rb:20:16:20:34 | ... + ... |
| hello.rb:20:16:20:40 | ... + ... |