Merge pull request #166 from github/type_tracking

Minimal implementation of shared type-tracking library
This commit is contained in:
Arthur Baars
2021-05-06 10:59:45 +02:00
committed by GitHub
9 changed files with 641 additions and 40 deletions

2
codeql

Submodule codeql updated: a1ccbcdaf1...6693c5bdd0

View File

@@ -247,6 +247,11 @@ module ExprNodes {
override predicate relevantChild(Expr e) { e = this.getValue() or e = this.getBranch(_) }
}
/** A control-flow node whose underlying AST expression is a `MethodCall`. */
class MethodCallCfgNode extends CallCfgNode {
  MethodCallCfgNode() { exists(MethodCall mc | mc = this.getExpr()) }
}
/** A control-flow node that wraps a `CaseExpr` AST expression. */
class CaseExprCfgNode extends ExprCfgNode {
override CaseExprChildMapping e;

View File

@@ -40,6 +40,12 @@ class DataFlowCallable = CfgScope;
class DataFlowCall extends CfgNodes::ExprNodes::CallCfgNode {
/** Gets the scope of this call node, viewed as a `DataFlowCallable`. */
DataFlowCallable getEnclosingCallable() { result = this.getScope() }
/**
* Gets the callable targeted by this call, if one can be determined.
*
* This is a name-based placeholder: the result is the one and only `Method`
* whose name equals the method name of this call. There is no result when no
* method or more than one method has that name, or when the underlying node
* is not a `MethodCall`.
*/
DataFlowCallable getTarget() {
// TODO: this is a placeholder that finds a method with the same name, iff it's uniquely named.
result =
unique(DataFlowCallable c | c.(Method).getName() = this.getNode().(MethodCall).getMethodName())
}
}
/** Gets a viable run-time target for the call `call`. */

View File

@@ -2133,11 +2133,8 @@ private module Stage4 {
bindingset[node, cc, config]
private LocalCc getLocalCc(Node node, Cc cc, Configuration config) {
exists(Cc cc0 |
cc = pragma[only_bind_into](cc0) and
localFlowEntry(node, config) and
result = getLocalCallContext(cc0, getNodeEnclosingCallable(node))
)
localFlowEntry(node, config) and
result = getLocalCallContext(pragma[only_bind_out](cc), getNodeEnclosingCallable(node))
}
private predicate localStep(
@@ -3132,7 +3129,7 @@ private predicate pathStep(PathNodeMid mid, Node node, CallContext cc, SummaryCt
conf = mid.getConfiguration() and
cc = mid.getCallContext() and
sc = mid.getSummaryCtx() and
localCC = getLocalCallContext(cc, getNodeEnclosingCallable(midnode)) and
localCC = getLocalCallContext(pragma[only_bind_out](cc), getNodeEnclosingCallable(midnode)) and
ap0 = mid.getAp()
|
localFlowBigStep(midnode, node, true, _, conf, localCC) and

View File

@@ -2,6 +2,7 @@ private import ruby
private import DataFlowDispatch
private import DataFlowPrivate
private import codeql_ruby.CFG
private import codeql_ruby.typetracking.TypeTracker
/**
* An element, viewed as a node in a data flow graph. Either an expression
@@ -73,6 +74,37 @@ class ParameterNode extends Node, TParameterNode {
predicate isParameterOf(Callable c, int i) { p = c.getParameter(i) }
}
/**
 * A data-flow node that acts as a source of local flow, that is, a node that
 * is not reached from any expression node by one or more simple local flow steps.
 */
class LocalSourceNode extends Node {
  LocalSourceNode() { not exists(ExprNode pred | simpleLocalFlowStep+(pred, this)) }

  /** Holds if `nodeTo` is reachable from this node through zero or more local flow steps. */
  pragma[inline]
  predicate flowsTo(Node nodeTo) { hasLocalSource(nodeTo, this) }

  /**
   * Gets a node reachable from this node by one heap and/or interprocedural step,
   * where `t2` is the type tracker before the step and `t` is the type tracker after it.
   *
   * See `TypeTracker` for more details about how to use this.
   */
  pragma[inline]
  LocalSourceNode track(TypeTracker t2, TypeTracker t) { t = t2.step(this, result) }
}
/**
* Holds if `source` is a `LocalSourceNode` that reaches `sink` in zero or more
* simple local flow steps.
*/
predicate hasLocalSource(Node sink, Node source) {
// Declaring `source` to be a `LocalSourceNode` currently causes a redundant check in the
// recursive case, so instead we check it explicitly here.
source = sink and
source instanceof LocalSourceNode
or
// Recursive case: extend a path that already reaches `mid` by one more local flow step.
exists(Node mid |
hasLocalSource(mid, source) and
simpleLocalFlowStep(mid, sink)
)
}
/** Gets a node corresponding to expression `e`. */
ExprNode exprNode(CfgNodes::ExprCfgNode e) { result.getExprNode() = e }

View File

@@ -1,5 +1,5 @@
/**
* Provides a language-independant implementation of static single assignment
* Provides a language-independent implementation of static single assignment
* (SSA) form.
*/
@@ -316,15 +316,23 @@ private module SsaDefReaches {
)
}
/**
* Holds if the reference to `def` at index `i` in basic block `bb` is the
* last reference to `v` inside `bb`.
*/
pragma[noinline]
predicate lastSsaRef(Definition def, SourceVariable v, BasicBlock bb, int i) {
ssaDefRank(def, v, bb, i, _) = maxSsaRefRank(bb, v)
}
predicate defOccursInBlock(Definition def, BasicBlock bb, SourceVariable v) {
exists(ssaDefRank(def, v, bb, _, _))
}
pragma[noinline]
private BasicBlock getAMaybeLiveSuccessor(Definition def, BasicBlock bb) {
result = getABasicBlockSuccessor(bb) and
not defOccursInBlock(_, bb, def.getSourceVariable()) and
ssaDefReachesEndOfBlock(bb, def, _)
private predicate ssaDefReachesThroughBlock(Definition def, BasicBlock bb) {
ssaDefReachesEndOfBlock(bb, def, _) and
not defOccursInBlock(_, bb, def.getSourceVariable())
}
/**
@@ -337,7 +345,11 @@ private module SsaDefReaches {
defOccursInBlock(def, bb1, _) and
bb2 = getABasicBlockSuccessor(bb1)
or
exists(BasicBlock mid | varBlockReaches(def, bb1, mid) | bb2 = getAMaybeLiveSuccessor(def, mid))
exists(BasicBlock mid |
varBlockReaches(def, bb1, mid) and
ssaDefReachesThroughBlock(def, mid) and
bb2 = getABasicBlockSuccessor(mid)
)
}
/**
@@ -348,24 +360,16 @@ private module SsaDefReaches {
*/
predicate defAdjacentRead(Definition def, BasicBlock bb1, BasicBlock bb2, int i2) {
varBlockReaches(def, bb1, bb2) and
ssaRefRank(bb2, i2, def.getSourceVariable(), SsaRead()) = 1 and
variableRead(bb2, i2, _, _)
ssaRefRank(bb2, i2, def.getSourceVariable(), SsaRead()) = 1
}
}
private import SsaDefReaches
pragma[noinline]
private predicate ssaDefReachesEndOfBlockRec(BasicBlock bb, Definition def, SourceVariable v) {
exists(BasicBlock idom | ssaDefReachesEndOfBlock(idom, def, v) |
// The construction of SSA form ensures that each read of a variable is
// dominated by its definition. An SSA definition therefore reaches a
// control flow node if it is the _closest_ SSA definition that dominates
// the node. If two definitions dominate a node then one must dominate the
// other, so therefore the definition of _closest_ is given by the dominator
// tree. Thus, reaching definitions can be calculated in terms of dominance.
idom = getImmediateBasicBlockDominator(bb)
)
pragma[nomagic]
predicate liveThrough(BasicBlock bb, SourceVariable v) {
liveAtExit(bb, v) and
not ssaRef(bb, _, v, SsaDef())
}
/**
@@ -382,9 +386,14 @@ predicate ssaDefReachesEndOfBlock(BasicBlock bb, Definition def, SourceVariable
liveAtExit(bb, v)
)
or
ssaDefReachesEndOfBlockRec(bb, def, v) and
liveAtExit(bb, v) and
not ssaRef(bb, _, v, SsaDef())
// The construction of SSA form ensures that each read of a variable is
// dominated by its definition. An SSA definition therefore reaches a
// control flow node if it is the _closest_ SSA definition that dominates
// the node. If two definitions dominate a node then one must dominate the
// other, so therefore the definition of _closest_ is given by the dominator
// tree. Thus, reaching definitions can be calculated in terms of dominance.
ssaDefReachesEndOfBlock(getImmediateBasicBlockDominator(bb), def, pragma[only_bind_into](v)) and
liveThrough(bb, pragma[only_bind_into](v))
}
/**
@@ -433,15 +442,22 @@ predicate adjacentDefRead(Definition def, BasicBlock bb1, int i1, BasicBlock bb2
bb2 = bb1
)
or
exists(SourceVariable v | ssaDefRank(def, v, bb1, i1, _) = maxSsaRefRank(bb1, v)) and
lastSsaRef(def, _, bb1, i1) and
defAdjacentRead(def, bb1, bb2, i2)
}
pragma[noinline]
private predicate adjacentDefRead(
Definition def, BasicBlock bb1, int i1, BasicBlock bb2, int i2, SourceVariable v
) {
adjacentDefRead(def, bb1, i1, bb2, i2) and
v = def.getSourceVariable()
}
private predicate adjacentDefReachesRead(
Definition def, BasicBlock bb1, int i1, BasicBlock bb2, int i2
) {
adjacentDefRead(def, bb1, i1, bb2, i2) and
exists(SourceVariable v | v = def.getSourceVariable() |
exists(SourceVariable v | adjacentDefRead(def, bb1, i1, bb2, i2, v) |
ssaRef(bb1, i1, v, SsaDef())
or
variableRead(bb1, i1, v, true)
@@ -474,17 +490,19 @@ predicate adjacentDefNoUncertainReads(Definition def, BasicBlock bb1, int i1, Ba
*/
pragma[nomagic]
predicate lastRefRedef(Definition def, BasicBlock bb, int i, Definition next) {
exists(int rnk, SourceVariable v, int j | rnk = ssaDefRank(def, v, bb, i, _) |
exists(SourceVariable v |
// Next reference to `v` inside `bb` is a write
next.definesAt(v, bb, j) and
rnk + 1 = ssaRefRank(bb, j, v, SsaDef())
exists(int rnk, int j |
rnk = ssaDefRank(def, v, bb, i, _) and
next.definesAt(v, bb, j) and
rnk + 1 = ssaRefRank(bb, j, v, SsaDef())
)
or
// Can reach a write using one or more steps
rnk = maxSsaRefRank(bb, v) and
lastSsaRef(def, v, bb, i) and
exists(BasicBlock bb2 |
varBlockReaches(def, bb, bb2) and
next.definesAt(v, bb2, j) and
1 = ssaRefRank(bb2, j, v, SsaDef())
1 = ssaDefRank(next, v, bb2, _, SsaDef())
)
)
}
@@ -538,7 +556,8 @@ pragma[nomagic]
predicate lastRef(Definition def, BasicBlock bb, int i) {
lastRefRedef(def, bb, i, _)
or
exists(SourceVariable v | ssaDefRank(def, v, bb, i, _) = maxSsaRefRank(bb, v) |
lastSsaRef(def, _, bb, i) and
(
// Can reach exit directly
bb instanceof ExitBasicBlock
or

View File

@@ -0,0 +1,420 @@
/** Step Summaries and Type Tracking */
private import TypeTrackerSpecific
/**
 * A string that may occur as the name of a piece of content, for example
 * - an attribute name (in Python), or
 * - a property name (in JavaScript).
 *
 * More generally, content names can also model things such as stores to specific
 * list indices. For correctness it is important that
 *
 * - different kinds of content never share a name, and
 * - the empty string `""` is never a content name, because it is reserved to
 *   signal the absence of content.
 */
class ContentName extends string {
  ContentName() { getPossibleContentName() = this }
}
/** A content name, or the empty string `""`, which stands for "no content". */
class OptionalContentName extends string {
  OptionalContentName() { this = "" or this instanceof ContentName }
}
/**
* A description of a step on an inter-procedural data flow path.
*
* The branches are produced by `StepSummary::smallstep`, which maps each kind of
* underlying flow step to one of them.
*/
private newtype TStepSummary =
// Used by `smallstep` for `jumpStep` edges.
LevelStep() or
// Used by `smallstep` for `callStep` edges (argument to parameter).
CallStep() or
// Used by `smallstep` for `returnStep` edges (return node to call).
ReturnStep() or
// A write of the tracked value into the content named `content`.
StoreStep(ContentName content) or
// A read of the content named `content`.
LoadStep(ContentName content)
/**
 * INTERNAL: Use `TypeTracker` or `TypeBackTracker` instead.
 *
 * A description of a single step on an inter-procedural data flow path.
 */
class StepSummary extends TStepSummary {
  /** Gets a textual representation of this step summary. */
  string toString() {
    this = LevelStep() and result = "level"
    or
    this = CallStep() and result = "call"
    or
    this = ReturnStep() and result = "return"
    or
    exists(string name | this = StoreStep(name) and result = "store " + name)
    or
    exists(string name | this = LoadStep(name) and result = "load " + name)
  }
}
/** Provides predicates for updating step summaries (`StepSummary`s). */
module StepSummary {
/**
* Holds if `summary` describes a forwards heap and/or inter-procedural step
* from `nodeFrom` to `nodeTo`.
*
* The underlying `smallstep` is taken from any node `mid` that `nodeFrom` flows
* to locally, so the local flow leading up to the step is folded into this
* relation.
*/
cached
predicate step(LocalSourceNode nodeFrom, LocalSourceNode nodeTo, StepSummary summary) {
exists(Node mid | nodeFrom.flowsTo(mid) and smallstep(mid, nodeTo, summary))
}
/**
* Holds if `summary` describes a single forwards heap and/or inter-procedural
* step from `nodeFrom` to `nodeTo`.
*
* Unlike `StepSummary::step`, this predicate does not compress
* type-preserving steps: `nodeFrom` is the node the step is taken from, not a
* local source flowing to that node. `simpleLocalFlowStep` edges are not
* included here; `TypeTracker::smallstep` and `TypeBackTracker::smallstep`
* add them separately.
*/
predicate smallstep(Node nodeFrom, LocalSourceNode nodeTo, StepSummary summary) {
// Jump steps are summarized as level steps.
jumpStep(nodeFrom, nodeTo) and
summary = LevelStep()
or
callStep(nodeFrom, nodeTo) and summary = CallStep()
or
returnStep(nodeFrom, nodeTo) and
summary = ReturnStep()
or
exists(string content |
// Stores target a local source of the written-to object, see `localSourceStoreStep`.
localSourceStoreStep(nodeFrom, nodeTo, content) and
summary = StoreStep(content)
or
basicLoadStep(nodeFrom, nodeTo, content) and summary = LoadStep(content)
)
}
/**
* Holds if `nodeFrom` is being written to the `content` content of the object in `nodeTo`.
*
* Note that `nodeTo` will always be a local source node that flows to the place where the content
* is written in `basicStoreStep`. This may lead to the flow of information going "back in time"
* from the point of view of the execution of the program.
*
* For instance, if we interpret attribute writes in Python as writing to content with the same
* name as the attribute and consider the following snippet
*
* ```python
* def foo(y):
* x = Foo()
* bar(x)
* x.attr = y
* baz(x)
*
* def bar(x):
* z = x.attr
* ```
* for the attribute write `x.attr = y`, we will have `content` being the literal string `"attr"`,
* `nodeFrom` will be `y`, and `nodeTo` will be the object `Foo()` created on the first line of the
* function. This means we will track the fact that `x.attr` can have the type of `y` into the
* assignment to `z` inside `bar`, even though this attribute write happens _after_ `bar` is called.
*/
predicate localSourceStoreStep(Node nodeFrom, LocalSourceNode nodeTo, string content) {
// `obj` is the node the store is performed on; `nodeTo` is a local source that flows to it.
exists(Node obj | nodeTo.flowsTo(obj) and basicStoreStep(nodeFrom, obj, content))
}
}
/**
* The underlying representation of a `TypeTracker`: a flag recording whether a
* call step has been taken, together with the name of the content the tracked
* value is stored in (`""` when it is not stored in any content).
*/
private newtype TTypeTracker = MkTypeTracker(Boolean hasCall, OptionalContentName content)
/**
* Summary of the steps needed to track a value to a given dataflow node.
*
* This can be used to track objects that implement a certain API in order to
* recognize calls to that API. Note that type-tracking does not by itself provide a
* source/sink relation, that is, it may determine that a node has a given type,
* but it won't determine where that type came from.
*
* It is recommended that all uses of this type are written in the following form,
* for tracking some type `myType`:
* ```ql
* DataFlow::LocalSourceNode myType(DataFlow::TypeTracker t) {
* t.start() and
* result = < source of myType >
* or
* exists (DataFlow::TypeTracker t2 |
* result = myType(t2).track(t2, t)
* )
* }
*
* DataFlow::Node myType() { myType(DataFlow::TypeTracker::end()).flowsTo(result) }
* ```
*
* Instead of `result = myType(t2).track(t2, t)`, you can also use the equivalent
* `t = t2.step(myType(t2), result)`. If you additionally want to track individual
* intra-procedural steps, use `t = t2.smallstep(myType(t2), result)`.
*/
class TypeTracker extends TTypeTracker {
// Whether a call step has been taken on the way to the current node.
Boolean hasCall;
// Name of the content the tracked value is currently stored in, or `""` if none.
OptionalContentName content;
TypeTracker() { this = MkTypeTracker(hasCall, content) }
/** Gets the summary resulting from appending `step` to this type-tracking summary. */
cached
TypeTracker append(StepSummary step) {
// A level step leaves the summary unchanged.
step = LevelStep() and result = this
or
// A call step records that a call has been entered.
step = CallStep() and result = MkTypeTracker(true, content)
or
// A return step is only permitted while no call step has been taken.
step = ReturnStep() and hasCall = false and result = this
or
// Loading exactly the tracked content yields the value itself again.
step = LoadStep(content) and result = MkTypeTracker(hasCall, "")
or
// A store is only permitted while the value is not already inside some content.
exists(string p | step = StoreStep(p) and content = "" and result = MkTypeTracker(hasCall, p))
}
/** Gets a textual representation of this summary. */
string toString() {
exists(string withCall, string withContent |
(if hasCall = true then withCall = "with" else withCall = "without") and
(if content != "" then withContent = " with content " + content else withContent = "") and
result = "type tracker " + withCall + " call steps" + withContent
)
}
/**
* Holds if this is the starting point of type tracking, that is, no call step
* has been taken and the value is not stored in any content.
*/
predicate start() { hasCall = false and content = "" }
/**
* Holds if this is the starting point of type tracking, and the value starts in the content named `contentName`.
* The type tracking only ends after the content has been loaded.
*/
predicate startInContent(ContentName contentName) { hasCall = false and content = contentName }
/**
* Holds if this is the starting point of type tracking
* when tracking a parameter into a call, but not out of it.
*/
predicate call() { hasCall = true and content = "" }
/**
* Holds if this is the end point of type tracking, that is, the tracked value
* is not stored in any content. Call steps may or may not have been taken.
*/
predicate end() { content = "" }
/**
* INTERNAL. DO NOT USE.
*
* Gets `true` if this type has been tracked into a call, and `false` otherwise.
*/
boolean hasCall() { result = hasCall }
/**
* INTERNAL. DO NOT USE.
*
* Gets the content associated with this type tracker, or `""` if there is none.
*/
string getContent() { result = content }
/**
* Gets a type tracker that starts where this one has left off to allow continued
* tracking.
*
* This predicate is only defined if the type is not associated to a piece of content.
*/
TypeTracker continue() { content = "" and result = this }
/**
* Gets the summary that corresponds to having taken a forwards
* heap and/or inter-procedural step from `nodeFrom` to `nodeTo`,
* starting from this summary.
*/
pragma[inline]
TypeTracker step(LocalSourceNode nodeFrom, LocalSourceNode nodeTo) {
exists(StepSummary summary |
StepSummary::step(nodeFrom, pragma[only_bind_out](nodeTo), pragma[only_bind_into](summary)) and
result = this.append(pragma[only_bind_into](summary))
)
}
/**
* Gets the summary that corresponds to having taken a forwards
* local, heap and/or inter-procedural step from `nodeFrom` to `nodeTo`,
* starting from this summary.
*
* Unlike `TypeTracker::step`, this predicate exposes all edges
* in the flow graph, and not just the edges between `LocalSourceNode`s.
* It may therefore be less performant.
*
* Type tracking predicates using small steps typically take the following form:
* ```ql
* DataFlow::Node myType(DataFlow::TypeTracker t) {
* t.start() and
* result = < source of myType >
* or
* exists (DataFlow::TypeTracker t2 |
* t = t2.smallstep(myType(t2), result)
* )
* }
*
* DataFlow::Node myType() {
* result = myType(DataFlow::TypeTracker::end())
* }
* ```
*/
pragma[inline]
TypeTracker smallstep(Node nodeFrom, Node nodeTo) {
exists(StepSummary summary |
StepSummary::smallstep(nodeFrom, nodeTo, summary) and
result = this.append(summary)
)
or
// A plain local flow step does not change the summary.
simpleLocalFlowStep(nodeFrom, nodeTo) and
result = this
}
}
/** Provides predicates for implementing custom `TypeTracker`s. */
module TypeTracker {
  /**
   * Gets a type tracker that is a valid end point of type tracking.
   */
  TypeTracker end() { result = any(TypeTracker t | t.end()) }
}
/**
* The underlying representation of a `TypeBackTracker`: a flag recording whether
* a return step has been taken, together with the name of the content being
* tracked (`""` when there is none).
*/
private newtype TTypeBackTracker = MkTypeBackTracker(Boolean hasReturn, OptionalContentName content)
/**
 * Summary of the steps needed to back-track a use of a value to a given dataflow node.
 *
 * This can for example be used to track callbacks that are passed to a certain API,
 * so we can model specific parameters of that callback as having a certain type.
 *
 * Note that type back-tracking does not provide a source/sink relation, that is,
 * it may determine that a node will be used in an API call somewhere, but it won't
 * determine exactly where that use was, or the path that led to the use.
 *
 * It is recommended that all uses of this type are written in the following form,
 * for back-tracking some callback type `myCallback`:
 *
 * ```ql
 * DataFlow::LocalSourceNode myCallback(DataFlow::TypeBackTracker t) {
 *   t.start() and
 *   result = (< some API call >).getArgument(< n >).getALocalSource()
 *   or
 *   exists (DataFlow::TypeBackTracker t2 |
 *     result = myCallback(t2).backtrack(t2, t)
 *   )
 * }
 *
 * DataFlow::LocalSourceNode myCallback() { result = myCallback(DataFlow::TypeBackTracker::end()) }
 * ```
 *
 * Instead of `result = myCallback(t2).backtrack(t2, t)`, you can also use the equivalent
 * `t2 = t.step(result, myCallback(t2))`. If you additionally want to track individual
 * intra-procedural steps, use `t2 = t.smallstep(result, myCallback(t2))`.
 */
class TypeBackTracker extends TTypeBackTracker {
  // Whether a return step has been taken on the way back to the current node.
  Boolean hasReturn;
  // Name of the content being tracked, or `""` if none. Typed like the corresponding
  // `MkTypeBackTracker` argument and like the `content` field of `TypeTracker`.
  OptionalContentName content;

  TypeBackTracker() { this = MkTypeBackTracker(hasReturn, content) }

  /** Gets the summary resulting from prepending `step` to this type-tracking summary. */
  TypeBackTracker prepend(StepSummary step) {
    // A level step leaves the summary unchanged.
    step = LevelStep() and result = this
    or
    // A call step may only be prepended while no return step has been taken.
    step = CallStep() and hasReturn = false and result = this
    or
    // A return step records that a return edge has been followed.
    step = ReturnStep() and result = MkTypeBackTracker(true, content)
    or
    // A load may only be prepended while no content is being tracked.
    exists(string p |
      step = LoadStep(p) and content = "" and result = MkTypeBackTracker(hasReturn, p)
    )
    or
    // A store into exactly the tracked content yields a summary without content.
    step = StoreStep(content) and result = MkTypeBackTracker(hasReturn, "")
  }

  /** Gets a textual representation of this summary. */
  string toString() {
    exists(string withReturn, string withContent |
      (if hasReturn = true then withReturn = "with" else withReturn = "without") and
      (if content != "" then withContent = " with content " + content else withContent = "") and
      result = "type back-tracker " + withReturn + " return steps" + withContent
    )
  }

  /**
   * Holds if this is the starting point of type back-tracking, that is, no return
   * step has been taken and no content is being tracked.
   */
  predicate start() { hasReturn = false and content = "" }

  /**
   * Holds if this is the end point of type back-tracking, that is, no content is
   * being tracked. Return steps may or may not have been taken.
   */
  predicate end() { content = "" }

  /**
   * INTERNAL. DO NOT USE.
   *
   * Gets `true` if this type has been back-tracked into a call through a return
   * edge, and `false` otherwise.
   */
  boolean hasReturn() { result = hasReturn }

  /**
   * Gets a type back-tracker that starts where this one has left off to allow
   * continued tracking.
   *
   * This predicate is only defined if the type has not been tracked into a piece of content.
   */
  TypeBackTracker continue() { content = "" and result = this }

  /**
   * Gets the summary that corresponds to having taken a backwards
   * heap and/or inter-procedural step from `nodeTo` to `nodeFrom`.
   *
   * The receiver is the summary at `nodeFrom` and the result is the summary at
   * `nodeTo`: the receiver equals the result with the step prepended.
   */
  pragma[inline]
  TypeBackTracker step(LocalSourceNode nodeFrom, LocalSourceNode nodeTo) {
    exists(StepSummary summary |
      StepSummary::step(pragma[only_bind_out](nodeFrom), nodeTo, pragma[only_bind_into](summary)) and
      this = result.prepend(pragma[only_bind_into](summary))
    )
  }

  /**
   * Gets the summary that corresponds to having taken a backwards
   * local, heap and/or inter-procedural step from `nodeTo` to `nodeFrom`.
   *
   * The receiver is the summary at `nodeFrom` and the result is the summary at
   * `nodeTo`: the receiver equals the result with the step prepended.
   *
   * Unlike `TypeBackTracker::step`, this predicate exposes all edges
   * in the flowgraph, and not just the edges between
   * `LocalSourceNode`s. It may therefore be less performant.
   *
   * Type tracking predicates using small steps typically take the following form:
   * ```ql
   * DataFlow::Node myType(DataFlow::TypeBackTracker t) {
   *   t.start() and
   *   result = < some API call >.getArgument(< n >)
   *   or
   *   exists (DataFlow::TypeBackTracker t2 |
   *     t2 = t.smallstep(result, myType(t2))
   *   )
   * }
   *
   * DataFlow::Node myType() {
   *   result = myType(DataFlow::TypeBackTracker::end())
   * }
   * ```
   */
  pragma[inline]
  TypeBackTracker smallstep(Node nodeFrom, Node nodeTo) {
    exists(StepSummary summary |
      StepSummary::smallstep(nodeFrom, nodeTo, summary) and
      this = result.prepend(summary)
    )
    or
    // A plain local flow step does not change the summary.
    simpleLocalFlowStep(nodeFrom, nodeTo) and
    this = result
  }
}
/** Provides predicates for implementing custom `TypeBackTracker`s. */
module TypeBackTracker {
  /**
   * Gets a type back-tracker that is a valid end point of type back-tracking.
   */
  TypeBackTracker end() { result = any(TypeBackTracker t | t.end()) }
}

View File

@@ -0,0 +1,118 @@
private import codeql_ruby.AST as AST
private import codeql_ruby.dataflow.internal.DataFlowPublic as DataFlowPublic
private import codeql_ruby.dataflow.internal.DataFlowPrivate as DataFlowPrivate
private import codeql_ruby.dataflow.internal.DataFlowDispatch as DataFlowDispatch
private import codeql_ruby.controlflow.CfgNodes
// Aliases under which the type-tracking library refers to the Ruby data-flow
// classes and predicates.
class Node = DataFlowPublic::Node;
class LocalSourceNode = DataFlowPublic::LocalSourceNode;
predicate simpleLocalFlowStep = DataFlowPrivate::simpleLocalFlowStep/2;
predicate jumpStep = DataFlowPrivate::jumpStep/2;
/**
 * Gets the name of a possible piece of content. Currently these are exactly the
 * attribute names written through setter method calls, for example `"bar"` for
 * `foo.bar = 1`.
 */
string getPossibleContentName() {
  exists(AST::SetterMethodCall setter | result = getSetterCallAttributeName(setter))
}
/**
 * Holds if `nodeFrom` is an argument of a call and `nodeTo` is the parameter at
 * the same position of the call's target.
 */
predicate callStep(
  DataFlowPrivate::ArgumentNode nodeFrom, DataFlowPrivate::ExplicitParameterNode nodeTo
) {
  exists(DataFlowDispatch::DataFlowCall c, int pos |
    nodeFrom.argumentOf(c, pos) and
    nodeTo.isParameterOf(c.getTarget(), pos)
  )
}
/**
 * Holds if `nodeFrom` is a return node of the target of some call, and `nodeTo`
 * is the node for that call.
 */
predicate returnStep(DataFlowPrivate::ReturnNode nodeFrom, Node nodeTo) {
  exists(DataFlowDispatch::DataFlowCall c |
    c.getTarget() = nodeFrom.getEnclosingCallable() and
    c.getNode() = nodeTo.asExpr().getNode()
  )
}
/**
 * Holds if `nodeFrom` is being written to the `content` content of the object
 * in `nodeTo`, where `nodeTo` is the receiver of a setter call that is the
 * left-hand side of an assignment and `nodeFrom` is the right-hand side.
 *
 * `nodeTo` is deliberately typed as a plain `Node`: the receiver of a setter call
 * need not be a local source. `StepSummary::localSourceStoreStep` is responsible
 * for relating the receiver back to a `LocalSourceNode` that flows to it.
 *
 * Combined with that local-source resolution, the choice of store target does
 * not have to make sense "chronologically". All we care about is whether the
 * `content` content of an object can have a specific type, and the assumption is
 * that if a specific type appears there, then any access of that particular
 * content can yield something of that particular type.
 *
 * Thus, in an example such as
 *
 * ```rb
 * def foo(y)
 *   x = Foo.new
 *   bar(x)
 *   x.content = y
 *   baz(x)
 * end
 *
 * def bar(x)
 *   z = x.content
 * end
 * ```
 * for the content write `x.content = y`, we will have `content` being the
 * literal string `"content"`, `nodeFrom` will be `y`, and `nodeTo` will be the
 * receiver `x` of the setter call. After `localSourceStoreStep` has mapped the
 * receiver to a local source flowing to it, the fact that `x.content` can have
 * the type of `y` is tracked into the assignment to `z` inside `bar`, even
 * though this content write happens _after_ `bar` is called.
 */
predicate basicStoreStep(Node nodeFrom, Node nodeTo, string content) {
  // TODO: support SetterMethodCall inside TuplePattern
  exists(ExprNodes::AssignmentCfgNode assignment, ExprNodes::MethodCallCfgNode call |
    assignment.getLhs() = call and
    content = getSetterCallAttributeName(call.getExpr()) and
    nodeTo.(DataFlowPublic::ExprNode).getExprNode() = call.getReceiver() and
    call.getExpr() instanceof AST::SetterMethodCall and
    assignment.getRhs() = nodeFrom.(DataFlowPublic::ExprNode).getExprNode()
  )
}
/**
 * Gets the name of the attribute being set by the setter method call `call`,
 * that is, the setter method's name with its last character (the trailing `=`)
 * removed. Element references have no result. In the following example, the
 * result is `"bar"`.
 *
 * ```rb
 * foo.bar = 1
 * ```
 */
private string getSetterCallAttributeName(AST::SetterMethodCall call) {
  // TODO: this should be exposed in `SetterMethodCall`
  not call instanceof AST::ElementReference and
  exists(string setter, int len |
    setter = call.getMethodName() and
    len = setter.length() and
    result = setter.prefix(len - 1)
  )
}
/**
 * Holds if `nodeTo` is the result of accessing the `content` content of `nodeFrom`,
 * that is, `nodeTo` is a method call without arguments whose method name is
 * `content` and whose receiver is `nodeFrom`.
 */
predicate basicLoadStep(Node nodeFrom, Node nodeTo, string content) {
  exists(ExprNodes::MethodCallCfgNode read |
    nodeTo.asExpr() = read and
    nodeFrom.asExpr() = read.getReceiver() and
    read.getExpr().getNumberOfArguments() = 0 and
    read.getExpr().(AST::MethodCall).getMethodName() = content
  )
}
/**
 * A utility class with the same values as `boolean` that does not require type joining.
 */
class Boolean extends boolean {
  Boolean() { this = false or this = true }
}

View File

@@ -10,5 +10,9 @@
"DataFlow": [
"codeql/csharp/ql/src/semmle/code/csharp/dataflow/internal/DataFlowImpl.qll",
"ql/src/codeql_ruby/dataflow/internal/DataFlowImpl.qll"
],
"TypeTracker": [
"codeql/python/ql/src/experimental/typetracking/TypeTracker.qll",
"ql/src/codeql_ruby/typetracking/TypeTracker.qll"
]
}
}