diff --git a/codeql b/codeql index a1ccbcdaf11..6693c5bdd0b 160000 --- a/codeql +++ b/codeql @@ -1 +1 @@ -Subproject commit a1ccbcdaf11c94fef1a99ca2dd3c23eca0acf36a +Subproject commit 6693c5bdd0b7452f9da5abda3aba9b0123b25c89 diff --git a/ql/src/codeql_ruby/controlflow/CfgNodes.qll b/ql/src/codeql_ruby/controlflow/CfgNodes.qll index 10449ed7c91..2c6f9f7a809 100644 --- a/ql/src/codeql_ruby/controlflow/CfgNodes.qll +++ b/ql/src/codeql_ruby/controlflow/CfgNodes.qll @@ -247,6 +247,11 @@ module ExprNodes { override predicate relevantChild(Expr e) { e = this.getValue() or e = this.getBranch(_) } } + /** A control-flow node that wraps a `MethodCall` AST expression. */ + class MethodCallCfgNode extends CallCfgNode { + MethodCallCfgNode() { this.getExpr() instanceof MethodCall } + } + /** A control-flow node that wraps a `CaseExpr` AST expression. */ class CaseExprCfgNode extends ExprCfgNode { override CaseExprChildMapping e; diff --git a/ql/src/codeql_ruby/dataflow/internal/DataFlowDispatch.qll b/ql/src/codeql_ruby/dataflow/internal/DataFlowDispatch.qll index b412793ffaa..916ea836e63 100644 --- a/ql/src/codeql_ruby/dataflow/internal/DataFlowDispatch.qll +++ b/ql/src/codeql_ruby/dataflow/internal/DataFlowDispatch.qll @@ -40,6 +40,12 @@ class DataFlowCallable = CfgScope; class DataFlowCall extends CfgNodes::ExprNodes::CallCfgNode { DataFlowCallable getEnclosingCallable() { result = this.getScope() } + + DataFlowCallable getTarget() { + // TODO: this is a placeholder that finds a method with the same name, iff it's uniquely named. + result = + unique(DataFlowCallable c | c.(Method).getName() = this.getNode().(MethodCall).getMethodName()) + } } /** Gets a viable run-time target for the call `call`. */ diff --git a/ql/src/codeql_ruby/dataflow/internal/DataFlowImpl.qll b/ql/src/codeql_ruby/dataflow/internal/DataFlowImpl.qll index 8b446d28b86..9498e51e7e6 100644 --- a/ql/src/codeql_ruby/dataflow/internal/DataFlowImpl.qll +++ b/ql/src/codeql_ruby/dataflow/internal/DataFlowImpl.qll @@ -2133,11 +2133,8 @@ private module Stage4 { bindingset[node, cc, config] private LocalCc getLocalCc(Node node, Cc cc, Configuration config) { - exists(Cc cc0 | - cc = pragma[only_bind_into](cc0) and - localFlowEntry(node, config) and - result = getLocalCallContext(cc0, getNodeEnclosingCallable(node)) - ) + localFlowEntry(node, config) and + result = getLocalCallContext(pragma[only_bind_out](cc), getNodeEnclosingCallable(node)) } private predicate localStep( @@ -3132,7 +3129,7 @@ private predicate pathStep(PathNodeMid mid, Node node, CallContext cc, SummaryCt conf = mid.getConfiguration() and cc = mid.getCallContext() and sc = mid.getSummaryCtx() and - localCC = getLocalCallContext(cc, getNodeEnclosingCallable(midnode)) and + localCC = getLocalCallContext(pragma[only_bind_out](cc), getNodeEnclosingCallable(midnode)) and ap0 = mid.getAp() | localFlowBigStep(midnode, node, true, _, conf, localCC) and diff --git a/ql/src/codeql_ruby/dataflow/internal/DataFlowPublic.qll b/ql/src/codeql_ruby/dataflow/internal/DataFlowPublic.qll index 7a8d40a5e1a..775d0bde444 100644 --- a/ql/src/codeql_ruby/dataflow/internal/DataFlowPublic.qll +++ b/ql/src/codeql_ruby/dataflow/internal/DataFlowPublic.qll @@ -2,6 +2,7 @@ private import ruby private import DataFlowDispatch private import DataFlowPrivate private import codeql_ruby.CFG +private import codeql_ruby.typetracking.TypeTracker /** * An element, viewed as a node in a data flow graph. Either an expression @@ -73,6 +74,37 @@ class ParameterNode extends Node, TParameterNode { predicate isParameterOf(Callable c, int i) { p = c.getParameter(i) } } +/** + * A data-flow node that is a source of local flow. + */ +class LocalSourceNode extends Node { + LocalSourceNode() { not simpleLocalFlowStep+(any(ExprNode n), this) } + + /** Holds if this `LocalSourceNode` can flow to `nodeTo` in one or more local flow steps. */ + pragma[inline] + predicate flowsTo(Node nodeTo) { hasLocalSource(nodeTo, this) } + + /** + * Gets a node that this node may flow to using one heap and/or interprocedural step. + * + * See `TypeTracker` for more details about how to use this. + */ + pragma[inline] + LocalSourceNode track(TypeTracker t2, TypeTracker t) { t = t2.step(this, result) } +} + +predicate hasLocalSource(Node sink, Node source) { + // Declaring `source` to be a `SourceNode` currently causes a redundant check in the + // recursive case, so instead we check it explicitly here. + source = sink and + source instanceof LocalSourceNode + or + exists(Node mid | + hasLocalSource(mid, source) and + simpleLocalFlowStep(mid, sink) + ) +} + /** Gets a node corresponding to expression `e`. */ ExprNode exprNode(CfgNodes::ExprCfgNode e) { result.getExprNode() = e } diff --git a/ql/src/codeql_ruby/dataflow/internal/SsaImplCommon.qll b/ql/src/codeql_ruby/dataflow/internal/SsaImplCommon.qll index be01c05b8fa..f37a4f2d074 100644 --- a/ql/src/codeql_ruby/dataflow/internal/SsaImplCommon.qll +++ b/ql/src/codeql_ruby/dataflow/internal/SsaImplCommon.qll @@ -1,5 +1,5 @@ /** - * Provides a language-independant implementation of static single assignment + * Provides a language-independent implementation of static single assignment * (SSA) form. */ @@ -316,15 +316,23 @@ private module SsaDefReaches { ) } + /** + * Holds if the reference to `def` at index `i` in basic block `bb` is the + * last reference to `v` inside `bb`. + */ + pragma[noinline] + predicate lastSsaRef(Definition def, SourceVariable v, BasicBlock bb, int i) { + ssaDefRank(def, v, bb, i, _) = maxSsaRefRank(bb, v) + } + predicate defOccursInBlock(Definition def, BasicBlock bb, SourceVariable v) { exists(ssaDefRank(def, v, bb, _, _)) } pragma[noinline] - private BasicBlock getAMaybeLiveSuccessor(Definition def, BasicBlock bb) { - result = getABasicBlockSuccessor(bb) and - not defOccursInBlock(_, bb, def.getSourceVariable()) and - ssaDefReachesEndOfBlock(bb, def, _) + private predicate ssaDefReachesThroughBlock(Definition def, BasicBlock bb) { + ssaDefReachesEndOfBlock(bb, def, _) and + not defOccursInBlock(_, bb, def.getSourceVariable()) } /** @@ -337,7 +345,11 @@ private module SsaDefReaches { defOccursInBlock(def, bb1, _) and bb2 = getABasicBlockSuccessor(bb1) or - exists(BasicBlock mid | varBlockReaches(def, bb1, mid) | bb2 = getAMaybeLiveSuccessor(def, mid)) + exists(BasicBlock mid | + varBlockReaches(def, bb1, mid) and + ssaDefReachesThroughBlock(def, mid) and + bb2 = getABasicBlockSuccessor(mid) + ) } /** @@ -348,24 +360,16 @@ private module SsaDefReaches { */ predicate defAdjacentRead(Definition def, BasicBlock bb1, BasicBlock bb2, int i2) { varBlockReaches(def, bb1, bb2) and - ssaRefRank(bb2, i2, def.getSourceVariable(), SsaRead()) = 1 and - variableRead(bb2, i2, _, _) + ssaRefRank(bb2, i2, def.getSourceVariable(), SsaRead()) = 1 } } private import SsaDefReaches -pragma[noinline] -private predicate ssaDefReachesEndOfBlockRec(BasicBlock bb, Definition def, SourceVariable v) { - exists(BasicBlock idom | ssaDefReachesEndOfBlock(idom, def, v) | - // The construction of SSA form ensures that each read of a variable is - // dominated by its definition. An SSA definition therefore reaches a - // control flow node if it is the _closest_ SSA definition that dominates - // the node. If two definitions dominate a node then one must dominate the - // other, so therefore the definition of _closest_ is given by the dominator - // tree. Thus, reaching definitions can be calculated in terms of dominance. - idom = getImmediateBasicBlockDominator(bb) - ) +pragma[nomagic] +predicate liveThrough(BasicBlock bb, SourceVariable v) { + liveAtExit(bb, v) and + not ssaRef(bb, _, v, SsaDef()) } /** @@ -382,9 +386,14 @@ predicate ssaDefReachesEndOfBlock(BasicBlock bb, Definition def, SourceVariable liveAtExit(bb, v) ) or - ssaDefReachesEndOfBlockRec(bb, def, v) and - liveAtExit(bb, v) and - not ssaRef(bb, _, v, SsaDef()) + // The construction of SSA form ensures that each read of a variable is + // dominated by its definition. An SSA definition therefore reaches a + // control flow node if it is the _closest_ SSA definition that dominates + // the node. If two definitions dominate a node then one must dominate the + // other, so therefore the definition of _closest_ is given by the dominator + // tree. Thus, reaching definitions can be calculated in terms of dominance. + ssaDefReachesEndOfBlock(getImmediateBasicBlockDominator(bb), def, pragma[only_bind_into](v)) and + liveThrough(bb, pragma[only_bind_into](v)) } /** @@ -433,15 +442,22 @@ predicate adjacentDefRead(Definition def, BasicBlock bb1, int i1, BasicBlock bb2 bb2 = bb1 ) or - exists(SourceVariable v | ssaDefRank(def, v, bb1, i1, _) = maxSsaRefRank(bb1, v)) and + lastSsaRef(def, _, bb1, i1) and defAdjacentRead(def, bb1, bb2, i2) } +pragma[noinline] +private predicate adjacentDefRead( + Definition def, BasicBlock bb1, int i1, BasicBlock bb2, int i2, SourceVariable v +) { + adjacentDefRead(def, bb1, i1, bb2, i2) and + v = def.getSourceVariable() +} + private predicate adjacentDefReachesRead( Definition def, BasicBlock bb1, int i1, BasicBlock bb2, int i2 ) { - adjacentDefRead(def, bb1, i1, bb2, i2) and - exists(SourceVariable v | v = def.getSourceVariable() | + exists(SourceVariable v | adjacentDefRead(def, bb1, i1, bb2, i2, v) | ssaRef(bb1, i1, v, SsaDef()) or variableRead(bb1, i1, v, true) @@ -474,17 +490,19 @@ predicate adjacentDefNoUncertainReads(Definition def, BasicBlock bb1, int i1, Ba */ pragma[nomagic] predicate lastRefRedef(Definition def, BasicBlock bb, int i, Definition next) { - exists(int rnk, SourceVariable v, int j | rnk = ssaDefRank(def, v, bb, i, _) | + exists(SourceVariable v | // Next reference to `v` inside `bb` is a write - next.definesAt(v, bb, j) and - rnk + 1 = ssaRefRank(bb, j, v, SsaDef()) + exists(int rnk, int j | + rnk = ssaDefRank(def, v, bb, i, _) and + next.definesAt(v, bb, j) and + rnk + 1 = ssaRefRank(bb, j, v, SsaDef()) + ) or // Can reach a write using one or more steps - rnk = maxSsaRefRank(bb, v) and + lastSsaRef(def, v, bb, i) and exists(BasicBlock bb2 | varBlockReaches(def, bb, bb2) and - next.definesAt(v, bb2, j) and - 1 = ssaRefRank(bb2, j, v, SsaDef()) + 1 = ssaDefRank(next, v, bb2, _, SsaDef()) ) ) } @@ -538,7 +556,8 @@ pragma[nomagic] predicate lastRef(Definition def, BasicBlock bb, int i) { lastRefRedef(def, bb, i, _) or - exists(SourceVariable v | ssaDefRank(def, v, bb, i, _) = maxSsaRefRank(bb, v) | + lastSsaRef(def, _, bb, i) and + ( // Can reach exit directly bb instanceof ExitBasicBlock or diff --git a/ql/src/codeql_ruby/typetracking/TypeTracker.qll b/ql/src/codeql_ruby/typetracking/TypeTracker.qll new file mode 100644 index 00000000000..3dba23fc0c1 --- /dev/null +++ b/ql/src/codeql_ruby/typetracking/TypeTracker.qll @@ -0,0 +1,420 @@ +/** Step Summaries and Type Tracking */ + +private import TypeTrackerSpecific + +/** + * Any string that may appear as the name of a piece of content. This will usually include things like: + * - Attribute names (in Python) + * - Property names (in JavaScript) + * + * In general, this can also be used to model things like stores to specific list indices. To ensure + * correctness, it is important that + * + * - different types of content do not have overlapping names, and + * - the empty string `""` is not a valid piece of content, as it is used to indicate the absence of + * content instead. + */ +class ContentName extends string { + ContentName() { this = getPossibleContentName() } +} + +/** Either a content name, or the empty string (representing no content). */ +class OptionalContentName extends string { + OptionalContentName() { this instanceof ContentName or this = "" } +} + +/** + * A description of a step on an inter-procedural data flow path. + */ +private newtype TStepSummary = + LevelStep() or + CallStep() or + ReturnStep() or + StoreStep(ContentName content) or + LoadStep(ContentName content) + +/** + * INTERNAL: Use `TypeTracker` or `TypeBackTracker` instead. + * + * A description of a step on an inter-procedural data flow path. + */ +class StepSummary extends TStepSummary { + /** Gets a textual representation of this step summary. */ + string toString() { + this instanceof LevelStep and result = "level" + or + this instanceof CallStep and result = "call" + or + this instanceof ReturnStep and result = "return" + or + exists(string content | this = StoreStep(content) | result = "store " + content) + or + exists(string content | this = LoadStep(content) | result = "load " + content) + } +} + +/** Provides predicates for updating step summaries (`StepSummary`s). */ +module StepSummary { + /** + * Gets the summary that corresponds to having taken a forwards + * heap and/or inter-procedural step from `nodeFrom` to `nodeTo`. + */ + cached + predicate step(LocalSourceNode nodeFrom, LocalSourceNode nodeTo, StepSummary summary) { + exists(Node mid | nodeFrom.flowsTo(mid) and smallstep(mid, nodeTo, summary)) + } + + /** + * Gets the summary that corresponds to having taken a forwards + * local, heap and/or inter-procedural step from `nodeFrom` to `nodeTo`. + * + * Unlike `StepSummary::step`, this predicate does not compress + * type-preserving steps. + */ + predicate smallstep(Node nodeFrom, LocalSourceNode nodeTo, StepSummary summary) { + jumpStep(nodeFrom, nodeTo) and + summary = LevelStep() + or + callStep(nodeFrom, nodeTo) and summary = CallStep() + or + returnStep(nodeFrom, nodeTo) and + summary = ReturnStep() + or + exists(string content | + localSourceStoreStep(nodeFrom, nodeTo, content) and + summary = StoreStep(content) + or + basicLoadStep(nodeFrom, nodeTo, content) and summary = LoadStep(content) + ) + } + + /** + * Holds if `nodeFrom` is being written to the `content` content of the object in `nodeTo`. + * + * Note that `nodeTo` will always be a local source node that flows to the place where the content + * is written in `basicStoreStep`. This may lead to the flow of information going "back in time" + * from the point of view of the execution of the program. + * + * For instance, if we interpret attribute writes in Python as writing to content with the same + * name as the attribute and consider the following snippet + * + * ```python + * def foo(y): + * x = Foo() + * bar(x) + * x.attr = y + * baz(x) + * + * def bar(x): + * z = x.attr + * ``` + * for the attribute write `x.attr = y`, we will have `content` being the literal string `"attr"`, + * `nodeFrom` will be `y`, and `nodeTo` will be the object `Foo()` created on the first line of the + * function. This means we will track the fact that `x.attr` can have the type of `y` into the + * assignment to `z` inside `bar`, even though this attribute write happens _after_ `bar` is called. + */ + predicate localSourceStoreStep(Node nodeFrom, LocalSourceNode nodeTo, string content) { + exists(Node obj | nodeTo.flowsTo(obj) and basicStoreStep(nodeFrom, obj, content)) + } +} + +private newtype TTypeTracker = MkTypeTracker(Boolean hasCall, OptionalContentName content) + +/** + * Summary of the steps needed to track a value to a given dataflow node. + * + * This can be used to track objects that implement a certain API in order to + * recognize calls to that API. Note that type-tracking does not by itself provide a + * source/sink relation, that is, it may determine that a node has a given type, + * but it won't determine where that type came from. + * + * It is recommended that all uses of this type are written in the following form, + * for tracking some type `myType`: + * ```ql + * DataFlow::LocalSourceNode myType(DataFlow::TypeTracker t) { + * t.start() and + * result = < source of myType > + * or + * exists (DataFlow::TypeTracker t2 | + * result = myType(t2).track(t2, t) + * ) + * } + * + * DataFlow::Node myType() { myType(DataFlow::TypeTracker::end()).flowsTo(result) } + * ``` + * + * Instead of `result = myType(t2).track(t2, t)`, you can also use the equivalent + * `t = t2.step(myType(t2), result)`. If you additionally want to track individual + * intra-procedural steps, use `t = t2.smallstep(myCallback(t2), result)`. + */ +class TypeTracker extends TTypeTracker { + Boolean hasCall; + OptionalContentName content; + + TypeTracker() { this = MkTypeTracker(hasCall, content) } + + /** Gets the summary resulting from appending `step` to this type-tracking summary. */ + cached + TypeTracker append(StepSummary step) { + step = LevelStep() and result = this + or + step = CallStep() and result = MkTypeTracker(true, content) + or + step = ReturnStep() and hasCall = false and result = this + or + step = LoadStep(content) and result = MkTypeTracker(hasCall, "") + or + exists(string p | step = StoreStep(p) and content = "" and result = MkTypeTracker(hasCall, p)) + } + + /** Gets a textual representation of this summary. */ + string toString() { + exists(string withCall, string withContent | + (if hasCall = true then withCall = "with" else withCall = "without") and + (if content != "" then withContent = " with content " + content else withContent = "") and + result = "type tracker " + withCall + " call steps" + withContent + ) + } + + /** + * Holds if this is the starting point of type tracking. + */ + predicate start() { hasCall = false and content = "" } + + /** + * Holds if this is the starting point of type tracking, and the value starts in the content named `contentName`. + * The type tracking only ends after the content has been loaded. + */ + predicate startInContent(ContentName contentName) { hasCall = false and content = contentName } + + /** + * Holds if this is the starting point of type tracking + * when tracking a parameter into a call, but not out of it. + */ + predicate call() { hasCall = true and content = "" } + + /** + * Holds if this is the end point of type tracking. + */ + predicate end() { content = "" } + + /** + * INTERNAL. DO NOT USE. + * + * Holds if this type has been tracked into a call. + */ + boolean hasCall() { result = hasCall } + + /** + * INTERNAL. DO NOT USE. + * + * Gets the content associated with this type tracker. + */ + string getContent() { result = content } + + /** + * Gets a type tracker that starts where this one has left off to allow continued + * tracking. + * + * This predicate is only defined if the type is not associated to a piece of content. + */ + TypeTracker continue() { content = "" and result = this } + + /** + * Gets the summary that corresponds to having taken a forwards + * heap and/or inter-procedural step from `nodeFrom` to `nodeTo`. + */ + pragma[inline] + TypeTracker step(LocalSourceNode nodeFrom, LocalSourceNode nodeTo) { + exists(StepSummary summary | + StepSummary::step(nodeFrom, pragma[only_bind_out](nodeTo), pragma[only_bind_into](summary)) and + result = this.append(pragma[only_bind_into](summary)) + ) + } + + /** + * Gets the summary that corresponds to having taken a forwards + * local, heap and/or inter-procedural step from `nodeFrom` to `nodeTo`. + * + * Unlike `TypeTracker::step`, this predicate exposes all edges + * in the flow graph, and not just the edges between `Node`s. + * It may therefore be less performant. + * + * Type tracking predicates using small steps typically take the following form: + * ```ql + * DataFlow::Node myType(DataFlow::TypeTracker t) { + * t.start() and + * result = < source of myType > + * or + * exists (DataFlow::TypeTracker t2 | + * t = t2.smallstep(myType(t2), result) + * ) + * } + * + * DataFlow::Node myType() { + * result = myType(DataFlow::TypeTracker::end()) + * } + * ``` + */ + pragma[inline] + TypeTracker smallstep(Node nodeFrom, Node nodeTo) { + exists(StepSummary summary | + StepSummary::smallstep(nodeFrom, nodeTo, summary) and + result = this.append(summary) + ) + or + simpleLocalFlowStep(nodeFrom, nodeTo) and + result = this + } +} + +/** Provides predicates for implementing custom `TypeTracker`s. */ +module TypeTracker { + /** + * Gets a valid end point of type tracking. + */ + TypeTracker end() { result.end() } +} + +private newtype TTypeBackTracker = MkTypeBackTracker(Boolean hasReturn, OptionalContentName content) + +/** + * Summary of the steps needed to back-track a use of a value to a given dataflow node. + * + * This can for example be used to track callbacks that are passed to a certain API, + * so we can model specific parameters of that callback as having a certain type. + * + * Note that type back-tracking does not provide a source/sink relation, that is, + * it may determine that a node will be used in an API call somewhere, but it won't + * determine exactly where that use was, or the path that led to the use. + * + * It is recommended that all uses of this type are written in the following form, + * for back-tracking some callback type `myCallback`: + * + * ```ql + * DataFlow::LocalSourceNode myCallback(DataFlow::TypeBackTracker t) { + * t.start() and + * result = (< some API call >).getArgument(< n >).getALocalSource() + * or + * exists (DataFlow::TypeBackTracker t2 | + * result = myCallback(t2).backtrack(t2, t) + * ) + * } + * + * DataFlow::LocalSourceNode myCallback() { result = myCallback(DataFlow::TypeBackTracker::end()) } + * ``` + * + * Instead of `result = myCallback(t2).backtrack(t2, t)`, you can also use the equivalent + * `t2 = t.step(result, myCallback(t2))`. If you additionally want to track individual + * intra-procedural steps, use `t2 = t.smallstep(result, myCallback(t2))`. + */ +class TypeBackTracker extends TTypeBackTracker { + Boolean hasReturn; + string content; + + TypeBackTracker() { this = MkTypeBackTracker(hasReturn, content) } + + /** Gets the summary resulting from prepending `step` to this type-tracking summary. */ + TypeBackTracker prepend(StepSummary step) { + step = LevelStep() and result = this + or + step = CallStep() and hasReturn = false and result = this + or + step = ReturnStep() and result = MkTypeBackTracker(true, content) + or + exists(string p | + step = LoadStep(p) and content = "" and result = MkTypeBackTracker(hasReturn, p) + ) + or + step = StoreStep(content) and result = MkTypeBackTracker(hasReturn, "") + } + + /** Gets a textual representation of this summary. */ + string toString() { + exists(string withReturn, string withContent | + (if hasReturn = true then withReturn = "with" else withReturn = "without") and + (if content != "" then withContent = " with content " + content else withContent = "") and + result = "type back-tracker " + withReturn + " return steps" + withContent + ) + } + + /** + * Holds if this is the starting point of type tracking. + */ + predicate start() { hasReturn = false and content = "" } + + /** + * Holds if this is the end point of type tracking. + */ + predicate end() { content = "" } + + /** + * INTERNAL. DO NOT USE. + * + * Holds if this type has been back-tracked into a call through return edge. + */ + boolean hasReturn() { result = hasReturn } + + /** + * Gets a type tracker that starts where this one has left off to allow continued + * tracking. + * + * This predicate is only defined if the type has not been tracked into a piece of content. + */ + TypeBackTracker continue() { content = "" and result = this } + + /** + * Gets the summary that corresponds to having taken a backwards + * heap and/or inter-procedural step from `nodeTo` to `nodeFrom`. + */ + pragma[inline] + TypeBackTracker step(LocalSourceNode nodeFrom, LocalSourceNode nodeTo) { + exists(StepSummary summary | + StepSummary::step(pragma[only_bind_out](nodeFrom), nodeTo, pragma[only_bind_into](summary)) and + this = result.prepend(pragma[only_bind_into](summary)) + ) + } + + /** + * Gets the summary that corresponds to having taken a backwards + * local, heap and/or inter-procedural step from `nodeTo` to `nodeFrom`. + * + * Unlike `TypeBackTracker::step`, this predicate exposes all edges + * in the flowgraph, and not just the edges between + * `LocalSourceNode`s. It may therefore be less performant. + * + * Type tracking predicates using small steps typically take the following form: + * ```ql + * DataFlow::Node myType(DataFlow::TypeBackTracker t) { + * t.start() and + * result = < some API call >.getArgument(< n >) + * or + * exists (DataFlow::TypeBackTracker t2 | + * t = t2.smallstep(result, myType(t2)) + * ) + * } + * + * DataFlow::Node myType() { + * result = myType(DataFlow::TypeBackTracker::end()) + * } + * ``` + */ + pragma[inline] + TypeBackTracker smallstep(Node nodeFrom, Node nodeTo) { + exists(StepSummary summary | + StepSummary::smallstep(nodeFrom, nodeTo, summary) and + this = result.prepend(summary) + ) + or + simpleLocalFlowStep(nodeFrom, nodeTo) and + this = result + } +} + +/** Provides predicates for implementing custom `TypeBackTracker`s. */ +module TypeBackTracker { + /** + * Gets a valid end point of type back-tracking. + */ + TypeBackTracker end() { result.end() } +} diff --git a/ql/src/codeql_ruby/typetracking/TypeTrackerSpecific.qll b/ql/src/codeql_ruby/typetracking/TypeTrackerSpecific.qll new file mode 100644 index 00000000000..3f05a429b41 --- /dev/null +++ b/ql/src/codeql_ruby/typetracking/TypeTrackerSpecific.qll @@ -0,0 +1,118 @@ +private import codeql_ruby.AST as AST +private import codeql_ruby.dataflow.internal.DataFlowPublic as DataFlowPublic +private import codeql_ruby.dataflow.internal.DataFlowPrivate as DataFlowPrivate +private import codeql_ruby.dataflow.internal.DataFlowDispatch as DataFlowDispatch +private import codeql_ruby.controlflow.CfgNodes + +class Node = DataFlowPublic::Node; + +class LocalSourceNode = DataFlowPublic::LocalSourceNode; + +predicate simpleLocalFlowStep = DataFlowPrivate::simpleLocalFlowStep/2; + +predicate jumpStep = DataFlowPrivate::jumpStep/2; + +/** + * Gets the name of a possible piece of content. This will usually include things like + * + * - Attribute names (in Python) + * - Property names (in JavaScript) + */ +string getPossibleContentName() { result = getSetterCallAttributeName(_) } + +/** Holds if `nodeFrom` steps to `nodeTo` by being passed as a parameter in a call. */ +predicate callStep( + DataFlowPrivate::ArgumentNode nodeFrom, DataFlowPrivate::ExplicitParameterNode nodeTo +) { + exists(DataFlowDispatch::DataFlowCall call, DataFlowDispatch::DataFlowCallable callable, int i | + call.getTarget() = callable and + nodeFrom.argumentOf(call, i) and + nodeTo.isParameterOf(callable, i) + ) +} + +/** Holds if `nodeFrom` steps to `nodeTo` by being returned from a call. */ +predicate returnStep(DataFlowPrivate::ReturnNode nodeFrom, Node nodeTo) { + exists(DataFlowDispatch::DataFlowCall call | + nodeFrom.getEnclosingCallable() = call.getTarget() and + nodeTo.asExpr().getNode() = call.getNode() + ) +} + +/** + * Holds if `nodeFrom` is being written to the `content` content of the object + * in `nodeTo`. + * + * Note that the choice of `nodeTo` does not have to make sense + * "chronologically". All we care about is whether the `content` content of + * `nodeTo` can have a specific type, and the assumption is that if a specific + * type appears here, then any access of that particular content can yield + * something of that particular type. + * + * Thus, in an example such as + * + * ```rb + * def foo(y) + * x = Foo.new + * bar(x) + * x.content = y + * baz(x) + * end + * + * def bar(x) + * z = x.content + * end + * ``` + * for the content write `x.content = y`, we will have `content` being the + * literal string `"content"`, `nodeFrom` will be `y`, and `nodeTo` will be the + * `Foo` object created on the first line of the function. This means we will + * track the fact that `x.content` can have the type of `y` into the assignment + * to `z` inside `bar`, even though this content write happens _after_ `bar` is + * called. + */ +predicate basicStoreStep(Node nodeFrom, LocalSourceNode nodeTo, string content) { + // TODO: support SetterMethodCall inside TuplePattern + exists(ExprNodes::AssignmentCfgNode assignment, ExprNodes::MethodCallCfgNode call | + assignment.getLhs() = call and + content = getSetterCallAttributeName(call.getExpr()) and + nodeTo.(DataFlowPublic::ExprNode).getExprNode() = call.getReceiver() and + call.getExpr() instanceof AST::SetterMethodCall and + assignment.getRhs() = nodeFrom.(DataFlowPublic::ExprNode).getExprNode() + ) +} + +/** + * Returns the name of the attribute being set by the setter method call, i.e. + * the name of the setter method without the trailing `=`. In the following + * example, the result is `"bar"`. + * + * ```rb + * foo.bar = 1 + * ``` + */ +private string getSetterCallAttributeName(AST::SetterMethodCall call) { + // TODO: this should be exposed in `SetterMethodCall` + not call instanceof AST::ElementReference and + exists(string setterName | + setterName = call.getMethodName() and result = setterName.prefix(setterName.length() - 1) + ) +} + +/** + * Holds if `nodeTo` is the result of accessing the `content` content of `nodeFrom`. + */ +predicate basicLoadStep(Node nodeFrom, Node nodeTo, string content) { + exists(ExprNodes::MethodCallCfgNode call | + call.getExpr().getNumberOfArguments() = 0 and + content = call.getExpr().(AST::MethodCall).getMethodName() and + nodeFrom.asExpr() = call.getReceiver() and + nodeTo.asExpr() = call + ) +} + +/** + * A utility class that is equivalent to `boolean` but does not require type joining. + */ +class Boolean extends boolean { + Boolean() { this = true or this = false } +} diff --git a/scripts/identical-files.json b/scripts/identical-files.json index 074df07cd9f..4b5fbb73347 100644 --- a/scripts/identical-files.json +++ b/scripts/identical-files.json @@ -10,5 +10,9 @@ "DataFlow": [ "codeql/csharp/ql/src/semmle/code/csharp/dataflow/internal/DataFlowImpl.qll", "ql/src/codeql_ruby/dataflow/internal/DataFlowImpl.qll" + ], + "TypeTracker": [ + "codeql/python/ql/src/experimental/typetracking/TypeTracker.qll", + "ql/src/codeql_ruby/typetracking/TypeTracker.qll" ] -} \ No newline at end of file +}