diff --git a/ql/Cargo.lock b/ql/Cargo.lock index e071c5e71b4..3ba709a5ede 100644 Binary files a/ql/Cargo.lock and b/ql/Cargo.lock differ diff --git a/ql/buramu/Cargo.toml b/ql/buramu/Cargo.toml index 5b87a8a90be..ca832808727 100644 --- a/ql/buramu/Cargo.toml +++ b/ql/buramu/Cargo.toml @@ -9,4 +9,4 @@ edition = "2018" lazy_static = "1.4.0" chrono = "0.4.24" rayon = "1.7.0" -regex = "1.7.3" +regex = "1.8.0" diff --git a/ql/extractor/Cargo.toml b/ql/extractor/Cargo.toml index bc300138f96..9650466edfb 100644 --- a/ql/extractor/Cargo.toml +++ b/ql/extractor/Cargo.toml @@ -17,5 +17,5 @@ clap = { version = "4.2", features = ["derive"] } tracing = "0.1" tracing-subscriber = { version = "0.3.16", features = ["env-filter"] } rayon = "1.7.0" -regex = "1.7.3" +regex = "1.8.0" codeql-extractor = { path = "../../shared/tree-sitter-extractor" } diff --git a/ruby/ql/lib/codeql/ruby/ApiGraphs.qll b/ruby/ql/lib/codeql/ruby/ApiGraphs.qll index d46859654c5..b1320d047cc 100644 --- a/ruby/ql/lib/codeql/ruby/ApiGraphs.qll +++ b/ruby/ql/lib/codeql/ruby/ApiGraphs.qll @@ -359,6 +359,11 @@ module API { Location getLocation() { result = this.getInducingNode().getLocation() or + exists(DataFlow::ModuleNode mod | + this = Impl::MkModuleObject(mod) and + result = mod.getLocation() + ) + or // For nodes that do not have a meaningful location, `path` is the empty string and all other // parameters are zero. not exists(this.getInducingNode()) and @@ -601,7 +606,9 @@ module API { /** A use of an API member at the node `nd`. */ MkUse(DataFlow::Node nd) { isUse(nd) } or /** A value that escapes into an external library at the node `nd` */ - MkDef(DataFlow::Node nd) { isDef(nd) } + MkDef(DataFlow::Node nd) { isDef(nd) } or + /** A module object seen as a use node. */ + MkModuleObject(DataFlow::ModuleNode mod) private string resolveTopLevel(ConstantReadAccess read) { result = read.getModule().getQualifiedName() and @@ -684,7 +691,14 @@ module API { * Holds if `ref` is a use of node `nd`. */ cached - predicate use(TApiNode nd, DataFlow::Node ref) { nd = MkUse(ref) } + predicate use(TApiNode nd, DataFlow::Node ref) { + nd = MkUse(ref) + or + exists(DataFlow::ModuleNode mod | + nd = MkModuleObject(mod) and + ref = mod.getAnImmediateReference() + ) + } /** * Holds if `rhs` is a RHS of node `nd`. @@ -802,6 +816,14 @@ module API { trackUseNode(use).flowsTo(call.getReceiver()) } + /** + * Holds if `superclass` is the superclass of `mod`. + */ + pragma[nomagic] + private predicate superclassNode(DataFlow::ModuleNode mod, DataFlow::Node superclass) { + superclass.asExpr().getExpr() = mod.getADeclaration().(ClassDeclaration).getSuperclassExpr() + } + /** * Holds if there is an edge from `pred` to `succ` in the API graph that is labeled with `lbl`. */ @@ -813,38 +835,35 @@ module API { useRoot(lbl, ref) or exists(DataFlow::Node node, DataFlow::Node src | - pred = MkUse(src) and + use(pred, src) and trackUseNode(src).flowsTo(node) and useStep(lbl, node, ref) ) or exists(DataFlow::Node callback | - pred = MkDef(callback) and + def(pred, callback) and parameterStep(lbl, trackDefNode(callback), ref) ) ) or exists(DataFlow::Node predNode, DataFlow::Node succNode | def(pred, predNode) and - def(succ, succNode) and + succ = MkDef(succNode) and defStep(lbl, trackDefNode(predNode), succNode) ) or - // `pred` is a use of class A - // `succ` is a use of class B - // there exists a class declaration B < A - exists(ClassDeclaration c, DataFlow::Node a, DataFlow::Node b | - use(pred, a) and - use(succ, b) and - b.asExpr().getExpr().(ConstantReadAccess).getAQualifiedName() = c.getAQualifiedName() and - pragma[only_bind_into](c).getSuperclassExpr() = a.asExpr().getExpr() and + exists(DataFlow::Node predNode, DataFlow::Node superclassNode, DataFlow::ModuleNode mod | + use(pred, predNode) and + trackUseNode(predNode).flowsTo(superclassNode) and + superclassNode(mod, superclassNode) and + succ = MkModuleObject(mod) and lbl = Label::subclass() ) or exists(DataFlow::CallNode call | // from receiver to method call node exists(DataFlow::Node receiver | - pred = MkUse(receiver) and + use(pred, receiver) and useNodeReachesReceiver(receiver, call) and lbl = Label::method(call.getMethodName()) and succ = MkMethodAccessNode(call) diff --git a/ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPublic.qll b/ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPublic.qll index 418e3f621d2..9d668e0b300 100644 --- a/ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPublic.qll +++ b/ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPublic.qll @@ -890,6 +890,9 @@ class ModuleNode instanceof Module { /** Gets a constant or `self` variable that refers to this module. */ LocalSourceNode getAnImmediateReference() { result.asExpr().getExpr() = super.getAnImmediateReference() + or + // Include 'self' parameters; these are not expressions and so not found by the case above + result = this.getAnOwnModuleSelf() } /** diff --git a/ruby/ql/test/library-tests/dataflow/helpers/dataflow.expected b/ruby/ql/test/library-tests/dataflow/helpers/dataflow.expected index 299a840e94c..fb2043d9759 100644 --- a/ruby/ql/test/library-tests/dataflow/helpers/dataflow.expected +++ b/ruby/ql/test/library-tests/dataflow/helpers/dataflow.expected @@ -127,11 +127,28 @@ getModuleLevelSelf getAnImmediateReference | file://:0:0:0:0 | Array | tst.rb:59:15:59:21 | Array | | file://:0:0:0:0 | Hash | tst.rb:60:14:60:45 | Hash | +| tst.rb:1:1:6:3 | C1 | tst.rb:1:1:6:3 | self (C1) | | tst.rb:1:1:6:3 | C1 | tst.rb:8:12:8:13 | C1 | +| tst.rb:8:1:11:3 | C2 | tst.rb:8:1:11:3 | self (C2) | | tst.rb:8:1:11:3 | C2 | tst.rb:27:12:27:13 | C2 | +| tst.rb:13:1:18:3 | Mixin | tst.rb:13:1:18:3 | self (Mixin) | +| tst.rb:13:1:18:3 | Mixin | tst.rb:16:5:17:7 | self in m1s | | tst.rb:13:1:18:3 | Mixin | tst.rb:28:13:28:17 | Mixin | +| tst.rb:20:1:25:3 | Mixin2 | tst.rb:20:1:25:3 | self (Mixin2) | +| tst.rb:20:1:25:3 | Mixin2 | tst.rb:23:5:24:7 | self in m2s | | tst.rb:20:1:25:3 | Mixin2 | tst.rb:29:13:29:18 | Mixin2 | +| tst.rb:27:1:35:3 | C3 | tst.rb:27:1:35:3 | self (C3) | +| tst.rb:27:1:35:3 | C3 | tst.rb:32:9:33:11 | self in c3_self1 | +| tst.rb:27:1:35:3 | C3 | tst.rb:37:1:38:3 | self in c3_self2 | | tst.rb:27:1:35:3 | C3 | tst.rb:37:5:37:6 | C3 | +| tst.rb:40:1:47:3 | N1 | tst.rb:40:1:47:3 | self (N1) | +| tst.rb:41:5:42:7 | N1::XY1 | tst.rb:41:5:42:7 | self (XY1) | +| tst.rb:43:5:46:7 | N1::N2 | tst.rb:43:5:46:7 | self (N2) | +| tst.rb:44:9:45:11 | N1::N2::XY2 | tst.rb:44:9:45:11 | self (XY2) | +| tst.rb:49:1:51:3 | N2 | tst.rb:49:1:51:3 | self (N2) | +| tst.rb:49:1:51:3 | N2 | tst.rb:52:1:55:3 | self (N2) | +| tst.rb:53:5:54:7 | N2::XY3 | tst.rb:53:5:54:7 | self (XY3) | +| tst.rb:57:1:62:3 | Nodes | tst.rb:57:1:62:3 | self (Nodes) | getOwnInstanceMethod | tst.rb:1:1:6:3 | C1 | c1 | tst.rb:2:5:5:7 | c1 | | tst.rb:8:1:11:3 | C2 | c2 | tst.rb:9:5:10:7 | c2 |