From 5c3b231a01f07627ff8e5b49fcbe59ddc8c41aa8 Mon Sep 17 00:00:00 2001 From: Tom Hvitved Date: Thu, 28 May 2026 10:49:04 +0200 Subject: [PATCH] Ruby: Adopt shared local name resolution library --- ruby/ql/lib/codeql/ruby/ast/Parameter.qll | 14 +- ruby/ql/lib/codeql/ruby/ast/internal/AST.qll | 4 +- .../codeql/ruby/ast/internal/Parameter.qll | 2 +- .../ql/lib/codeql/ruby/ast/internal/Scope.qll | 12 +- .../codeql/ruby/ast/internal/Synthesis.qll | 47 ++-- .../lib/codeql/ruby/ast/internal/Variable.qll | 266 +++++++++++------- ruby/ql/lib/qlpack.yml | 1 + 7 files changed, 209 insertions(+), 137 deletions(-) diff --git a/ruby/ql/lib/codeql/ruby/ast/Parameter.qll b/ruby/ql/lib/codeql/ruby/ast/Parameter.qll index 5b3994378c1..953d3f01d92 100644 --- a/ruby/ql/lib/codeql/ruby/ast/Parameter.qll +++ b/ruby/ql/lib/codeql/ruby/ast/Parameter.qll @@ -134,7 +134,7 @@ class BlockParameter extends NamedParameter, TBlockParameter { final override string getName() { result = g.getName().getValue() } final override LocalVariable getVariable() { - result = TLocalVariableReal(_, _, g.getName()) or + result.(LocalVariableReal).getDefiningNode() = g.getName() or result = TLocalVariableSynth(this, 0) } @@ -164,7 +164,7 @@ class HashSplatParameter extends NamedParameter, THashSplatParameter { final override string getAPrimaryQlClass() { result = "HashSplatParameter" } final override LocalVariable getVariable() { - result = TLocalVariableReal(_, _, g.getName()) or + result.(LocalVariableReal).getDefiningNode() = g.getName() or result = TLocalVariableSynth(this, 0) } @@ -212,7 +212,9 @@ class KeywordParameter extends NamedParameter, TKeywordParameter { final override string getAPrimaryQlClass() { result = "KeywordParameter" } - final override LocalVariable getVariable() { result = TLocalVariableReal(_, _, g.getName()) } + final override LocalVariable getVariable() { + result.(LocalVariableReal).getDefiningNode() = g.getName() + } /** * Gets the default value, i.e. the value assigned to the parameter when one @@ -262,7 +264,9 @@ class OptionalParameter extends NamedParameter, TOptionalParameter { */ final Expr getDefaultValue() { toGenerated(result) = g.getValue() } - final override LocalVariable getVariable() { result = TLocalVariableReal(_, _, g.getName()) } + final override LocalVariable getVariable() { + result.(LocalVariableReal).getDefiningNode() = g.getName() + } final override string toString() { result = this.getName() } @@ -293,7 +297,7 @@ class SplatParameter extends NamedParameter, TSplatParameter { final override string getAPrimaryQlClass() { result = "SplatParameter" } final override LocalVariable getVariable() { - result = TLocalVariableReal(_, _, g.getName()) or + result.(LocalVariableReal).getDefiningNode() = g.getName() or result = TLocalVariableSynth(this, 0) } diff --git a/ruby/ql/lib/codeql/ruby/ast/internal/AST.qll b/ruby/ql/lib/codeql/ruby/ast/internal/AST.qll index bf187e22699..17d4a6bb8b6 100644 --- a/ruby/ql/lib/codeql/ruby/ast/internal/AST.qll +++ b/ruby/ql/lib/codeql/ruby/ast/internal/AST.qll @@ -207,9 +207,7 @@ private module Cached { TLambda(Ruby::Lambda g) or TLine(Ruby::Line g) or TLeftAssignmentList(Ruby::LeftAssignmentList g) or - TLocalVariableAccessReal(Ruby::Identifier g, TLocalVariableReal v) { - LocalVariableAccess::range(g, v) - } or + TLocalVariableAccessReal(Ruby::Identifier g, TLocalVariableReal v) { access(g, v) } or TLocalVariableAccessSynth(Ast::AstNode parent, int i, Ast::LocalVariable v) { mkSynthChild(LocalVariableAccessRealKind(v), parent, i) or diff --git a/ruby/ql/lib/codeql/ruby/ast/internal/Parameter.qll b/ruby/ql/lib/codeql/ruby/ast/internal/Parameter.qll index 8f07554fb0c..94d25aee032 100644 --- a/ruby/ql/lib/codeql/ruby/ast/internal/Parameter.qll +++ b/ruby/ql/lib/codeql/ruby/ast/internal/Parameter.qll @@ -33,7 +33,7 @@ class SimpleParameterRealImpl extends SimpleParameterImpl, TSimpleParameterReal SimpleParameterRealImpl() { this = TSimpleParameterReal(g) } - override LocalVariable getVariableImpl() { result = TLocalVariableReal(_, _, g) } + override LocalVariable getVariableImpl() { result.(LocalVariableReal).getDefiningNode() = g } override string getNameImpl() { result = g.getValue() } } diff --git a/ruby/ql/lib/codeql/ruby/ast/internal/Scope.qll b/ruby/ql/lib/codeql/ruby/ast/internal/Scope.qll index 03fe2ce4350..9b77a342d53 100644 --- a/ruby/ql/lib/codeql/ruby/ast/internal/Scope.qll +++ b/ruby/ql/lib/codeql/ruby/ast/internal/Scope.qll @@ -118,7 +118,7 @@ private Ruby::AstNode specialParentOf(Ruby::AstNode n) { ] } -private Ruby::AstNode parentOf(Ruby::AstNode n) { +Ruby::AstNode parentOf(Ruby::AstNode n) { n = getHereDocBody(result) or result = specialParentOf(n).getParent() @@ -172,13 +172,15 @@ private module Cached { } } -bindingset[n] -pragma[inline_late] -Scope::Range scopeOf(Ruby::AstNode n) { result = Cached::scopeOfImpl(n) } +import Cached bindingset[n] pragma[inline_late] -Scope scopeOfInclSynth(AstNode n) { result = Cached::scopeOfInclSynthImpl(n) } +Scope::Range scopeOf(Ruby::AstNode n) { result = scopeOfImpl(n) } + +bindingset[n] +pragma[inline_late] +Scope scopeOfInclSynth(AstNode n) { result = scopeOfInclSynthImpl(n) } abstract class ScopeImpl extends AstNode, TScopeType { final Scope getOuterScopeImpl() { result = scopeOfInclSynth(this) } diff --git a/ruby/ql/lib/codeql/ruby/ast/internal/Synthesis.qll b/ruby/ql/lib/codeql/ruby/ast/internal/Synthesis.qll index 9d2dd16ea63..ca40a4160d2 100644 --- a/ruby/ql/lib/codeql/ruby/ast/internal/Synthesis.qll +++ b/ruby/ql/lib/codeql/ruby/ast/internal/Synthesis.qll @@ -299,9 +299,12 @@ private predicate hasLocation(AstNode n, Location l) { private module ImplicitSelfSynthesis { pragma[nomagic] private predicate identifierMethodCallSelfSynthesis(AstNode mc, int i, Child child) { - child = SynthChild(SelfKind(TSelfVariable(scopeOf(toGenerated(mc)).getEnclosingSelfScope()))) and - mc = TIdentifierMethodCall(_) and - i = 0 + exists(SelfVariableImpl self | + self.getDeclaringScopeImpl() = scopeOf(toGenerated(mc)).getEnclosingSelfScope() and + child = SynthChild(SelfKind(self)) and + mc = TIdentifierMethodCall(_) and + i = 0 + ) } private class IdentifierMethodCallSelfSynthesis extends Synthesis { @@ -312,13 +315,14 @@ private module ImplicitSelfSynthesis { pragma[nomagic] private predicate regularMethodCallSelfSynthesis(TRegularMethodCall mc, int i, Child child) { - exists(Ruby::AstNode g | + exists(Ruby::AstNode g, SelfVariableImpl self | mc = TRegularMethodCall(g) and // If there's no explicit receiver, then the receiver is implicitly `self`. - not exists(g.(Ruby::Call).getReceiver()) - ) and - child = SynthChild(SelfKind(TSelfVariable(scopeOf(toGenerated(mc)).getEnclosingSelfScope()))) and - i = 0 + not exists(g.(Ruby::Call).getReceiver()) and + self.getDeclaringScopeImpl() = scopeOf(toGenerated(mc)).getEnclosingSelfScope() and + child = SynthChild(SelfKind(self)) and + i = 0 + ) } private class RegularMethodCallSelfSynthesis extends Synthesis { @@ -341,9 +345,10 @@ private module ImplicitSelfSynthesis { */ pragma[nomagic] private SelfKind getSelfKind(InstanceVariableAccess var) { - exists(Ruby::AstNode owner | + exists(Ruby::AstNode owner, SelfVariableImpl self | + self.getDeclaringScopeImpl() = scopeOf(owner).getEnclosingSelfScope() and owner = toGenerated(instanceVarAccessSynthParentStar(var)) and - result = SelfKind(TSelfVariable(scopeOf(owner).getEnclosingSelfScope())) + result = SelfKind(self) ) } @@ -1566,20 +1571,20 @@ private module ForLoopDesugar { * { a: a } * ``` */ -private module ImplicitHashValueSynthesis { - private Ruby::AstNode keyWithoutValue(AstNode parent, int i) { +module ImplicitHashValueSynthesis { + Ruby::AstNode keyWithoutValue(Ruby::AstNode parent, int i) { exists(Ruby::KeywordPattern pair | result = pair.getKey() and - result = toGenerated(parent.(HashPattern).getKey(i)) and + result = parent.(Ruby::HashPattern).getChild(i).(Ruby::KeywordPattern).getKey() and not exists(pair.getValue()) ) or - exists(Ruby::Pair pair | - i = 0 and - result = pair.getKey() and - pair = toGenerated(parent) and - not exists(pair.getValue()) - ) + parent = + any(Ruby::Pair pair | + i = 0 and + result = pair.getKey() and + not exists(pair.getValue()) + ) } private string keyName(Ruby::AstNode key) { @@ -1589,7 +1594,7 @@ private module ImplicitHashValueSynthesis { private class ImplicitHashValueSynthesis extends Synthesis { final override predicate child(AstNode parent, int i, Child child) { - exists(Ruby::AstNode key | key = keyWithoutValue(parent, i) | + exists(Ruby::AstNode key | key = keyWithoutValue(toGenerated(parent), i) | exists(TVariableReal variable | access(key, variable) and child = SynthChild(LocalVariableAccessRealKind(variable)) @@ -1616,7 +1621,7 @@ private module ImplicitHashValueSynthesis { } final override predicate location(AstNode n, Location l) { - exists(AstNode p, int i | l = keyWithoutValue(p, i).getLocation() | + exists(AstNode p, int i | l = keyWithoutValue(toGenerated(p), i).getLocation() | n = p.(HashPattern).getValue(i) or i = 0 and n = p.(Pair).getValue() diff --git a/ruby/ql/lib/codeql/ruby/ast/internal/Variable.qll b/ruby/ql/lib/codeql/ruby/ast/internal/Variable.qll index 7c130220a86..6e92b54c246 100644 --- a/ruby/ql/lib/codeql/ruby/ast/internal/Variable.qll +++ b/ruby/ql/lib/codeql/ruby/ast/internal/Variable.qll @@ -2,6 +2,7 @@ overlay[local] module; private import TreeSitter +private import codeql.namebinding.LocalNameBinding private import codeql.ruby.AST private import codeql.ruby.CFG private import codeql.ruby.ast.internal.AST @@ -94,10 +95,11 @@ predicate scopeDefinesParameterVariable( // In case of overlapping parameter names (e.g. `_`), only the first // parameter will give rise to a variable i = - min(Ruby::Identifier other | - parameterAssignment(scope, name, other, _) + min(Ruby::Identifier other, int startline, int startcolumn | + parameterAssignment(scope, name, other, _) and + other.getLocation().hasLocationInfo(_, startline, startcolumn, _, _) | - other order by other.getLocation().getStartLine(), other.getLocation().getStartColumn() + other order by startline, startcolumn ) and parameterAssignment(scope, name, _, pos) or @@ -113,7 +115,8 @@ predicate scopeDefinesParameterVariable( ) } -pragma[nomagic] +bindingset[i] +pragma[inline_late] private string variableNameInScope(Ruby::AstNode i, Scope::Range scope) { scope = scopeOf(i) and ( @@ -137,40 +140,142 @@ private predicate scopeAssigns(Scope::Range scope, string name, Ruby::AstNode i) name = variableNameInScope(i, scope) } +private module Input implements LocalNameBindingInputSig { + predicate cacheRevRef() { exists(TVariable v) implies any() } + + class AstNode = Ruby::AstNode; + + AstNode getChild(AstNode parent, int index) { + parent = parentOf(result) and + ( + index = result.getParentIndex() + or + not exists(result.getParentIndex()) and + index = -1 + ) + } + + class Conditional extends AstNode { + Conditional() { none() } + + AstNode getCondition() { none() } + + AstNode getThen() { none() } + + AstNode getElse() { none() } + } + + class SiblingShadowingDecl extends AstNode { + SiblingShadowingDecl() { none() } + + AstNode getLhs() { none() } + + AstNode getRhs() { none() } + + AstNode getElse() { none() } + } + + predicate isTopScope(AstNode scope) { + scope instanceof Scope::Range and + not ( + scope instanceof Ruby::Block or + scope instanceof Ruby::DoBlock or + scope instanceof Ruby::Lambda + ) + } + + private Scope::Range getParentScope(Scope::Range scope) { + result = scopeOf(scope) and + not isTopScope(scope) + } + + bindingset[name, scope] + pragma[inline_late] + private predicate declInScope0(AstNode definingNode, string name, AstNode scope) { + scopeDefinesParameterVariable(scope, name, definingNode, _) or + scopeAssigns(scope, name, definingNode) + } + + predicate declInScope(AstNode definingNode, string name, AstNode scope) { + scopeDefinesParameterVariable(scope, name, definingNode, _) + or + /* + * Variables are not declared explicitly in Ruby, so we consider the _first_ assignment to + * be the declaration: + * + * ```rb + * a = 1 # declares `a` + * a = 2 # does not declare `a` + * 1.times do | x | # declares `x` + * a = 2 # does not declare `a` + * end + * ``` + */ + + scopeAssigns(scope, name, definingNode) and + not scopeDefinesParameterVariable(scope, name, _, _) and + not exists(AstNode prev, AstNode prevScope | + prevScope = getParentScope*(scope) and + declInScope0(prev, name, prevScope) and + prev.getLocation().strictlyBefore(definingNode.getLocation()) + ) + } + + predicate implicitDeclInScope(string name, AstNode scope) { + name = "self" and + scope instanceof SelfBase::Range + } + + predicate accessCand(AstNode n, string name) { + name = variableNameInScope(n, _) and + ( + explicitAssignmentNode(n, _) + or + implicitAssignmentNode(n) + or + scopeDefinesParameterVariable(_, _, n, _) + or + vcall(n) + or + n = any(Ruby::VariableReferencePattern vr).getName() + or + n = ImplicitHashValueSynthesis::keyWithoutValue(_, _) + ) + or + n instanceof Ruby::Self and + name = "self" + } +} + +private import LocalNameBinding + cached private module Cached { cached newtype TVariable = - TGlobalVariable(string name) { name = any(Ruby::GlobalVariable var).getValue() } or + TGlobalVariable(string name) { + CachedStage::ref() and + name = any(Ruby::GlobalVariable var).getValue() + } or TClassVariable(Scope::Range scope, string name, Ruby::AstNode decl) { decl = - min(Ruby::ClassVariable other | - classVariableAccess(other, name, scope) + min(Ruby::ClassVariable other, int startline, int startcolumn | + classVariableAccess(other, name, scope) and + other.getLocation().hasLocationInfo(_, startline, startcolumn, _, _) | - other order by other.getLocation().getStartLine(), other.getLocation().getStartColumn() + other order by startline, startcolumn ) } or TInstanceVariable(Scope::Range scope, string name, boolean instance, Ruby::AstNode decl) { decl = - min(Ruby::InstanceVariable other | - instanceVariableAccess(other, name, scope, instance) + min(Ruby::InstanceVariable other, int startline, int startcolumn | + instanceVariableAccess(other, name, scope, instance) and + other.getLocation().hasLocationInfo(_, startline, startcolumn, _, _) | - other order by other.getLocation().getStartLine(), other.getLocation().getStartColumn() + other order by startline, startcolumn ) } or - TLocalVariableReal(Scope::Range scope, string name, Ruby::AstNode i) { - scopeDefinesParameterVariable(scope, name, i, _) - or - i = - min(Ruby::AstNode other | - scopeAssigns(scope, name, other) - | - other order by other.getLocation().getStartLine(), other.getLocation().getStartColumn() - ) and - not scopeDefinesParameterVariable(scope, name, _, _) and - not inherits(scope, name, _) - } or - TSelfVariable(SelfBase::Range scope) or + TLocalVariableReal(Local l) or TLocalVariableSynth(AstNode n, int i) { any(Synthesis s).localVariable(n, i) } // Db types that can be vcalls @@ -321,39 +426,37 @@ private module Cached { i = any(Ruby::ExpressionReferencePattern x).getValue() } - pragma[nomagic] - private predicate hasScopeAndName(VariableReal variable, Scope::Range scope, string name) { - variable.getNameImpl() = name and - scope = variable.getDeclaringScopeImpl() - } - cached predicate access(Ruby::AstNode access, VariableReal variable) { - exists(string name, Scope::Range scope | - pragma[only_bind_into](name) = variableNameInScope(access, scope) + exists(Local l | + variable = TLocalVariableReal(l) and + access = l.getAnAccess() | - hasScopeAndName(variable, scope, name) and - not access.getLocation().strictlyBefore(variable.getLocationImpl()) and - // In case of overlapping parameter names, later parameters should not - // be considered accesses to the first parameter - if parameterAssignment(_, _, access, _) - then scopeDefinesParameterVariable(_, _, access, _) - else any() + l instanceof ImplicitLocal or - exists(Scope::Range declScope | - hasScopeAndName(variable, declScope, pragma[only_bind_into](name)) and - inherits(scope, name, declScope) - ) + /* + * In the example below, `a` is declared in the scope of `M`, but only the + * second mention of `a` is an actual access: + * + * ```rb + * module M + * puts a # calls method `a` + * a = 1 # declares `a` + * puts a # accesses variable `a` + * end + * ``` + */ + + not access.getLocation().strictlyBefore(l.getDefiningNode().getLocation()) ) } private class Access extends Ruby::Token { Access() { - access(this.(Ruby::Identifier), _) or + access(this, _) or this instanceof Ruby::GlobalVariable or this instanceof Ruby::InstanceVariable or - this instanceof Ruby::ClassVariable or - this instanceof Ruby::Self + this instanceof Ruby::ClassVariable } } @@ -398,29 +501,6 @@ private module Cached { import Cached -/** Holds if this scope inherits `name` from an outer scope `outer`. */ -private predicate inherits(Scope::Range scope, string name, Scope::Range outer) { - ( - scope instanceof Ruby::Block or - scope instanceof Ruby::DoBlock or - scope instanceof Ruby::Lambda - ) and - not scopeDefinesParameterVariable(scope, name, _, _) and - ( - outer = scope.getOuterScope() and - ( - scopeDefinesParameterVariable(outer, name, _, _) - or - exists(Ruby::AstNode i | - scopeAssigns(outer, name, i) and - i.getLocation().strictlyBefore(scope.getLocation()) - ) - ) - or - inherits(scope.getOuterScope(), name, outer) - ) -} - abstract class VariableImpl extends TVariable { abstract string getNameImpl(); @@ -429,10 +509,9 @@ abstract class VariableImpl extends TVariable { abstract Location getLocationImpl(); } -class TVariableReal = - TGlobalVariable or TClassVariable or TInstanceVariable or TLocalVariableReal or TSelfVariable; +class TVariableReal = TGlobalVariable or TClassVariable or TInstanceVariable or TLocalVariableReal; -class TLocalVariable = TLocalVariableReal or TLocalVariableSynth or TSelfVariable; +class TLocalVariable = TLocalVariableReal or TLocalVariableSynth; /** * A "real" (i.e. non-synthesized) variable. This class only exists to @@ -458,19 +537,19 @@ private class VariableRealAdapter extends VariableImpl, TVariableReal instanceof } class LocalVariableReal extends VariableReal, TLocalVariableReal { - private Scope::Range scope; - private string name; - private Ruby::AstNode i; + private Local l; - LocalVariableReal() { this = TLocalVariableReal(scope, name, i) } + LocalVariableReal() { this = TLocalVariableReal(l) } - final override string getNameImpl() { result = name } + Ruby::AstNode getDefiningNode() { result = l.getDefiningNode() } - final override Location getLocationImpl() { result = i.getLocation() } + final override string getNameImpl() { result = l.getName() } - final override Scope::Range getDeclaringScopeImpl() { result = scope } + final override Location getLocationImpl() { result = l.getLocation() } - final VariableAccess getDefiningAccessImpl() { toGenerated(result) = i } + final override Scope::Range getDeclaringScopeImpl() { result = l.getScope() } + + final VariableAccess getDefiningAccessImpl() { toGenerated(result) = l.getDefiningNode() } } class LocalVariableSynth extends VariableImpl, TLocalVariableSynth { @@ -531,34 +610,16 @@ class ClassVariableImpl extends VariableReal, TClassVariable { final override Scope::Range getDeclaringScopeImpl() { result = scope } } -class SelfVariableImpl extends VariableReal, TSelfVariable { - private SelfBase::Range scope; +class SelfVariableImpl extends LocalVariableReal { + private ImplicitLocal l; - SelfVariableImpl() { this = TSelfVariable(scope) } - - final override string getNameImpl() { result = "self" } - - final override Location getLocationImpl() { result = scope.getLocation() } - - final override Scope::Range getDeclaringScopeImpl() { result = scope } + SelfVariableImpl() { this = TLocalVariableReal(l) } } abstract class VariableAccessImpl extends Expr, TVariableAccess { abstract VariableImpl getVariableImpl(); } -module LocalVariableAccess { - predicate range(Ruby::Identifier id, TLocalVariableReal v) { - access(id, v) and - ( - explicitWriteAccess(id, _) or - implicitWriteAccess(id) or - vcall(id) or - id = any(Ruby::VariableReferencePattern vr).getName() - ) - } -} - class TVariableAccessReal = TLocalVariableAccessReal or TGlobalVariableAccess or TInstanceVariableAccess or TClassVariableAccess; @@ -681,7 +742,8 @@ private class SelfVariableAccessReal extends SelfVariableAccessImpl, TSelfReal { SelfVariableAccessReal() { exists(Ruby::Self self | - this = TSelfReal(self) and var = TSelfVariable(scopeOf(self).getEnclosingSelfScope()) + this = TSelfReal(self) and + access(self, var) ) } diff --git a/ruby/ql/lib/qlpack.yml b/ruby/ql/lib/qlpack.yml index 3029a6d098f..399564bdb33 100644 --- a/ruby/ql/lib/qlpack.yml +++ b/ruby/ql/lib/qlpack.yml @@ -14,6 +14,7 @@ dependencies: codeql/ssa: ${workspace} codeql/tutorial: ${workspace} codeql/util: ${workspace} + codeql/namebinding: ${workspace} dataExtensions: - codeql/ruby/frameworks/**/model.yml - codeql/ruby/frameworks/**/*.model.yml