ruby: Add query for hoisting Rails ActiveRecord calls

This does not take assicoations into account.
It uses ActiveRecordModelFinderCall to identify relevant calls.
This class has therefor been made public.
This commit is contained in:
Rasmus Lerchedahl Petersen
2024-11-27 23:49:46 +01:00
committed by yoff
parent d7117ef3e5
commit 5feb401607
5 changed files with 136 additions and 3 deletions

View File

@@ -254,9 +254,8 @@ private Expr getUltimateReceiver(MethodCall call) {
)
}
// A call to `find`, `where`, etc. that may return active record model object(s)
private class ActiveRecordModelFinderCall extends ActiveRecordModelInstantiation, DataFlow::CallNode
{
/** A call to `find`, `where`, etc. that may return active record model object(s) */
class ActiveRecordModelFinderCall extends ActiveRecordModelInstantiation, DataFlow::CallNode {
private ActiveRecordModelClass cls;
ActiveRecordModelFinderCall() {

View File

@@ -0,0 +1,22 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>
When a Rails ActiveRecord query is executed in a loop, it is potentially an n+1 problem.
This query identifies situations where an ActiveRecord query execution could be pulled out of a loop.
</p>
</overview>
<recommendation>
<p>If possible, pull the query out of the loop, thus replacing the many calls with a single one.
</p>
</recommendation>
<example>
<p>The following (suboptimal) example code queries the User object in each iteration of the loop:</p>
<sample src="examples/straight_loop.rb" />
<p>To improve the performance, we instead query the User object once outside the loop, gathereing all necessary information:</p>
<sample src="examples/preload.rb" />
</example>
</qhelp>

View File

@@ -0,0 +1,90 @@
/**
* @name Could be hoisted
* @description Hoist Rails `ActiveRecord::Relation` query calls out of loops.
* @kind problem
* @problem.severity info
* @precision high
* @id rb/could-be-hoisted
* @tags performance
*/
// Possible Improvements;
// - Consider also Associations.
// Associations are lazy-loading by default, so something like
// in a loop over `article` do
// `article.book`
// if you have 1000 articles it will do a 1000 calls to `book`.
// If you already did `article includes book`, there should be no problem.
// - Consider instances of ActiveRecordInstanceMethodCall, for instance
// calls to `pluck`.
import ruby
private import codeql.ruby.AST
import codeql.ruby.ast.internal.Constant
import codeql.ruby.Concepts
import codeql.ruby.frameworks.ActiveRecord
private import codeql.ruby.TaintTracking
string loopMethodName() {
result in [
"each", "reverse_each", "map", "map!", "foreach", "flat_map", "in_batches", "one?", "all?",
"collect", "collect!", "select", "select!", "reject", "reject!"
]
}
class LoopingCall extends DataFlow::CallNode {
DataFlow::CallableNode loopBlock;
LoopingCall() {
this.getMethodName() = loopMethodName() and loopBlock = this.getBlock().asCallable()
}
DataFlow::CallableNode getLoopBlock() { result = loopBlock }
}
predicate happensInLoop(LoopingCall loop, DataFlow::CallNode e) {
loop.getLoopBlock().asCallableAstNode() = e.asExpr().getScope()
}
predicate happensInOuterLoop(LoopingCall outerLoop, DataFlow::CallNode e) {
exists(LoopingCall innerLoop |
happensInLoop(outerLoop, innerLoop) and
happensInLoop(innerLoop, e)
)
}
predicate happensInInnermostLoop(LoopingCall loop, DataFlow::CallNode e) {
happensInLoop(loop, e) and
not happensInOuterLoop(loop, e)
}
// The ActiveRecord instance is used to potentially control the loop
predicate usedInLoopControlGuard(ActiveRecordInstance ar, DataFlow::Node guard) {
TaintTracking::localTaint(ar, guard) and
guard = guardForLoopControl(_, _)
}
// A guard for controlling the loop
DataFlow::Node guardForLoopControl(ConditionalExpr cond, Stmt control) {
result.asExpr().getAstNode() = cond.getCondition().getAChild*() and
(
control.(MethodCall).getMethodName() = "raise"
or
control instanceof NextStmt
) and
control = cond.getBranch(_).getAChild()
}
from LoopingCall loop, DataFlow::CallNode call
where
// Disregard loops over constants
not isArrayConstant(loop.getReceiver().asExpr(), _) and
// Disregard tests
not call.getLocation().getFile().getAbsolutePath().matches("%test%") and
// Disregard cases where the looping is influenced by the query result
not usedInLoopControlGuard(call, _) and
// Only report the inner most loop
happensInInnermostLoop(loop, call) and
// Only report calls that are likely to be expensive
call instanceof ActiveRecordModelFinderCall and
not call.getMethodName() in ["new", "create"]
select call, "This call happens inside $@, and could be hoisted.", loop, "this loop"

View File

@@ -0,0 +1,14 @@
# Preload User data
user_data = User.where(login: repo_names_by_owner.keys).pluck(:login, :id, :type).to_h do |login, id, type|
[login, { id: id, type: type == "User" ? "USER" : "ORGANIZATION" }]
end
repo_names_by_owner.each do |owner_slug, repo_names|
owner_info = user_data[owner_slug]
owner_id = owner_info[:id]
owner_type = owner_info[:type]
rel_conditions = { owner_id: owner_id, name: repo_names }
nwo_rel = nwo_rel.or(RepositorySecurityCenterConfig.where(rel_conditions)) unless neg
nwo_rel = nwo_rel.and(RepositorySecurityCenterConfig.where.not(rel_conditions)) if neg
end

View File

@@ -0,0 +1,8 @@
repo_names_by_owner.map do |owner_slug, repo_names|
owner_id, owner_type = User.where(login: owner_slug).pluck(:id, :type).first
owner_type = owner_type == "User" ? "USER" : "ORGANIZATION"
rel_conditions = { owner_id: owner_id, name: repo_names }
nwo_rel = nwo_rel.or(RepositorySecurityCenterConfig.where(rel_conditions)) unless neg
nwo_rel = nwo_rel.and(RepositorySecurityCenterConfig.where.not(rel_conditions)) if neg
end