mirror of
https://github.com/github/codeql.git
synced 2026-04-28 02:05:14 +02:00
Merge pull request #18304 from yoff/ruby/performance-queries
Ruby: Query for database calls in a loop
This commit is contained in:
@@ -254,9 +254,8 @@ private Expr getUltimateReceiver(MethodCall call) {
|
||||
)
|
||||
}
|
||||
|
||||
// A call to `find`, `where`, etc. that may return active record model object(s)
|
||||
private class ActiveRecordModelFinderCall extends ActiveRecordModelInstantiation, DataFlow::CallNode
|
||||
{
|
||||
/** A call to `find`, `where`, etc. that may return active record model object(s) */
|
||||
class ActiveRecordModelFinderCall extends ActiveRecordModelInstantiation, DataFlow::CallNode {
|
||||
private ActiveRecordModelClass cls;
|
||||
|
||||
ActiveRecordModelFinderCall() {
|
||||
|
||||
23
ruby/ql/src/queries/performance/DatabaseQueryInLoop.qhelp
Normal file
23
ruby/ql/src/queries/performance/DatabaseQueryInLoop.qhelp
Normal file
@@ -0,0 +1,23 @@
|
||||
<!DOCTYPE qhelp PUBLIC
|
||||
"-//Semmle//qhelp//EN"
|
||||
"qhelp.dtd">
|
||||
<qhelp>
|
||||
|
||||
<overview>
|
||||
<p>
|
||||
When a database query operation, for example a call to a query method in the Rails `ActiveRecord::Relation` class, is executed in a loop, this can lead to a performance issue known as an "n+1 query problem".
|
||||
The database query will be executed in each iteration of the loop.
|
||||
Performance can usually be improved by performing a single database query outside of a loop, which retrieves all the required objects in a single operation.
|
||||
</p>
|
||||
</overview>
|
||||
<recommendation>
|
||||
<p>If possible, pull the database query out of the loop and rewrite it to retrieve all the required objects. This replaces multiple database operations with a single one.
|
||||
</p>
|
||||
</recommendation>
|
||||
<example>
|
||||
<p>The following (suboptimal) example code queries the <code>User</code> object in each iteration of the loop:</p>
|
||||
<sample src="examples/straight_loop.rb" />
|
||||
<p>To improve the performance, we instead query the <code>User</code> object once outside the loop, gathering all necessary information in a single query:</p>
|
||||
<sample src="examples/preload.rb" />
|
||||
</example>
|
||||
</qhelp>
|
||||
74
ruby/ql/src/queries/performance/DatabaseQueryInLoop.ql
Normal file
74
ruby/ql/src/queries/performance/DatabaseQueryInLoop.ql
Normal file
@@ -0,0 +1,74 @@
|
||||
/**
|
||||
* @name Database query in a loop
|
||||
* @description Database queries in a loop can lead to an unnecessary amount of database calls and poor performance.
|
||||
* @kind problem
|
||||
* @problem.severity info
|
||||
* @precision high
|
||||
* @id rb/database-query-in-loop
|
||||
* @tags performance
|
||||
*/
|
||||
|
||||
import ruby
|
||||
private import codeql.ruby.AST
|
||||
import codeql.ruby.ast.internal.Constant
|
||||
import codeql.ruby.Concepts
|
||||
import codeql.ruby.frameworks.ActiveRecord
|
||||
private import codeql.ruby.TaintTracking
|
||||
private import codeql.ruby.CFG
|
||||
private import codeql.ruby.controlflow.internal.Guards as Guards
|
||||
|
||||
/** Gets the name of a built-in method that involves a loop operation. */
|
||||
string getALoopMethodName() {
|
||||
result in [
|
||||
"each", "reverse_each", "map", "map!", "foreach", "flat_map", "in_batches", "one?", "all?",
|
||||
"collect", "collect!", "select", "select!", "reject", "reject!"
|
||||
]
|
||||
}
|
||||
|
||||
/** A loop, represented by a call to a loop operation. */
|
||||
class LoopingCall extends DataFlow::CallNode {
|
||||
Callable loopScope;
|
||||
|
||||
LoopingCall() {
|
||||
this.getMethodName() = getALoopMethodName() and
|
||||
loopScope = this.getBlock().asCallable().asCallableAstNode()
|
||||
}
|
||||
|
||||
/** Holds if `c` is executed as part of the body of this loop. */
|
||||
predicate executesCall(DataFlow::CallNode c) { c.asExpr().getScope() = loopScope }
|
||||
}
|
||||
|
||||
/** Holds if `ar` influences a guard that may control the execution of a loop. */
|
||||
predicate usedInLoopControlGuard(ActiveRecordInstance ar) {
|
||||
exists(DataFlow::Node insideGuard, CfgNodes::ExprCfgNode guard |
|
||||
// For a guard like `cond && ar`, the whole guard will not be tainted
|
||||
// so we need to look at the taint of the individual parts.
|
||||
insideGuard.asExpr().getExpr() = guard.getExpr().getAChild*()
|
||||
|
|
||||
TaintTracking::localTaint(ar, insideGuard) and
|
||||
guardForLoopControl(guard, _)
|
||||
)
|
||||
}
|
||||
|
||||
/** Holds if `guard` controls `break` and `break` would break out of a loop. */
|
||||
predicate guardForLoopControl(CfgNodes::ExprCfgNode guard, CfgNodes::AstCfgNode break) {
|
||||
Guards::guardControlsBlock(guard, break.getBasicBlock(), _) and
|
||||
(
|
||||
break.(CfgNodes::ExprNodes::MethodCallCfgNode).getMethodName() = "raise"
|
||||
or
|
||||
break instanceof CfgNodes::ReturningCfgNode
|
||||
)
|
||||
}
|
||||
|
||||
from LoopingCall loop, ActiveRecordModelFinderCall call
|
||||
where
|
||||
loop.executesCall(call) and
|
||||
// Disregard loops over constants
|
||||
not isArrayConstant(loop.getReceiver().asExpr(), _) and
|
||||
// Disregard cases where the looping is influenced by the query result
|
||||
not usedInLoopControlGuard(call) and
|
||||
// Only report calls that are likely to be expensive
|
||||
not call.getMethodName() in ["new", "create"]
|
||||
select call,
|
||||
"This call to a database query operation happens inside $@, and could be hoisted to a single call outside the loop.",
|
||||
loop, "this loop"
|
||||
14
ruby/ql/src/queries/performance/examples/preload.rb
Normal file
14
ruby/ql/src/queries/performance/examples/preload.rb
Normal file
@@ -0,0 +1,14 @@
|
||||
# Preload User data
|
||||
user_data = User.where(login: repo_names_by_owner.keys).pluck(:login, :id, :type).to_h do |login, id, type|
|
||||
[login, { id: id, type: type == "User" ? "USER" : "ORGANIZATION" }]
|
||||
end
|
||||
|
||||
repo_names_by_owner.each do |owner_slug, repo_names|
|
||||
owner_info = user_data[owner_slug]
|
||||
owner_id = owner_info[:id]
|
||||
owner_type = owner_info[:type]
|
||||
rel_conditions = { owner_id: owner_id, name: repo_names }
|
||||
|
||||
nwo_rel = nwo_rel.or(RepositorySecurityCenterConfig.where(rel_conditions)) unless neg
|
||||
nwo_rel = nwo_rel.and(RepositorySecurityCenterConfig.where.not(rel_conditions)) if neg
|
||||
end
|
||||
@@ -0,0 +1,8 @@
|
||||
repo_names_by_owner.map do |owner_slug, repo_names|
|
||||
owner_id, owner_type = User.where(login: owner_slug).pluck(:id, :type).first
|
||||
owner_type = owner_type == "User" ? "USER" : "ORGANIZATION"
|
||||
rel_conditions = { owner_id: owner_id, name: repo_names }
|
||||
|
||||
nwo_rel = nwo_rel.or(RepositorySecurityCenterConfig.where(rel_conditions)) unless neg
|
||||
nwo_rel = nwo_rel.and(RepositorySecurityCenterConfig.where.not(rel_conditions)) if neg
|
||||
end
|
||||
@@ -0,0 +1,3 @@
|
||||
| DatabaseQueryInLoop.rb:11:13:11:52 | call to first | This call to a database query operation happens inside $@, and could be hoisted to a single call outside the loop. | DatabaseQueryInLoop.rb:10:9:12:11 | call to map | this loop |
|
||||
| DatabaseQueryInLoop.rb:16:20:16:59 | call to first | This call to a database query operation happens inside $@, and could be hoisted to a single call outside the loop. | DatabaseQueryInLoop.rb:15:9:21:11 | call to map | this loop |
|
||||
| DatabaseQueryInLoop.rb:19:17:19:56 | call to first | This call to a database query operation happens inside $@, and could be hoisted to a single call outside the loop. | DatabaseQueryInLoop.rb:18:13:20:15 | call to map | this loop |
|
||||
@@ -0,0 +1,2 @@
|
||||
query: queries/performance/DatabaseQueryInLoop.ql
|
||||
postprocess: utils/test/InlineExpectationsTestQuery.ql
|
||||
@@ -0,0 +1,61 @@
|
||||
|
||||
class User < ActiveRecord::Base
|
||||
end
|
||||
|
||||
class DatabaseQueryInLoopTest
|
||||
def test
|
||||
### These are bad
|
||||
|
||||
# simple query in loop
|
||||
names.map do |name|
|
||||
User.where(login: name).pluck(:id).first # $ Alert
|
||||
end
|
||||
|
||||
# nested loop
|
||||
names.map do |name|
|
||||
user = User.where(login: name).pluck(:id).first # $ Alert
|
||||
|
||||
ids.map do |user_id|
|
||||
User.where(id: user_id).pluck(:id).first # $ Alert
|
||||
end
|
||||
end
|
||||
|
||||
### These are OK
|
||||
|
||||
# Not in loop
|
||||
User.where(login: owner_slug).pluck(:id).first
|
||||
|
||||
# Loops over constant array
|
||||
%w(first-name second-name).map { |name| User.where(login: name).pluck(:id).first }
|
||||
|
||||
constant_names = [first-name, second-name]
|
||||
constant_names.each do |name|
|
||||
User.where(login: name).pluck(:id).first
|
||||
end
|
||||
|
||||
# Loop traversal is influenced by query result
|
||||
# raising an exception if the user is not found
|
||||
names.map do |name|
|
||||
user = User.where(login: name).pluck(:id).first
|
||||
unless user
|
||||
raise Error.new("User '#{name}' not found")
|
||||
end
|
||||
end
|
||||
|
||||
# more complicated condition
|
||||
names.map do |name|
|
||||
user = User.where(login: name).pluck(:id).first
|
||||
unless cond && user
|
||||
raise Error.new("User '#{name}' not found")
|
||||
end
|
||||
end
|
||||
|
||||
# skipping through the loop when users are not relevant
|
||||
names.map do |name|
|
||||
user = User.where(login: name).pluck(:id).first
|
||||
if not isRelevant(user)
|
||||
next
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
Reference in New Issue
Block a user