mirror of
https://github.com/github/codeql.git
synced 2026-04-25 08:45:14 +02:00
Python: Fix join-order problem in SqlAlchemy
No major performance impact, more of a learning example for myself (had +3000 join order badness).
Initial tuple counts
```
Evaluated recursive predicate SqlAlchemy::SqlAlchemy::Connection::ConnectionConstruction#45e716e0@594cfx2g in 1ms on iteration 1 (delta size: 4).
Evaluated relational algebra for predicate SqlAlchemy::SqlAlchemy::Connection::ConnectionConstruction#45e716e0@594cfx2g on iteration 1 running pipeline base with tuple counts:
37793 ~0% {3} r1 = JOIN `ApiGraphs::API::Node.getACall/0#dispred#312deb92_10#join_rhs` WITH DataFlowPublic::CallCfgNode#b8ddbf81 ON FIRST 1 OUTPUT Lhs.1, Lhs.0, Rhs.1
0 ~0% {2} | JOIN WITH `SqlAlchemy::SqlAlchemy::Connection::classRef/0#565fc3ad` ON FIRST 1 OUTPUT Lhs.1, Lhs.2
30 ~0% {5} r2 = JOIN DataFlowPublic::CallCfgNode#b8ddbf81 WITH `DataFlowPublic::MethodCallNode.calls/2#dispred#1dd1e0f4#ffb` ON FIRST 1 OUTPUT Lhs.0, Lhs.1, Rhs.1, Rhs.2, _
{4} | REWRITE WITH NOT [NOT [Tmp.4 := "begin", TEST InOut.3 = Tmp.4], NOT [Tmp.4 := "connect", TEST InOut.3 = Tmp.4]] KEEPING 4
21 ~0% {3} | SCAN OUTPUT In.2, In.0, In.1
4 ~0% {2} | JOIN WITH `SqlAlchemy::SqlAlchemy::Engine::instance/0#1828baef` ON FIRST 1 OUTPUT Lhs.1, Lhs.2
4 ~0% {2} r3 = r1 UNION r2
return r3
```
which is fixed by the only_bind_out
```
Evaluated recursive predicate SqlAlchemy::SqlAlchemy::Connection::ConnectionConstruction#45e716e0@49effxtg in 0ms on iteration 1 (delta size: 0).
Evaluated relational algebra for predicate SqlAlchemy::SqlAlchemy::Connection::ConnectionConstruction#45e716e0@49effxtg on iteration 1 running pipeline base with tuple counts:
0 ~0% {1} r1 = JOIN `SqlAlchemy::SqlAlchemy::Connection::classRef/0#565fc3ad` WITH `ApiGraphs::API::Node.getACall/0#dispred#312deb92` ON FIRST 1 OUTPUT Rhs.1
0 ~0% {2} | JOIN WITH DataFlowPublic::CallCfgNode#b8ddbf81 ON FIRST 1 OUTPUT Lhs.0, Rhs.1
return r1
```
We also had this initial problem
```
Evaluated recursive predicate SqlAlchemy::SqlAlchemy::Connection::ConnectionConstruction#45e716e0@594cfx2g in 1ms on iteration 4 (delta size: 0).
Evaluated relational algebra for predicate SqlAlchemy::SqlAlchemy::Connection::ConnectionConstruction#45e716e0@594cfx2g on iteration 4 running pipeline standard with tuple counts:
48722 ~6% {2} r1 = DataFlowPublic::CallCfgNode#b8ddbf81 AND NOT SqlAlchemy::SqlAlchemy::Connection::ConnectionConstruction#45e716e0#prev(FIRST 2)
48722 ~3% {3} r2 = SCAN r1 OUTPUT In.0, _, In.1
48722 ~1% {3} | REWRITE WITH Out.1 := "connect"
16 ~0% {3} | JOIN WITH `DataFlowPublic::MethodCallNode.calls/2#dispred#1dd1e0f4#ffb_021#join_rhs` ON FIRST 2 OUTPUT Rhs.2, Lhs.0, Lhs.2
0 ~0% {2} | JOIN WITH `SqlAlchemy::SqlAlchemy::Connection::instance/0#5ed87c17#prev_delta` ON FIRST 1 OUTPUT Lhs.1, Lhs.2
48722 ~3% {3} r3 = SCAN r1 OUTPUT In.0, _, In.1
48722 ~2% {3} | REWRITE WITH Out.1 := "execution_options"
9 ~0% {3} | JOIN WITH `DataFlowPublic::MethodCallNode.calls/2#dispred#1dd1e0f4#ffb_021#join_rhs` ON FIRST 2 OUTPUT Rhs.2, Lhs.0, Lhs.2
0 ~0% {2} | JOIN WITH `SqlAlchemy::SqlAlchemy::Connection::instance/0#5ed87c17#prev_delta` ON FIRST 1 OUTPUT Lhs.1, Lhs.2
0 ~0% {2} r4 = r2 UNION r3
return r4
```
which is fixed by `connectionConstruction_helper`
```
Evaluated recursive predicate SqlAlchemy::SqlAlchemy::Connection::helper/0#62cfc178#b@4f295yef in 1ms on iteration 4 (delta size: 0).
Evaluated relational algebra for predicate SqlAlchemy::SqlAlchemy::Connection::helper/0#62cfc178#b@4f295yef on iteration 4 running pipeline standard with tuple counts:
4 ~0% {1} r1 = JOIN `SqlAlchemy::SqlAlchemy::Connection::instance/1#029b4c87#prev_delta` WITH `TypeTrackingImpl::TypeTracker::end/0#2ac2cfd4` ON FIRST 1 OUTPUT Lhs.1
16 ~0% {1} | JOIN WITH `LocalSources::Cached::hasLocalSource/2#8b3ee0ec_10#join_rhs` ON FIRST 1 OUTPUT Rhs.1
0 ~0% {3} | JOIN WITH `DataFlowPublic::MethodCallNode.calls/2#dispred#1dd1e0f4#ffb_102#join_rhs` ON FIRST 1 OUTPUT Rhs.1, Rhs.2, _
0 ~0% {2} | REWRITE WITH NOT [NOT [Tmp.2 := "connect", TEST InOut.1 = Tmp.2], NOT [Tmp.2 := "execution_options", TEST InOut.1 = Tmp.2]] KEEPING 2
0 ~0% {1} | JOIN WITH DataFlowPublic::CallCfgNode#b8ddbf81 ON FIRST 1 OUTPUT Lhs.0
0 ~0% {1} | AND NOT `SqlAlchemy::SqlAlchemy::Connection::helper/0#62cfc178#b#prev`(FIRST 1)
return r1
```
This commit is contained in:
@@ -113,15 +113,25 @@ module SqlAlchemy {
|
||||
*/
|
||||
abstract class InstanceSource extends DataFlow::LocalSourceNode { }
|
||||
|
||||
/**
|
||||
* join-ordering helper for ConnectionConstruction char-pred -- without this would
|
||||
* start with _all_ `CallCfgNode` and join those with `MethodCallNode` .. which is
|
||||
* silly
|
||||
*/
|
||||
pragma[noinline]
|
||||
private DataFlow::MethodCallNode connectionConstruction_helper() {
|
||||
result.calls(Engine::instance(), ["begin", "connect"])
|
||||
or
|
||||
result.calls(instance(), ["connect", "execution_options"])
|
||||
}
|
||||
|
||||
private class ConnectionConstruction extends InstanceSource, DataFlow::CallCfgNode {
|
||||
ConnectionConstruction() {
|
||||
this = classRef().getACall()
|
||||
// without the `pragma[only_bind_out]` we would start with joining
|
||||
// `API::Node.getACall` with `CallCfgNode` which is not optimal
|
||||
this = pragma[only_bind_out](classRef().getACall())
|
||||
or
|
||||
this.(DataFlow::MethodCallNode).calls(Engine::instance(), ["begin", "connect"])
|
||||
or
|
||||
this.(DataFlow::MethodCallNode).calls(instance(), "connect")
|
||||
or
|
||||
this.(DataFlow::MethodCallNode).calls(instance(), "execution_options")
|
||||
this = connectionConstruction_helper()
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user