From 95d235416c184b49941f56dfd195fb1f227995d5 Mon Sep 17 00:00:00 2001 From: Taus Date: Tue, 29 Mar 2022 13:29:15 +0000 Subject: [PATCH] Python: Fix bad antijoin in `getAKeyword` Before: ``` Tuple counts for Exprs::Call::getAKeyword_dispred#ff#antijoin_rhs/3@7bc202ij after 9s: 1 ~0% {1} r1 = CONSTANT(unique int)[2] 4244385 ~2% {1} r2 = JOIN r1 WITH py_dict_items_10#join_rhs ON FIRST 1 OUTPUT Rhs.1 'arg0' 4244352 ~3% {3} r3 = JOIN r2 WITH AstGenerated::Call_::getNamedArg_dispred#ffb_201#join_rhs ON FIRST 1 OUTPUT Rhs.1 'arg1', Lhs.0 'arg0', Rhs.2 'arg2' 66618690 ~3% {5} r4 = JOIN r3 WITH AstGenerated::Call_::getNamedArg_dispred#ffb ON FIRST 1 OUTPUT Lhs.1 'arg0', Lhs.0 'arg1', Lhs.2 'arg2', Rhs.1, Rhs.2 31187133 ~0% {5} r5 = SELECT r4 ON In.3 < In.2 'arg2' 31187133 ~1% {5} r6 = SCAN r5 OUTPUT In.4, 0, In.0 'arg0', In.1 'arg1', In.2 'arg2' 0 ~0% {3} r7 = JOIN r6 WITH py_dict_items ON FIRST 2 OUTPUT Lhs.2 'arg0', Lhs.3 'arg1', Lhs.4 'arg2' return r7 Tuple counts for Exprs::Call::getAKeyword_dispred#ff/2@1dc9468b after 421ms: 1 ~0% {1} r1 = CONSTANT(unique int)[2] 4244385 ~2% {1} r2 = JOIN r1 WITH py_dict_items_10#join_rhs ON FIRST 1 OUTPUT Rhs.1 'result' 4244352 ~0% {3} r3 = JOIN r2 WITH AstGenerated::Call_::getNamedArg_dispred#ffb_201#join_rhs ON FIRST 1 OUTPUT Lhs.0 'result', Rhs.1 'this', Rhs.2 4244352 ~0% {3} r4 = r3 AND NOT Exprs::Call::getAKeyword_dispred#ff#antijoin_rhs(Lhs.0 'result', Lhs.1 'this', Lhs.2) 4244352 ~6% {2} r5 = SCAN r4 OUTPUT In.1 'this', In.0 'result' return r5 ``` Oof. All that work to produce zero tuples. Luckily we can improve matters somewhat. Basically, there's no reason to test _all_ dictionary unpackings, since we're only interested in a lower bound. Thus, we can use `min` instead which is much more efficient. For convenience I factored this into its own (private) helper predicate. Now the tuple counts look as follows: ``` Tuple counts for Exprs::Call::getMinimumUnpackingIndex_dispred#ff#min_range/2@39b0e9sm after 1ms: 246 ~0% {2} r1 = JOIN Keywords::DictUnpackingOrKeyword#class#f#shared WITH AstGenerated::Call_::getNamedArg_dispred#ffb_201#join_rhs ON FIRST 1 OUTPUT Rhs.1 'arg0', Rhs.2 'arg1' return r1 Registering Exprs::Call::getMinimumUnpackingIndex_dispred#ff#min_range/2@39b0e9sm + with content 9ea2f123k8necpu015v6tpsc2t1 >>> Created relation Exprs::Call::getMinimumUnpackingIndex_dispred#ff#min_range/2@39b0e9sm with 246 rows. Starting to evaluate predicate Exprs::Call::getMinimumUnpackingIndex_dispred#ff#min_term/3@9f4ca5g8 Tuple counts for Exprs::Call::getMinimumUnpackingIndex_dispred#ff#min_term/3@9f4ca5g8 after 0ms: 246 ~2% {3} r1 = JOIN Keywords::DictUnpackingOrKeyword#class#f#shared WITH AstGenerated::Call_::getNamedArg_dispred#ffb_201#join_rhs ON FIRST 1 OUTPUT Rhs.1 'arg0', Rhs.2 'arg2', Rhs.2 'arg2' return r1 Tuple counts for Exprs::Call::getAKeyword_dispred#ff/2@000a0alb after 906ms: 1 ~0% {1} r1 = CONSTANT(unique int)[2] 4244385 ~2% {1} r2 = JOIN r1 WITH py_dict_items_10#join_rhs ON FIRST 1 OUTPUT Rhs.1 'result' 4244352 ~0% {3} r3 = JOIN r2 WITH AstGenerated::Call_::getNamedArg_dispred#ffb_201#join_rhs ON FIRST 1 OUTPUT Lhs.0 'result', Rhs.1 'this', Rhs.2 4244280 ~0% {3} r4 = r3 AND NOT Exprs::Call::getMinimumUnpackingIndex_dispred#ff_0#antijoin_rhs(Lhs.1 'this') 4244280 ~6% {2} r5 = SCAN r4 OUTPUT In.1 'this', In.0 'result' 4244352 ~3% {3} r6 = JOIN r2 WITH AstGenerated::Call_::getNamedArg_dispred#ffb_201#join_rhs ON FIRST 1 OUTPUT Rhs.1 'this', Lhs.0 'result', Rhs.2 72 ~4% {4} r7 = JOIN r6 WITH Exprs::Call::getMinimumUnpackingIndex_dispred#ff ON FIRST 1 OUTPUT Lhs.1 'result', Lhs.0 'this', Lhs.2, Rhs.1 72 ~4% {4} r8 = SELECT r7 ON In.2 <= In.3 72 ~0% {2} r9 = SCAN r8 OUTPUT In.1 'this', In.0 'result' 4244352 ~6% {2} r10 = r5 UNION r9 return r10 ``` This is not the perfect join order (note the similarity between `r3` and `r6`) but overall it's a win. --- python/ql/lib/semmle/python/Exprs.qll | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/python/ql/lib/semmle/python/Exprs.qll b/python/ql/lib/semmle/python/Exprs.qll index 81df00590ad..360408cecef 100644 --- a/python/ql/lib/semmle/python/Exprs.qll +++ b/python/ql/lib/semmle/python/Exprs.qll @@ -189,7 +189,16 @@ class Call extends Call_ { */ Keyword getKeyword(int index) { result = this.getNamedArg(index) and - not exists(DictUnpacking d, int lower | d = this.getNamedArg(lower) and lower < index) + ( + not exists(this.getMinimumUnpackingIndex()) + or + index <= this.getMinimumUnpackingIndex() + ) + } + + /** Gets the minimum index (if any) at which a dictionary unpacking (`**foo`) occurs in this call. */ + private int getMinimumUnpackingIndex() { + result = min(int i | this.getNamedArg(i) instanceof DictUnpacking) } /**