Files
codeql/python/ql/src/Classes/MethodCallOrder.qll
Taus Brock-Nannestad 2b24eb2e70 Python: Fix bad join order for py/multiple-calls-to-init.
The `multiple_invocation_paths` predicate had a bad join order where
we (essentially) joined `i1` with `i2` and only then joined `i1` and `i2`
separately to reduce the number of tuples. The join coming from `i1 != i2` had
little impact, but `i1.getFunction() = multi` made a big difference (and
similarly for `i2`). I factored out the code so that these joins would be done
more eagerly. Thus, we went from

```
[2019-11-06 16:53:05] (38s) Starting to evaluate predicate MethodCallOrder::multiple_invocation_paths#ffff/4@2ce75a
[2019-11-06 16:53:35] (68s) Tuple counts for MethodCallOrder::multiple_invocation_paths#ffff:
                      134547    ~9%       {2} r1 = SCAN CallGraph::TInvocation#fff AS I OUTPUT I.<0>, I.<2>
                      235284431 ~3%       {4} r2 = JOIN r1 WITH CallGraph::TInvocation#fff AS R ON FIRST 1 OUTPUT r1.<0>, r1.<1>, R.<1>, R.<2>
                      235149884 ~3%       {4} r3 = SELECT r2 ON r2.<3> != r2.<1>
                      235149884 ~4%       {3} r4 = SCAN r3 OUTPUT r3.<1>, r3.<0>, r3.<3>
                      166753634 ~5%       {4} r5 = JOIN r4 WITH #CallGraph::FunctionInvocation::getACallee_dispred#ffPlus#swapped AS R ON FIRST 1 OUTPUT R.<1>, r4.<2>, r4.<1>, r4.<0>
                      129778    ~0%       {4} r6 = JOIN r5 WITH #CallGraph::FunctionInvocation::getACallee_dispred#ffPlus AS R ON FIRST 2 OUTPUT r5.<0>, r5.<3>, r5.<1>, r5.<2>
                                          return r6
[2019-11-06 16:53:35] (68s) Registering MethodCallOrder::multiple_invocation_paths#ffff + [] with content 1705dcbc08kd9aa40rp2g2e9civhv
[2019-11-06 16:53:35] (68s)  >>> Wrote relation MethodCallOrder::multiple_invocation_paths#ffff with 129778 rows and 4 columns.
```

to

```
[2019-11-06 17:22:22] (25s) Starting to evaluate predicate MethodCallOrder::multiple_invocation_paths_helper#ffff/4@586aec
[2019-11-06 17:22:22] (25s) Tuple counts for MethodCallOrder::multiple_invocation_paths_helper#ffff:
                      134547 ~0%     {2} r1 = SCAN CallGraph::TInvocation#fff AS I OUTPUT I.<2>, I.<0>
                      88111  ~4%     {3} r2 = JOIN r1 WITH #CallGraph::FunctionInvocation::getACallee_dispred#ffPlus#swapped AS R ON FIRST 1 OUTPUT R.<1>, r1.<1>, r1.<0>
                      761305 ~0%     {4} r3 = JOIN r2 WITH #CallGraph::FunctionInvocation::getACallee_dispred#ffPlus AS R ON FIRST 1 OUTPUT r2.<1>, r2.<2>, r2.<0>, R.<1>
                      673194 ~0%     {4} r4 = SELECT r3 ON r3.<3> != r3.<1>
                      673194 ~0%     {4} r5 = SCAN r4 OUTPUT r4.<2>, r4.<1>, r4.<3>, r4.<0>
                                     return r5
[2019-11-06 17:22:22] (25s) Registering MethodCallOrder::multiple_invocation_paths_helper#ffff + [] with content 20edaaecf25nldgp24d9c4et8m3kv
[2019-11-06 17:22:22] (25s)  >>> Wrote relation MethodCallOrder::multiple_invocation_paths_helper#ffff with 673194 rows and 4 columns.
[2019-11-06 17:22:22] (25s) Starting to evaluate predicate MethodCallOrder::multiple_invocation_paths_helper#ffff_2301#join_rhs/4@9e5441
[2019-11-06 17:22:22] (25s) Tuple counts for MethodCallOrder::multiple_invocation_paths_helper#ffff_2301#join_rhs:
                      673194 ~0%     {4} r1 = SCAN MethodCallOrder::multiple_invocation_paths_helper#ffff AS I OUTPUT I.<2>, I.<3>, I.<0>, I.<1>
                                     return r1
[2019-11-06 17:22:22] (25s) Registering MethodCallOrder::multiple_invocation_paths_helper#ffff_2301#join_rhs + [] with content 2069301e655fi9mcovngg9hetfqas
[2019-11-06 17:22:22] (25s)  >>> Wrote relation MethodCallOrder::multiple_invocation_paths_helper#ffff_2301#join_rhs with 673194 rows and 4 columns.
[2019-11-06 17:22:22] (25s) Starting to evaluate predicate MethodCallOrder::multiple_invocation_paths#ffff/4@2f7c34
[2019-11-06 17:22:22] (25s) Tuple counts for MethodCallOrder::multiple_invocation_paths#ffff:
                      134547 ~0%     {2} r1 = SCAN CallGraph::TInvocation#fff AS I OUTPUT I.<2>, I.<0>
                      129778 ~0%     {4} r2 = JOIN r1 WITH MethodCallOrder::multiple_invocation_paths_helper#ffff_2301#join_rhs AS R ON FIRST 2 OUTPUT R.<2>, R.<3>, r1.<0>, r1.<1>
                                     return r2
[2019-11-06 17:22:22] (25s) Registering MethodCallOrder::multiple_invocation_paths#ffff + [] with content 1705dcbc08kd9aa40rp2g2e9civhv
[2019-11-06 17:22:22] (25s)  >>> Wrote relation MethodCallOrder::multiple_invocation_paths#ffff with 129778 rows and 4 columns.
[2019-11-06 17:22:22] (25s) Starting to evaluate predicate MethodCallOrder::multiple_invocation_paths#ffff_0312#join_rhs/4@9f9146
[2019-11-06 17:22:22] (25s) Tuple counts for MethodCallOrder::multiple_invocation_paths#ffff_0312#join_rhs:
                      129778 ~0%     {4} r1 = SCAN MethodCallOrder::multiple_invocation_paths#ffff AS I OUTPUT I.<0>, I.<3>, I.<1>, I.<2>
                                     return r1
[2019-11-06 17:22:22] (25s) Registering MethodCallOrder::multiple_invocation_paths#ffff_0312#join_rhs + [] with content 17c3fe1fcbf6ghhdr7hiukqp41rst
[2019-11-06 17:22:22] (25s)  >>> Wrote relation MethodCallOrder::multiple_invocation_paths#ffff_0312#join_rhs with 129778 rows and 4 columns.
```

Execution time on `salt` went from 29.5s to somewhere below 299ms (the predicate
was not listed in the timing report).
2019-11-06 17:27:03 +01:00

73 lines
2.9 KiB
Plaintext

import python
// Helper predicates for multiple call to __init__/__del__ queries.
pragma [noinline]
private predicate multiple_invocation_paths_helper(FunctionInvocation top, FunctionInvocation i1, FunctionInvocation i2, FunctionObject multi) {
i1 != i2 and
i1 = top.getACallee+() and
i2 = top.getACallee+() and
i1.getFunction() = multi
}
pragma [noinline]
private predicate multiple_invocation_paths(FunctionInvocation top, FunctionInvocation i1, FunctionInvocation i2, FunctionObject multi) {
multiple_invocation_paths_helper(top, i1, i2, multi) and
i2.getFunction() = multi
}
/** Holds if `self.name` calls `multi` by multiple paths, and thus calls it more than once. */
predicate multiple_calls_to_superclass_method(ClassObject self, FunctionObject multi, string name) {
exists(FunctionInvocation top, FunctionInvocation i1, FunctionInvocation i2 |
multiple_invocation_paths(top, i1, i2, multi) and
top.runtime(self.declaredAttribute(name)) and
self.getASuperType().declaredAttribute(name) = multi |
/* Only called twice if called from different functions,
* or if one call-site can reach the other */
i1.getCall().getScope() != i2.getCall().getScope()
or
i1.getCall().strictlyReaches(i2.getCall())
)
}
/** Holds if all attributes called `name` can be inferred to be methods. */
private predicate named_attributes_not_method(ClassObject cls, string name) {
cls.declaresAttribute(name) and not cls.declaredAttribute(name) instanceof FunctionObject
}
/** Holds if `f` actually does something. */
private predicate does_something(FunctionObject f) {
f.isBuiltin() and not f = theObjectType().lookupAttribute("__init__")
or
exists(Stmt s | s = f.getFunction().getAStmt() and not s instanceof Pass)
}
/** Holds if `meth` looks like it should have a call to `name`, but does not */
private predicate missing_call(FunctionObject meth, string name) {
exists(CallNode call, AttrNode attr |
call.getScope() = meth.getFunction() and
call.getFunction() = attr and
attr.getName() = name and
not exists(FunctionObject f | f.getACall() = call)
)
}
/** Holds if `self.name` does not call `missing`, even though it is expected to. */
predicate missing_call_to_superclass_method(ClassObject self, FunctionObject top, FunctionObject missing, string name) {
missing = self.getASuperType().declaredAttribute(name) and
top = self.lookupAttribute(name) and
/* There is no call to missing originating from top */
not top.getACallee*() = missing and
/* Make sure that all named 'methods' are objects that we can understand. */
not exists(ClassObject sup |
sup = self.getAnImproperSuperType() and
named_attributes_not_method(sup, name)
) and
not self.isAbstract()
and
does_something(missing)
and
not missing_call(top, name)
}