mirror of
https://github.com/github/codeql.git
synced 2026-06-15 18:01:10 +02:00
The `multiple_invocation_paths` predicate had a bad join order where
we (essentially) joined `i1` with `i2` and only then joined `i1` and `i2`
separately to reduce the number of tuples. The join coming from `i1 != i2` had
little impact, but `i1.getFunction() = multi` made a big difference (and
similarly for `i2`). I factored out the code so that these joins would be done
more eagerly. Thus, we went from
```
[2019-11-06 16:53:05] (38s) Starting to evaluate predicate MethodCallOrder::multiple_invocation_paths#ffff/4@2ce75a
[2019-11-06 16:53:35] (68s) Tuple counts for MethodCallOrder::multiple_invocation_paths#ffff:
134547 ~9% {2} r1 = SCAN CallGraph::TInvocation#fff AS I OUTPUT I.<0>, I.<2>
235284431 ~3% {4} r2 = JOIN r1 WITH CallGraph::TInvocation#fff AS R ON FIRST 1 OUTPUT r1.<0>, r1.<1>, R.<1>, R.<2>
235149884 ~3% {4} r3 = SELECT r2 ON r2.<3> != r2.<1>
235149884 ~4% {3} r4 = SCAN r3 OUTPUT r3.<1>, r3.<0>, r3.<3>
166753634 ~5% {4} r5 = JOIN r4 WITH #CallGraph::FunctionInvocation::getACallee_dispred#ffPlus#swapped AS R ON FIRST 1 OUTPUT R.<1>, r4.<2>, r4.<1>, r4.<0>
129778 ~0% {4} r6 = JOIN r5 WITH #CallGraph::FunctionInvocation::getACallee_dispred#ffPlus AS R ON FIRST 2 OUTPUT r5.<0>, r5.<3>, r5.<1>, r5.<2>
return r6
[2019-11-06 16:53:35] (68s) Registering MethodCallOrder::multiple_invocation_paths#ffff + [] with content 1705dcbc08kd9aa40rp2g2e9civhv
[2019-11-06 16:53:35] (68s) >>> Wrote relation MethodCallOrder::multiple_invocation_paths#ffff with 129778 rows and 4 columns.
```
to
```
[2019-11-06 17:22:22] (25s) Starting to evaluate predicate MethodCallOrder::multiple_invocation_paths_helper#ffff/4@586aec
[2019-11-06 17:22:22] (25s) Tuple counts for MethodCallOrder::multiple_invocation_paths_helper#ffff:
134547 ~0% {2} r1 = SCAN CallGraph::TInvocation#fff AS I OUTPUT I.<2>, I.<0>
88111 ~4% {3} r2 = JOIN r1 WITH #CallGraph::FunctionInvocation::getACallee_dispred#ffPlus#swapped AS R ON FIRST 1 OUTPUT R.<1>, r1.<1>, r1.<0>
761305 ~0% {4} r3 = JOIN r2 WITH #CallGraph::FunctionInvocation::getACallee_dispred#ffPlus AS R ON FIRST 1 OUTPUT r2.<1>, r2.<2>, r2.<0>, R.<1>
673194 ~0% {4} r4 = SELECT r3 ON r3.<3> != r3.<1>
673194 ~0% {4} r5 = SCAN r4 OUTPUT r4.<2>, r4.<1>, r4.<3>, r4.<0>
return r5
[2019-11-06 17:22:22] (25s) Registering MethodCallOrder::multiple_invocation_paths_helper#ffff + [] with content 20edaaecf25nldgp24d9c4et8m3kv
[2019-11-06 17:22:22] (25s) >>> Wrote relation MethodCallOrder::multiple_invocation_paths_helper#ffff with 673194 rows and 4 columns.
[2019-11-06 17:22:22] (25s) Starting to evaluate predicate MethodCallOrder::multiple_invocation_paths_helper#ffff_2301#join_rhs/4@9e5441
[2019-11-06 17:22:22] (25s) Tuple counts for MethodCallOrder::multiple_invocation_paths_helper#ffff_2301#join_rhs:
673194 ~0% {4} r1 = SCAN MethodCallOrder::multiple_invocation_paths_helper#ffff AS I OUTPUT I.<2>, I.<3>, I.<0>, I.<1>
return r1
[2019-11-06 17:22:22] (25s) Registering MethodCallOrder::multiple_invocation_paths_helper#ffff_2301#join_rhs + [] with content 2069301e655fi9mcovngg9hetfqas
[2019-11-06 17:22:22] (25s) >>> Wrote relation MethodCallOrder::multiple_invocation_paths_helper#ffff_2301#join_rhs with 673194 rows and 4 columns.
[2019-11-06 17:22:22] (25s) Starting to evaluate predicate MethodCallOrder::multiple_invocation_paths#ffff/4@2f7c34
[2019-11-06 17:22:22] (25s) Tuple counts for MethodCallOrder::multiple_invocation_paths#ffff:
134547 ~0% {2} r1 = SCAN CallGraph::TInvocation#fff AS I OUTPUT I.<2>, I.<0>
129778 ~0% {4} r2 = JOIN r1 WITH MethodCallOrder::multiple_invocation_paths_helper#ffff_2301#join_rhs AS R ON FIRST 2 OUTPUT R.<2>, R.<3>, r1.<0>, r1.<1>
return r2
[2019-11-06 17:22:22] (25s) Registering MethodCallOrder::multiple_invocation_paths#ffff + [] with content 1705dcbc08kd9aa40rp2g2e9civhv
[2019-11-06 17:22:22] (25s) >>> Wrote relation MethodCallOrder::multiple_invocation_paths#ffff with 129778 rows and 4 columns.
[2019-11-06 17:22:22] (25s) Starting to evaluate predicate MethodCallOrder::multiple_invocation_paths#ffff_0312#join_rhs/4@9f9146
[2019-11-06 17:22:22] (25s) Tuple counts for MethodCallOrder::multiple_invocation_paths#ffff_0312#join_rhs:
129778 ~0% {4} r1 = SCAN MethodCallOrder::multiple_invocation_paths#ffff AS I OUTPUT I.<0>, I.<3>, I.<1>, I.<2>
return r1
[2019-11-06 17:22:22] (25s) Registering MethodCallOrder::multiple_invocation_paths#ffff_0312#join_rhs + [] with content 17c3fe1fcbf6ghhdr7hiukqp41rst
[2019-11-06 17:22:22] (25s) >>> Wrote relation MethodCallOrder::multiple_invocation_paths#ffff_0312#join_rhs with 129778 rows and 4 columns.
```
Execution time on `salt` went from 29.5s to somewhere below 299ms (the predicate
was not listed in the timing report).
73 lines
2.9 KiB
Plaintext
73 lines
2.9 KiB
Plaintext
import python
|
|
|
|
// Helper predicates for the "multiple calls to __init__/__del__" queries.
|
|
|
|
/**
 * Holds if `i1` and `i2` are two different invocations that are both reached
 * from `top` via one or more `getACallee` steps, and `i1` is an invocation of `multi`.
 *
 * This is deliberately a separate, non-inlined predicate: restricting `i1` to
 * `multi` here keeps the intermediate relation small before the matching
 * restriction on `i2` is applied by `multiple_invocation_paths`.
 */
pragma [noinline]
private predicate multiple_invocation_paths_helper(
    FunctionInvocation top, FunctionInvocation i1, FunctionInvocation i2, FunctionObject multi
) {
    multi = i1.getFunction() and
    top.getACallee+() = i1 and
    top.getACallee+() = i2 and
    i2 != i1
}
|
|
|
|
/**
 * Holds if `i1` and `i2` are two different invocations of `multi`, both reached
 * from `top` via one or more `getACallee` steps.
 *
 * The helper already constrains `i1` to `multi`; this predicate adds the same
 * constraint for `i2`.
 */
pragma [noinline]
private predicate multiple_invocation_paths(
    FunctionInvocation top, FunctionInvocation i1, FunctionInvocation i2, FunctionObject multi
) {
    i2.getFunction() = multi and
    multiple_invocation_paths_helper(top, i1, i2, multi)
}
|
|
|
|
/**
 * Holds if the method `name` declared by `self` reaches `multi` along more than
 * one call path, so that `multi` is called more than once.
 *
 * `multi` is the attribute `name` declared by some super type of `self`.
 */
predicate multiple_calls_to_superclass_method(ClassObject self, FunctionObject multi, string name) {
    multi = self.getASuperType().declaredAttribute(name) and
    exists(FunctionInvocation root, FunctionInvocation first, FunctionInvocation second |
        root.runtime(self.declaredAttribute(name)) and
        multiple_invocation_paths(root, first, second, multi) and
        (
            /* The two invocations only count as separate calls if their call-sites
             * are in different scopes ... */
            first.getCall().getScope() != second.getCall().getScope()
            or
            /* ... or if the first call-site can reach the second one. */
            first.getCall().strictlyReaches(second.getCall())
        )
    )
}
|
|
|
|
/**
 * Holds if `cls` declares an attribute called `name` and the declared value of
 * that attribute is not a `FunctionObject`, i.e. `name` on `cls` is not
 * something this library can treat as a method.
 */
private predicate named_attributes_not_method(ClassObject cls, string name) {
    cls.declaresAttribute(name) and
    not cls.declaredAttribute(name) instanceof FunctionObject
}
|
|
|
|
/**
 * Holds if `f` has an effect worth calling it for: either its body contains
 * at least one statement other than `pass`, or it is a builtin that is not
 * the `__init__` looked up on `theObjectType()`.
 */
private predicate does_something(FunctionObject f) {
    exists(Stmt stmt |
        stmt = f.getFunction().getAStmt() and
        not stmt instanceof Pass
    )
    or
    f.isBuiltin() and
    not f = theObjectType().lookupAttribute("__init__")
}
|
|
|
|
/**
 * Holds if the scope of `meth` contains a call to an attribute called `name`
 * for which no `FunctionObject` is known to be the callee, i.e. `meth` appears
 * to call `name` but the call is absent from the inferred call graph.
 */
private predicate missing_call(FunctionObject meth, string name) {
    exists(CallNode site |
        meth.getFunction() = site.getScope() and
        site.getFunction().(AttrNode).getName() = name and
        not exists(FunctionObject target | site = target.getACall())
    )
}
|
|
|
|
/**
 * Holds if `top`, the attribute `name` looked up on `self`, never reaches
 * `missing`, the attribute `name` declared by a super type of `self`, even
 * though a call to it is expected.
 */
predicate missing_call_to_superclass_method(
    ClassObject self, FunctionObject top, FunctionObject missing, string name
) {
    top = self.lookupAttribute(name) and
    missing = self.getASuperType().declaredAttribute(name) and
    not self.isAbstract() and
    does_something(missing) and
    /* `missing` is not among the functions reached from `top` via zero or more callee steps. */
    not missing = top.getACallee*() and
    /* Ignore `top` if it contains a call to `name` that could not be resolved. */
    not missing_call(top, name) and
    /* Every attribute called `name` in the hierarchy must be something we can understand as a method. */
    not exists(ClassObject ancestor |
        named_attributes_not_method(ancestor, name) and
        ancestor = self.getAnImproperSuperType()
    )
}
|
|
|