Python: Fix bad performance

A few changes, all bundled together:

- We were getting a lot of magic applied to the predicates in the
  `ImportStar` module, and this was causing needless re-evaluation.
  To address this, the easiest solution was to simply cache the entire
  module.
- In order to separate this from the dataflow analysis and make it
  dependent only on control flow, `potentialImportStarBase` was changed
  to return a `ControlFlowNode`.
- `isDefinedLocally` was defined on control flow nodes, which meant we
  were duplicating a lot of tuples due to control flow splitting, to no
  actual benefit.

Finally, there was a really bad join in `isDefinedLocally` that was
fixed by separating out a helper predicate. This is a case where we
could use a three-way join, since the join between the `Scope`, the
`name` string and the `Name` is big no matter what.

If we join `scope_defines_name` with `n.getId()`, we'll get `Name`s
belonging to irrelevant scopes.

If we join `scope_defines_name` with the enclosing scope of the `Name`
`n`, then we'll get this also for `Name`s that don't share their `getId`
with the local variable defined in the scope.

If we join `n.getId()` with `n.getScope()...` then we'll get all
enclosing scopes for each `Name`.

The last of these is what we currently have. It's not terrible, but not
great either. (Though thankfully it's rare to have lots of enclosing
scopes.)
This commit is contained in:
Taus
2021-12-08 22:38:32 +00:00
committed by GitHub
parent 7cd9369d91
commit 8517eff0f7
2 changed files with 23 additions and 10 deletions

View File

@@ -375,7 +375,10 @@ module API {
* `moduleImport("foo").getMember("bar")`
*/
private TApiNode potential_import_star_base(Scope s) {
use(result, ImportStar::potentialImportStarBase(s))
exists(DataFlow::Node n |
n.asCfgNode() = ImportStar::potentialImportStarBase(s) and
use(result, n)
)
}
/**

View File

@@ -1,29 +1,29 @@
/** Provides predicates for reasoning about uses of `import *` in Python. */
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.internal.Builtins
cached
module ImportStar {
/**
* Holds if `n` is an access of a variable called `name` (which is _not_ the name of a
* built-in, and which is _not_ a global defined in the enclosing module) inside the scope `s`.
*/
cached
predicate namePossiblyDefinedInImportStar(NameNode n, string name, Scope s) {
n.isLoad() and
name = n.getId() and
s = n.getScope().getEnclosingScope*() and
exists(potentialImportStarBase(s)) and
// Not already defined in an enclosing scope.
not isDefinedLocally(n)
not isDefinedLocally(n.getNode())
}
/** Holds if `n` refers to a variable that is defined in the module in which it occurs. */
private predicate isDefinedLocally(NameNode n) {
cached
private predicate isDefinedLocally(Name n) {
// Defined in an enclosing scope
exists(LocalVariable v |
v.getId() = n.getId() and v.getScope() = n.getScope().getEnclosingScope*()
)
scope_defines_name(n.getScope().getEnclosingScope*(), n.getId())
or
// Defined as a built-in
n.getId() = Builtins::getBuiltinName()
@@ -35,7 +35,14 @@ module ImportStar {
n.getId() in ["__name__", "__package__"]
}
/** Holds if the name `name` is defined in the scope `s` */
pragma[nomagic]
private predicate scope_defines_name(Scope s, string name) {
exists(LocalVariable v | v.getId() = name and v.getScope() = s)
}
/** Holds if a global variable called `name` is assigned a value in the module `m`. */
cached
predicate globalNameDefinedInModule(string name, Module m) {
exists(NameNode n |
not exists(LocalVariable v | n.defines(v)) and
@@ -49,15 +56,17 @@ module ImportStar {
* Holds if `n` may refer to a global variable of the same name in the module `m`, accessible
* from the scope of `n` by a chain of `import *` imports.
*/
cached
predicate importStarResolvesTo(NameNode n, Module m) {
m = getStarImported+(n.getEnclosingModule()) and
globalNameDefinedInModule(n.getId(), m) and
not isDefinedLocally(n)
not isDefinedLocally(n.getNode())
}
/**
* Gets a module that is imported from `m` via `import *`.
*/
cached
Module getStarImported(Module m) {
exists(ImportStar i |
i.getScope() = m and result = i.getModule().pointsTo().(ModuleValue).getScope()
@@ -76,7 +85,8 @@ module ImportStar {
*
* this would return the data-flow nodes corresponding to `foo.bar` and `quux`.
*/
DataFlow::Node potentialImportStarBase(Scope s) {
result.asCfgNode() = any(ImportStarNode n | n.getScope() = s).getModule()
cached
ControlFlowNode potentialImportStarBase(Scope s) {
result = any(ImportStarNode n | n.getScope() = s).getModule()
}
}