python: recover isPackageUsed

- add `unknownAttribute` to pre-compute negation
- add `Node`-less formulation of "is imported"
This commit is contained in:
yoff
2022-07-01 09:39:07 +00:00
committed by GitHub
parent 71583bf6be
commit f6af24894d
4 changed files with 61 additions and 25 deletions

View File

@@ -291,6 +291,21 @@ module API {
not m.matches("%.%")
}
/**
* Holds if an import of module `m` exists.
*
* This is determined without referring to `Node`,
* allowing this predicate to be used in a negative
* context when constructing new nodes.
*/
predicate moduleImportExists(string m) {
Impl::isImported(m) and
// restrict `moduleImport` so it will never give results for a dotted name. Note
// that we cannot move this logic to the `MkModuleImport` construction, since we
// need the intermediate API graph nodes for the prefixes in `import foo.bar.baz`.
not m.matches("%.%")
}
/** Gets a node corresponding to the built-in with the given name, if any. */
Node builtin(string n) { result = moduleImport("builtins").getMember(n) }
@@ -492,14 +507,38 @@ module API {
*
* Ignores relative imports, such as `from ..foo.bar import baz`.
*/
private predicate imports(DataFlow::Node imp, string name) {
private predicate imports(DataFlow::CfgNode imp, string name) {
exists(PY::ImportExprNode iexpr |
imp.asCfgNode() = iexpr and
imp.getNode() = iexpr and
not iexpr.getNode().isRelative() and
name = iexpr.getNode().getImportedModuleName()
)
}
/**
* Holds is them module `name` is imported.
*
* This is determined syntactically.
*/
cached
predicate isImported(string name) {
// Ignore the following module name for Python 2, as we alias `__builtin__` to `builtins` elsewhere
(name != "__builtin__" or PY::major_version() = 3) and
(
exists(PY::ImportExpr iexpr |
not iexpr.isRelative() and
name = iexpr.getImportedModuleName()
)
or
// When we `import foo.bar.baz` we want to create API graph nodes also for the prefixes
// `foo` and `foo.bar`:
name = any(PY::ImportExpr e | not e.isRelative()).getAnImportedModuleName()
)
or
// The `builtins` module should always be implicitly available
name = "builtins"
}
private import semmle.python.dataflow.new.internal.Builtins
private import semmle.python.dataflow.new.internal.ImportStar
@@ -518,7 +557,7 @@ module API {
*/
private TApiNode potential_import_star_base(PY::Scope s) {
exists(DataFlow::Node n |
n.asCfgNode() = ImportStar::potentialImportStarBase(s) and
n.(DataFlow::CfgNode).getNode() = ImportStar::potentialImportStarBase(s) and
use(result, n)
)
}
@@ -540,17 +579,17 @@ module API {
or
// TODO: I had expected `DataFlow::AttrWrite` to contain the attribute writes from a dict, that's how JS works.
exists(PY::Dict dict, PY::KeyValuePair item |
dict = pred.asExpr() and
dict = pred.(DataFlow::ExprNode).getNode().getNode() and
dict.getItem(_) = item and
lbl = Label::member(item.getKey().(PY::StrConst).getS()) and
rhs.asExpr() = item.getValue()
rhs.(DataFlow::ExprNode).getNode().getNode() = item.getValue()
)
or
exists(PY::CallableExpr fn | fn = pred.asExpr() |
exists(PY::CallableExpr fn | fn = pred.(DataFlow::ExprNode).getNode().getNode() |
not fn.getInnerScope().isAsync() and
lbl = Label::return() and
exists(PY::Return ret |
rhs.asExpr() = ret.getValue() and
rhs.(DataFlow::ExprNode).getNode().getNode() = ret.getValue() and
ret.getScope() = fn.getInnerScope()
)
)
@@ -592,7 +631,8 @@ module API {
// Subclassing a node
lbl = Label::subclass() and
exists(DataFlow::Node superclass | pred.flowsTo(superclass) |
ref.asExpr().(PY::ClassExpr).getABase() = superclass.asExpr()
ref.(DataFlow::ExprNode).getNode().getNode().(PY::ClassExpr).getABase() =
superclass.(DataFlow::ExprNode).getNode().getNode()
)
or
// awaiting
@@ -604,7 +644,7 @@ module API {
)
or
exists(DataFlow::Node def, PY::CallableExpr fn |
rhs(base, def) and fn = trackDefNode(def).asExpr()
rhs(base, def) and fn = trackDefNode(def).(DataFlow::ExprNode).getNode().getNode()
|
exists(int i, int offset |
if exists(PY::Parameter p | p = fn.getInnerScope().getAnArg() and p.isSelf())
@@ -612,18 +652,19 @@ module API {
else offset = 0
|
lbl = Label::parameter(i - offset) and
ref.asExpr() = fn.getInnerScope().getArg(i)
ref.(DataFlow::ExprNode).getNode().getNode() = fn.getInnerScope().getArg(i)
)
or
exists(string name, PY::Parameter param |
lbl = Label::keywordParameter(name) and
param = fn.getInnerScope().getArgByName(name) and
not param.isSelf() and
ref.asExpr() = param
ref.(DataFlow::ExprNode).getNode().getNode() = param
)
or
lbl = Label::selfParameter() and
ref.asExpr() = any(PY::Parameter p | p = fn.getInnerScope().getAnArg() and p.isSelf())
ref.(DataFlow::ExprNode).getNode().getNode() =
any(PY::Parameter p | p = fn.getInnerScope().getAnArg() and p.isSelf())
)
or
// Built-ins, treated as members of the module `builtins`
@@ -635,7 +676,7 @@ module API {
base = potential_import_star_base(s) and
lbl =
Label::member(any(string name |
ImportStar::namePossiblyDefinedInImportStar(ref.asCfgNode(), name, s)
ImportStar::namePossiblyDefinedInImportStar(ref.(DataFlow::CfgNode).getNode(), name, s)
))
)
}
@@ -901,7 +942,7 @@ module API {
ApiLabel memberFromRef(DataFlow::AttrRef ref) {
result = member(ref.getAttributeName())
or
not exists(ref.getAttributeName()) and
ref.unknownAttribute() and
result = unknownMember()
}

View File

@@ -50,6 +50,9 @@ abstract class AttrRef extends Node {
* better results.
*/
abstract string getAttributeName();
/** Holds if a name could not be determined for this attribute. */
predicate unknownAttribute() { not exists(this.getAttributeName()) }
}
/**

View File

@@ -60,9 +60,9 @@ module Builtins {
* Currently this is an over-approximation, and may not account for things like overwriting a
* built-in with a different value.
*/
DataFlow::Node likelyBuiltin(string name) {
DataFlow::CfgNode likelyBuiltin(string name) {
exists(Module m |
result.asCfgNode() =
result.getNode() =
any(NameNode n |
possible_builtin_accessed_in_module(n, name, m) and
not possible_builtin_defined_in_module(name, m)

View File

@@ -33,15 +33,7 @@ private import AccessPathSyntax
/**
* Holds if models describing `package` may be relevant for the analysis of this database.
*/
bindingset[package]
predicate isPackageUsed(string package) {
// We would like to ask for imports here,
// but that entails non-monotonic recursion.
// For now, we follow Ruby and just allow all packages.
//
// exists(API::moduleImport(package))
any()
}
predicate isPackageUsed(string package) { API::moduleImportExists(package) }
/** Gets a Python-specific interpretation of the `(package, type, path)` tuple after resolving the first `n` access path tokens. */
bindingset[package, type, path]