mirror of
https://github.com/github/codeql.git
synced 2026-04-06 23:54:11 +02:00
247 lines
9.8 KiB
Plaintext
247 lines
9.8 KiB
Plaintext
/**
|
|
* INTERNAL: Do not use.
|
|
*
|
|
* Has predicates to help find subclasses in library code. Should only be used to aid in
|
|
* the manual library modeling process,
|
|
*/
|
|
|
|
private import python
|
|
private import semmle.python.dataflow.new.DataFlow
|
|
private import semmle.python.dataflow.new.internal.ImportResolution
|
|
private import semmle.python.ApiGraphs
|
|
private import semmle.python.filters.Tests
|
|
private import semmle.python.Module
|
|
|
|
// very much inspired by the draft at https://github.com/github/codeql/pull/5632
|
|
module NotExposed {
|
|
// Instructions:
|
|
// This needs to be automated better, but for this prototype, here are some rough instructions:
|
|
// 0) get a database of the library you are about to model
|
|
// 1) fill out the `getAlreadyModeledClass` body below
|
|
// 2) quick-eval the `quickEvalMe` predicate below, and copy the output to your modeling predicate
|
|
predicate quickEvalMe(string newImport) {
|
|
newImport =
|
|
"// imports generated by python/frameworks/internal/SubclassFinder.qll\n" + "this = API::" +
|
|
concat(string newModelFullyQualified |
|
|
newModel(any(FindSubclassesSpec spec), newModelFullyQualified, _, _, _)
|
|
|
|
|
fullyQualifiedToApiGraphPath(newModelFullyQualified), " or this = API::"
|
|
)
|
|
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Implementation below
|
|
// ---------------------------------------------------------------------------
|
|
//
|
|
// We are looking to find all subclassed of the already modeled classes, and ideally
|
|
// we would identify an `API::Node` for each (then `toString` would give the API
|
|
// path).
|
|
//
|
|
// An inherent problem with API graphs is that there doesn't need to exist a result
|
|
// for the API graph path that we want to add to our modeling (the path to the new
|
|
// subclass). As an example, the following query has no results when evaluated against
|
|
// a django/django DB.
|
|
//
|
|
// select API::moduleImport("django") .getMember("contrib") .getMember("admin")
|
|
// .getMember("views") .getMember("main") .getMember("ChangeListSearchForm")
|
|
//
|
|
//
|
|
// Since it is a Form subclass that we would want to capture for our Django modeling,
|
|
// we want to extend our modeling (that is written in a qll file) with exactly that
|
|
// piece of code, but since the API::Node doesn't exist, we can't select that from a
|
|
// predicate and print its path. We need a different approach, and for that we use
|
|
// fully qualified names to capture new classes/new aliases, and transform these into
|
|
// API paths (to be included in the modeling that is inserted into the `.qll` files),
|
|
// see `fullyQualifiedToAPIGraphPath`.
|
|
//
|
|
// NOTE: this implementation was originally created to help with automatically
|
|
// modeling packages in mind, and has been adjusted to help with manual library
|
|
// modeling. See https://github.com/github/codeql/pull/5632 for more discussion.
|
|
//
|
|
//
|
|
bindingset[fullyQualified]
|
|
string fullyQualifiedToApiGraphPath(string fullyQualified) {
|
|
result = "moduleImport(\"" + fullyQualified.replaceAll(".", "\").getMember(\"") + "\")"
|
|
}
|
|
|
|
bindingset[this]
|
|
abstract class FindSubclassesSpec extends string {
|
|
/**
|
|
* Gets an API node for a class that has already been modeled. You can include
|
|
* `.getASubclass*()` without causing problems, but it is not needed.
|
|
*/
|
|
abstract API::Node getAlreadyModeledClass();
|
|
|
|
/**
|
|
* Gets the fully qualified name that this spec represents.
|
|
*
|
|
* This should be implemented by all classes for which `getSuperClass` is
|
|
* implemented, at least if they are defined in a different module than what they
|
|
* subclass.
|
|
*/
|
|
string getFullyQualifiedName() { none() }
|
|
|
|
FindSubclassesSpec getSuperClass() { none() }
|
|
|
|
final FindSubclassesSpec getSubClass() { result.getSuperClass() = this }
|
|
}
|
|
|
|
/**
|
|
* Holds if `newModelFullyQualified` describes either a new subclass, or a new alias, belonging to `spec` that we should include in our automated modeling.
|
|
* This new element is defined by `ast`, which is defined at `loc` in the module `mod`.
|
|
*/
|
|
predicate newModel(
|
|
FindSubclassesSpec spec, string newModelFullyQualified, AstNode ast, Module mod, Location loc
|
|
) {
|
|
(
|
|
newSubclass(spec, newModelFullyQualified, ast, mod, loc)
|
|
or
|
|
newDirectAlias(spec, newModelFullyQualified, ast, mod, loc)
|
|
or
|
|
newImportAlias(spec, newModelFullyQualified, mod, _, _, loc) and
|
|
ast = mod
|
|
)
|
|
}
|
|
|
|
API::Node newOrExistingModeling(FindSubclassesSpec spec) {
|
|
result = spec.getAlreadyModeledClass()
|
|
or
|
|
exists(string newSubclassName |
|
|
newModel(spec, newSubclassName, _, _, _) and
|
|
result.getPath() = fullyQualifiedToApiGraphPath(newSubclassName)
|
|
)
|
|
}
|
|
|
|
/**
|
|
* Holds if `fullyQualifiedName` is already explicitly modeled in the `spec`.
|
|
*
|
|
* For specs that do `.getASubclass*()`, items found by following a `.getASubclass`
|
|
* edge will not be considered explicitly modeled.
|
|
*/
|
|
bindingset[fullyQualifiedName]
|
|
predicate alreadyExplicitlyModeled(FindSubclassesSpec spec, string fullyQualifiedName) {
|
|
fullyQualifiedToApiGraphPath(fullyQualifiedName) = spec.getAlreadyModeledClass().getPath()
|
|
}
|
|
|
|
predicate isAllowedModule(Module mod) {
|
|
// for tests
|
|
mod.getName() = "find_subclass_test"
|
|
or
|
|
// don't include anything found in site-packages
|
|
exists(mod.getFile().getRelativePath()) and
|
|
not mod.getFile().getRelativePath().regexpMatch("(?i)((^|/)examples?|^docs)/.*") and
|
|
// to counter things like `my-example/app/foo.py` being allowed under `app.foo`
|
|
forall(string part | part = mod.getFile().getParent().getRelativePath().splitAt("/") |
|
|
legalShortName(part)
|
|
)
|
|
}
|
|
|
|
predicate isTestCode(AstNode ast) {
|
|
ast.getScope*() instanceof TestScope
|
|
or
|
|
ast.getLocation().getFile().getRelativePath().matches("tests/%")
|
|
}
|
|
|
|
predicate hasAllStatement(Module mod) {
|
|
exists(AssignStmt a, GlobalVariable all |
|
|
a.defines(all) and
|
|
a.getScope() = mod and
|
|
all.getId() = "__all__"
|
|
)
|
|
}
|
|
|
|
/**
|
|
* Holds if `newAliasFullyQualified` describes new alias originating from the import
|
|
* `from <module> import <member> [as <new-name>]`, where `<module>.<member>` belongs to
|
|
* `spec`.
|
|
* So if this import happened in module `foo.bar`, `newAliasFullyQualified` would be
|
|
* `foo.bar.<member>` (or `foo.bar.<new-name>`).
|
|
*
|
|
* Note that this predicate currently respects `__all__` in sort of a backwards fashion.
|
|
* - if `__all__` is defined in module `foo.bar`, we only allow new aliases where the member name is also in `__all__`. (this doesn't map 100% to the semantics of imports though)
|
|
* - If `__all__` is not defined we don't impose any limitations.
|
|
*
|
|
* Also note that we don't currently consider deleting module-attributes at all, so in the code snippet below, we would consider that `my_module.foo` is a
|
|
* reference to `django.foo`, although `my_module.foo` isn't even available at runtime. (there currently also isn't any code to discover that `my_module.bar`
|
|
* is an alias to `django.foo`)
|
|
* ```py
|
|
* # module my_module
|
|
* from django import foo
|
|
* bar = foo
|
|
* del foo
|
|
* ```
|
|
*/
|
|
predicate newDirectAlias(
|
|
FindSubclassesSpec spec, string newAliasFullyQualified, Expr value, Module mod, Location loc
|
|
) {
|
|
exists(Alias alias | value = alias.getValue() |
|
|
value = newOrExistingModeling(spec).getASubclass*().getAValueReachableFromSource().asExpr() and
|
|
value.getScope() = mod and
|
|
loc = value.getLocation() and
|
|
exists(string base |
|
|
mod.isPackageInit() and base = mod.getPackageName()
|
|
or
|
|
not mod.isPackageInit() and base = mod.getName()
|
|
|
|
|
newAliasFullyQualified = base + "." + alias.getAsname().(Name).getId()
|
|
) and
|
|
(
|
|
not hasAllStatement(mod)
|
|
or
|
|
mod.declaredInAll(alias.getAsname().(Name).getId())
|
|
) and
|
|
not alreadyExplicitlyModeled(spec, newAliasFullyQualified) and
|
|
not isTestCode(value) and
|
|
isAllowedModule(mod)
|
|
)
|
|
}
|
|
|
|
/**
|
|
* same as `newDirectAlias` predicate, but written in a generic way to handle any import (also import *).
|
|
*
|
|
* it might be safe to delete `newDirectAlias` with this in place, but have not done the testing yet.
|
|
*/
|
|
predicate newImportAlias(
|
|
FindSubclassesSpec spec, string newAliasFullyQualified, Module mod, DataFlow::Node def,
|
|
string relevantName, Location loc
|
|
) {
|
|
loc = mod.getLocation() and
|
|
exists(API::Node relevantClass, ControlFlowNode value |
|
|
relevantClass = newOrExistingModeling(spec).getASubclass*() and
|
|
ImportResolution::module_export(mod, relevantName, def) and
|
|
value = relevantClass.getAValueReachableFromSource().asCfgNode() and
|
|
value = def.asCfgNode()
|
|
// value could be a ClassExpr if a new class is defined, or a Name if defining an alias
|
|
) and
|
|
(
|
|
mod.isPackageInit() and
|
|
newAliasFullyQualified = mod.getPackageName() + "." + relevantName
|
|
or
|
|
not mod.isPackageInit() and
|
|
newAliasFullyQualified = mod.getName() + "." + relevantName
|
|
) and
|
|
(
|
|
not hasAllStatement(mod)
|
|
or
|
|
mod.declaredInAll(relevantName)
|
|
) and
|
|
not alreadyExplicitlyModeled(spec, newAliasFullyQualified) and
|
|
not isTestCode(mod) and
|
|
isAllowedModule(mod)
|
|
}
|
|
|
|
/** Holds if `classExpr` defines a new subclass that belongs to `spec`, which has the fully qualified name `newSubclassQualified`. */
|
|
predicate newSubclass(
|
|
FindSubclassesSpec spec, string newSubclassQualified, ClassExpr classExpr, Module mod,
|
|
Location loc
|
|
) {
|
|
classExpr = newOrExistingModeling(spec).getASubclass*().asSource().asExpr() and
|
|
classExpr.getScope() = mod and
|
|
newSubclassQualified = mod.getName() + "." + classExpr.getName() and
|
|
loc = classExpr.getLocation() and
|
|
not alreadyExplicitlyModeled(spec, newSubclassQualified) and
|
|
not isTestCode(classExpr) and
|
|
isAllowedModule(mod)
|
|
}
|
|
}
|