Python: Support Model Editor

This commit is contained in:
Rasmus Lerchedahl Petersen
2024-06-20 21:32:46 +02:00
parent db768960f4
commit 280a9b4408
9 changed files with 420 additions and 0 deletions

View File

@@ -0,0 +1,248 @@
/** Provides classes and predicates related to handling APIs for the VS Code extension. */
private import python
private import semmle.python.frameworks.data.ModelsAsData
private import semmle.python.frameworks.data.internal.ApiGraphModelsExtensions
private import semmle.python.dataflow.new.internal.DataFlowDispatch as DP
private import Util as Util
/**
 * A string naming the kind of source code element that is being modeled.
 *
 * See `Endpoint`.
 */
class EndpointKind extends string {
  EndpointKind() {
    this = ["Function", "InstanceMethod", "ClassMethod", "StaticMethod", "InitMethod", "Class"]
  }
}
/**
 * An element of the source code to be modeled.
 *
 * Only public scopes located in a `Util::RelevantFile` are endpoints.
 * See `EndpointKind` for the possible kinds of elements.
 */
abstract class Endpoint instanceof Scope {
  /** The part of the dotted scope path before the first dot. */
  string namespace;
  /** The part of the dotted scope path between the first and the last dot, or the empty string. */
  string type;
  /** The part of the dotted scope path after the last dot. */
  string name;

  Endpoint() {
    this.isPublic() and
    this.getLocation().getFile() instanceof Util::RelevantFile and
    exists(string dotted, string remainder, int firstDot |
      dotted = Util::computeScopePath(this) and
      firstDot = min(dotted.indexOf("."))
    |
      // Split `namespace.remainder` at the first dot ...
      namespace = dotted.prefix(firstDot) and
      remainder = dotted.suffix(firstDot + 1) and
      (
        // ... and then `type.name` at the last dot of the remainder, if it has one.
        if exists(remainder.indexOf("."))
        then
          exists(int lastDot | lastDot = max(remainder.indexOf(".")) |
            type = remainder.prefix(lastDot) and
            name = remainder.suffix(lastDot + 1)
          )
        else (
          type = "" and
          name = remainder
        )
      )
    )
  }

  /** Gets the namespace for this endpoint. This will typically be the package in which it is found. */
  string getNamespace() { result = namespace }

  /** Gets the basename of the file where this endpoint is found. */
  string getFileName() { result = super.getLocation().getFile().getBaseName() }

  /** Gets a string representation of this endpoint. */
  string toString() { result = super.toString() }

  /** Gets the location of this endpoint. */
  Location getLocation() { result = super.getLocation() }

  /**
   * Gets the dotted path, relative to the namespace, of the scope enclosing this endpoint,
   * or the empty string if the endpoint sits directly below the namespace.
   */
  string getType() { result = type }

  /**
   * Gets the name of the endpoint if it is not a class, or the empty string if it is a class.
   *
   * If this endpoint is a class, the class name can be obtained via `getType`.
   */
  string getName() { result = name }

  /**
   * Gets a string representation of the parameters of this endpoint.
   *
   * The string follows a specific format:
   * - Positional parameters are listed in order, separated by commas.
   * - Keyword parameters are listed in order, separated by commas, each followed by a colon.
   * - In the future, positional-only parameters will be listed in order, separated by commas, each followed by a slash.
   */
  abstract string getParameters();

  /**
   * Gets a boolean that is true iff this endpoint is supported by existing modeling.
   *
   * The check only takes Models as Data extension models into account.
   */
  abstract boolean getSupportedStatus();

  /**
   * Gets a string that describes the type of support detected for this endpoint.
   *
   * The string can be one of the following:
   * - "source" if this endpoint is a known source.
   * - "sink" if this endpoint is a known sink.
   * - "summary" if this endpoint has a flow summary.
   * - "neutral" if this endpoint is a known neutral.
   * - "" if this endpoint is not detected as supported.
   */
  abstract string getSupportedType();

  /** Gets the kind of this endpoint. See `EndpointKind`. */
  abstract EndpointKind getKind();
}
/** Holds if some source model is registered for `(type, path)`; the remaining columns are ignored. */
private predicate sourceModelPath(string type, string path) {
  sourceModel(type, path, _, _)
}

/** Finds scopes that are covered by a source model. */
module FindSourceModel = Util::FindModel<sourceModelPath/2>;
/** Holds if some sink model is registered for `(type, path)`; the remaining columns are ignored. */
private predicate sinkModelPath(string type, string path) {
  sinkModel(type, path, _, _)
}

/** Finds scopes that are covered by a sink model. */
module FindSinkModel = Util::FindModel<sinkModelPath/2>;
/** Holds if some summary model is registered for `(type, path)`; the remaining columns are ignored. */
private predicate summaryModelPath(string type, string path) {
  summaryModel(type, path, _, _, _, _)
}

/** Finds scopes that are covered by a summary model. */
module FindSummaryModel = Util::FindModel<summaryModelPath/2>;
/** Holds if some neutral model is registered for `(type, path)`; the remaining column is ignored. */
private predicate neutralModelPath(string type, string path) {
  neutralModel(type, path, _)
}

/** Finds scopes that are covered by a neutral model. */
module FindNeutralModel = Util::FindModel<neutralModelPath/2>;
/**
 * A callable function or method from source code.
 */
class FunctionEndpoint extends Endpoint instanceof Function {
  /**
   * Gets a string listing the parameter names of this endpoint, for example `(x,y,z:)`.
   *
   * Positional parameters are listed first, in declaration order. They are followed by
   * the keyword-only parameters, also in declaration order, each with a trailing `:`.
   */
  override string getParameters() {
    // For now, return the names of positional and keyword parameters. We don't always have type information, so we can't return type names.
    // We don't yet handle splat params or dict splat params.
    //
    // In Python, there are three types of parameters:
    // 1. Positional-only parameters: These are parameters that can only be passed by position and not by keyword.
    // 2. Positional-or-keyword parameters: These are parameters that can be passed by position or by keyword.
    // 3. Keyword-only parameters: These are parameters that can only be passed by keyword.
    //
    // The syntax for defining these parameters is as follows:
    // ```python
    // def f(a, /, b, *, c):
    //     pass
    // ```
    // In this example, `a` is a positional-only parameter, `b` is a positional-or-keyword parameter, and `c` is a keyword-only parameter.
    //
    // We handle positional-only parameters by adding a "/" to the parameter name, reminiscent of the syntax above.
    // Note that we don't yet have information about positional-only parameters.
    // We handle keyword-only parameters by adding a ":" to the parameter name, to be consistent with the MaD syntax and the other languages.
    exists(Function f, int nrPositional |
      f = this and
      nrPositional = f.getPositionalParameterCount()
    |
      result =
        "(" +
          concat(int ordinal, string value |
            // TODO: Once we have information about positional-only parameters:
            // Handle positional-only parameters by adding a "/"
            value = f.getArgName(ordinal)
            or
            // Keyword-only parameters are numbered after all positional ones, so that
            // they keep their declaration order and always come last.
            exists(int kwIndex |
              value = f.getKeywordOnlyArg(kwIndex).getId() + ":" and
              ordinal = nrPositional + kwIndex
            )
          |
            // Order numerically: ordering by the string form of the index would
            // place parameter 10 before parameter 2.
            value, "," order by ordinal
          ) + ")"
    )
  }

  /** Holds if this API has a supported summary. */
  pragma[nomagic]
  predicate hasSummary() { FindSummaryModel::hasModel(this) }

  /** Holds if this API is a known source. */
  pragma[nomagic]
  predicate isSource() { FindSourceModel::hasModel(this) }

  /** Holds if this API is a known sink. */
  pragma[nomagic]
  predicate isSink() { FindSinkModel::hasModel(this) }

  /** Holds if this API is a known neutral. */
  pragma[nomagic]
  predicate isNeutral() { FindNeutralModel::hasModel(this) }

  /**
   * Holds if this API is supported by existing CodeQL libraries, that is, it is either a
   * recognized source, sink or neutral or it has a flow summary.
   */
  predicate isSupported() {
    this.hasSummary() or this.isSource() or this.isSink() or this.isNeutral()
  }

  /** Gets `true` if `isSupported()` holds for this endpoint, and `false` otherwise. */
  override boolean getSupportedStatus() {
    if this.isSupported() then result = true else result = false
  }

  /**
   * Gets the kind of model found for this endpoint, or the empty string if there is none.
   *
   * An endpoint matched by several kinds of model has one result per kind.
   */
  override string getSupportedType() {
    this.isSink() and result = "sink"
    or
    this.isSource() and result = "source"
    or
    this.hasSummary() and result = "summary"
    or
    this.isNeutral() and result = "neutral"
    or
    not this.isSupported() and result = ""
  }

  /**
   * Gets the kind of this endpoint: "Function" for anything that is not a method,
   * otherwise the specific method kind, defaulting to "InstanceMethod".
   */
  override EndpointKind getKind() {
    if this.(Function).isMethod()
    then
      result = this.methodKind()
      or
      not exists(this.methodKind()) and result = "InstanceMethod"
    else result = "Function"
  }

  /** Gets the special method kind of this endpoint, if it is a class method, static method or init method. */
  private EndpointKind methodKind() {
    this.(Function).isMethod() and
    (
      DP::isClassmethod(this) and result = "ClassMethod"
      or
      DP::isStaticmethod(this) and result = "StaticMethod"
      or
      this.(Function).isInitMethod() and result = "InitMethod"
    )
  }
}
/**
 * A class from source code.
 *
 * Classes are reported with an empty name and empty parameters; the class itself
 * is identified through `getType`. Classes are never reported as supported.
 */
class ClassEndpoint extends Endpoint instanceof Class {
  /**
   * Gets the dotted path of this class relative to its namespace, for example `Foo.C1`.
   */
  override string getType() {
    // `type` is empty when the class sits directly below the namespace (a class in a
    // top-level module); avoid producing a path with a leading dot in that case.
    if type = "" then result = name else result = type + "." + name
  }

  override string getName() { result = "" }

  override string getParameters() { result = "" }

  override boolean getSupportedStatus() { result = false }

  override string getSupportedType() { result = "" }

  override EndpointKind getKind() { result = "Class" }
}

View File

@@ -0,0 +1,93 @@
/**
* Contains utility methods and classes to assist with generating data extensions models.
*/
private import python
private import semmle.python.ApiGraphs
/**
 * A file that probably contains tests.
 *
 * The heuristic looks for `test`, `spec` or `examples` followed by at least one
 * more character anywhere in the relative path.
 */
class TestFile extends File {
  TestFile() {
    // Files below `/ql/test/` are exempt, which allows our test cases to work.
    not this.getAbsolutePath().matches("%/ql/test/%") and
    exists(string relativePath | relativePath = this.getRelativePath() |
      relativePath.regexpMatch(".*(test|spec|examples).+")
    )
  }
}
/**
 * A file that is relevant in the context of library modeling.
 *
 * In practice, this means a file that is neither test code (see `TestFile`)
 * nor part of the standard library.
 */
class RelevantFile extends File {
  RelevantFile() { not (this instanceof TestFile or this.inStdlib()) }
}
/**
 * Gets the dotted path of a scope.
 *
 * For a module this is its package name (for a package `__init__`) or its module name.
 * For a class or function it is the path of the enclosing scope followed by `.` and
 * the name of the class or function.
 */
string computeScopePath(Scope scope) {
  // base case: modules
  exists(Module m | m = scope |
    if m.isPackageInit() then result = m.getPackageName() else result = m.getName()
  )
  or
  // recursive case: classes (a class is never a module)
  exists(Class cls | cls = scope |
    result = computeScopePath(cls.getEnclosingScope()) + "." + cls.getName()
  )
  or
  // recursive case: functions (a function is neither a module nor a class)
  exists(Function func | func = scope |
    result = computeScopePath(func.getEnclosingScope()) + "." + func.getName()
  )
  or
  // fallback for any other kind of scope
  not scope instanceof Module and
  not scope instanceof Class and
  not scope instanceof Function and
  result = "unknown: " + scope.toString()
}
/**
 * The signature of a predicate relating the `type` and `path` columns of a model;
 * used as the parameter of `FindModel`.
 */
signature predicate modelSig(string type, string path);
/**
 * A utility module for finding models of endpoints.
 *
 * Chiefly the `hasModel` predicate is used to determine if a scope has a model.
 */
module FindModel<modelSig/2 model> {
  /**
   * Holds if the given scope has a model as identified by the provided predicate `model`.
   */
  predicate hasModel(Scope scope) {
    exists(string type, string path |
      model(type, path) and
      pathToScope(scope, type, possibleMemberPathPrefix(path, scope.getName()))
    )
  }

  /**
   * Gets a prefix of `path` that ends right before an access of the form
   * `Member[member]` or `Method[member]`, that is, a prefix that might be a path to `member`.
   */
  bindingset[path, member]
  string possibleMemberPathPrefix(string path, string member) {
    // an access to `member` must occur as a substring of `path`
    exists(string access | access = ["Member", "Method"] + "[" + member + "]" |
      result = path.prefix(path.indexOf(access))
    )
  }

  /**
   * Holds if `(type,path)` evaluates to the given entity, when evaluated from a client of the current library.
   *
   * The comparison is textual: `!` is dropped from `type`, `Member[`, `]` and `Instance.` are
   * dropped from `path`, and the result followed by the name of `scope` must equal the
   * dotted scope path of `scope`.
   */
  bindingset[type, path]
  predicate pathToScope(Scope scope, string type, string path) {
    scope.isPublic() and // only public methods are modeled
    scope.getLocation().getFile() instanceof RelevantFile and
    exists(string typePart, string memberPart |
      typePart = type.replaceAll("!", "") and
      memberPart = path.replaceAll("Member[", "").replaceAll("]", "").replaceAll("Instance.", "")
    |
      computeScopePath(scope) = typePart + "." + memberPart + scope.getName()
    )
  }
}

View File

@@ -0,0 +1,14 @@
/**
* @name Fetch endpoints for use in the model editor (framework mode)
* @description A list of endpoints accessible (methods and attributes) for consumers of the library. Excludes test and generated code.
* @kind table
* @id py/utils/modeleditor/framework-mode-endpoints
* @tags modeleditor endpoints framework-mode
*/
import modeling.ModelEditor
// One row per endpoint; the column order below is preserved from the original query.
from Endpoint e
select e, e.getNamespace(), e.getType(), e.getName(), e.getParameters(), e.getSupportedStatus(),
  e.getFileName(), e.getSupportedType(), e.getKind()

View File

@@ -0,0 +1,11 @@
| MyPackage/Foo.py:1:1:1:9 | Class C1 | MyPackage | Foo.C1 | | | false | Foo.py | | Class |
| MyPackage/Foo.py:2:5:2:17 | Function m1 | MyPackage | Foo.C1 | m1 | (self) | true | Foo.py | source | InstanceMethod |
| MyPackage/Foo.py:5:5:5:20 | Function m2 | MyPackage | Foo.C1 | m2 | (self,x) | true | Foo.py | source | InstanceMethod |
| MyPackage/Foo.py:9:5:9:14 | Function m3 | MyPackage | Foo.C1 | m3 | (x) | true | Foo.py | summary | StaticMethod |
| MyPackage/Foo.py:13:5:13:19 | Function m4 | MyPackage | Foo.C1 | m4 | (cls,x) | true | Foo.py | summary | ClassMethod |
| MyPackage/Foo.py:16:1:16:13 | Class C2 | MyPackage | Foo.C2 | | | false | Foo.py | | Class |
| MyPackage/Foo.py:17:5:17:17 | Function m1 | MyPackage | Foo.C2 | m1 | (self) | false | Foo.py | | InstanceMethod |
| MyPackage/Foo.py:20:5:20:27 | Function c2only_m1 | MyPackage | Foo.C2 | c2only_m1 | (self,x) | false | Foo.py | | InstanceMethod |
| MyPackage/Foo.py:23:1:23:9 | Class C3 | MyPackage | Foo.C3 | | | false | Foo.py | | Class |
| MyPackage/Foo.py:24:5:24:26 | Function get_C2_instance | MyPackage | Foo.C3 | get_C2_instance | () | false | Foo.py | | InstanceMethod |
| TopLevel.py:3:1:3:38 | Function top_level_funciton | TopLevel | | top_level_funciton | (x,y,z:) | false | TopLevel.py | | Function |

View File

@@ -0,0 +1,20 @@
extensions:
- addsTo:
pack: codeql/python-all
extensible: sourceModel
data:
- ["MyPackage.Foo.C1","Member[m1].ReturnValue","remote"]
- ["MyPackage","Member[Foo].Member[C1].Instance.Member[m2].ReturnValue","remote"]
- addsTo:
pack: codeql/python-all
extensible: summaryModel
data:
- ["MyPackage.Foo.C1!","Member[m3]","Argument[0]","ReturnValue","value"]
- ["MyPackage","Member[Foo].Member[C1].Member[m4]","Argument[0]","ReturnValue","value"]
- addsTo:
pack: codeql/python-all
extensible: typeModel
data:
- ["MyPackage.Foo.C2!","MyPackage","Member[Foo].Member[C3].Member[get_C2_instance].ReturnValue"]

View File

@@ -0,0 +1 @@
utils/modeleditor/FrameworkModeEndpoints.ql

View File

@@ -0,0 +1,25 @@
class C1:  # NOTE(review): test fixture; expected results appear to reference positions in this file, so only trailing comments are added
def m1(self):  # plain instance method; prints and implicitly returns None
print("C1.m1()")
def m2(self, x):  # instance method returning its argument unchanged
return x
@staticmethod
def m3(x):  # static method returning its argument unchanged
return x
@classmethod
def m4(cls, x):  # class method returning its second argument unchanged
return x
class C2(C1):  # subclass of C1 that overrides m1 and adds one method of its own
def m1(self):  # overrides C1.m1
print("C2.m1()")
def c2only_m1(self, x):  # exists only on C2; returns its argument unchanged
return x
class C3:  # holder for a function that produces a C2 instance
def get_C2_instance():  # NOTE(review): declared without `self` and without @staticmethod; presumably meant to be called on the class only — confirm
return C2()

View File

@@ -0,0 +1,8 @@
from MyPackage import Foo
def top_level_funciton(x, /, y, *, z):  # x is positional-only, y is positional-or-keyword, z is keyword-only
return [x, y, z]
top_level_value = Foo.C1()  # module-level instance of C1
iC2 = Foo.C3.get_C2_instance()  # called on the class itself; the function takes no arguments