Compare commits

...

14 Commits

Author SHA1 Message Date
Rasmus Lerchedahl Petersen
4b3157f212 ModelEditor: More robust path resolution
Can handle both old and new style paths
We now transform a (type,path) pair into a dotted path
and compare with a computed dotted path
Tests now pass
2024-05-23 16:27:24 +02:00
Rasmus Lerchedahl Petersen
974b59c9a4 ModelEditor: add modelKind
values are
`"Function", "InstanceMethod", "ClassMethod", "StaticMethod", "InitMethod", "Class"`
2024-05-22 12:27:37 +02:00
Rasmus Lerchedahl Petersen
1504e0d5ec ModelEditor: class endpoint 2024-05-22 12:06:32 +02:00
Rasmus Lerchedahl Petersen
deb9e0db60 ModelEditor: more tests 2024-05-22 11:36:54 +02:00
Rasmus Lerchedahl Petersen
4edb3dc260 modelEditor: fix extensible predicate calls 2024-05-22 11:17:56 +02:00
Rasmus Lerchedahl Petersen
41e8581ec2 modelEditor: move stuff to lib and add tests 2024-05-22 11:05:44 +02:00
Rasmus Lerchedahl Petersen
d46a2ba1d2 Python: add (more) support for MaD sinks
- path-injection, reflected-xss, and html-injection already had support
- I only considered the sink kinds provided by the Model Editor
2024-05-22 11:05:44 +02:00
Rasmus Lerchedahl Petersen
9c3559262b python/model-editor: Improve paths
- handle classes
- trim `.ReturnValue` and `.Argument[...]` for source and sink paths
2024-05-22 11:05:44 +02:00
Rasmus Lerchedahl Petersen
7ab36f7bd8 python/model-editor: add comment and TODO
around positional-only parameters
2024-05-22 11:05:44 +02:00
Rasmus Lerchedahl Petersen
51d41821c4 python/modeleditor: fix bad autocomplete 2024-05-22 11:05:44 +02:00
Rasmus Lerchedahl Petersen
f23933f40f python/modeleditor: split path into name and type 2024-05-22 11:05:43 +02:00
Rasmus Lerchedahl Petersen
69a9af533c python/modeleditor: implement hasSummary, is[Source/Sink/Neutral] 2024-05-22 11:05:43 +02:00
Rasmus Lerchedahl Petersen
060c410a87 python/modeleditor: improve endpoint path computation 2024-05-22 11:05:43 +02:00
Koen Vlaswinkel
e4213b5b4c Python: Add simple model editor queries 2024-05-22 11:05:43 +02:00
13 changed files with 375 additions and 0 deletions

View File

@@ -0,0 +1,198 @@
/** Provides classes and predicates related to handling APIs for the VS Code extension. */
private import python
private import semmle.python.frameworks.data.ModelsAsData
private import semmle.python.frameworks.data.internal.ApiGraphModelsExtensions
private import semmle.python.dataflow.new.internal.DataFlowDispatch as DP
private import Util as Util
/**
 * A string describing what kind of endpoint is being reported.
 *
 * The only admissible values are `"Function"`, `"InstanceMethod"`, `"ClassMethod"`,
 * `"StaticMethod"`, `"InitMethod"` and `"Class"`.
 */
class EndpointKind extends string {
  EndpointKind() {
    this = ["Function", "InstanceMethod", "ClassMethod", "StaticMethod", "InitMethod", "Class"]
  }
}
/**
 * A public scope, located in a relevant (non-test, non-stdlib) file, that is reported
 * to the model editor as an endpoint.
 *
 * The dotted scope path computed by `Util::computeScopePath` is cut into three pieces:
 * the text before the first `.` is the namespace, the text after the last `.` is the
 * name, and whatever remains in between (possibly nothing) is the type.
 */
abstract class Endpoint instanceof Scope {
  /** The part of the scope path that precedes its first `.`. */
  string namespace;
  /** The part of the scope path between the namespace and the name; `""` if there is none. */
  string type;
  /** The last component of the scope path. */
  string name;

  Endpoint() {
    this.getLocation().getFile() instanceof Util::RelevantFile and
    this.isPublic() and
    exists(string dotted, int firstDot, string rest |
      dotted = Util::computeScopePath(this) and
      firstDot = dotted.indexOf(".", 0, 0) and
      namespace = dotted.prefix(firstDot) and
      rest = dotted.suffix(firstDot + 1)
    |
      if exists(rest.indexOf("."))
      then
        // Split the remainder at its last `.` into type and name.
        exists(int lastDot | lastDot = max(rest.indexOf(".")) |
          type = rest.prefix(lastDot) and
          name = rest.suffix(lastDot + 1)
        )
      else (
        // No further `.`: the remainder is the name and there is no type.
        type = "" and
        name = rest
      )
    )
  }

  /** Gets the namespace of this endpoint. */
  string getNamespace() { result = namespace }

  /** Gets the base name of the file in which this endpoint is located. */
  string getFileName() { result = super.getLocation().getFile().getBaseName() }

  /** Gets a textual representation of this endpoint (that of the underlying scope). */
  string toString() { result = super.toString() }

  /** Gets the location of this endpoint (that of the underlying scope). */
  Location getLocation() { result = super.getLocation() }

  /** Gets the type of this endpoint. */
  string getType() { result = type }

  /** Gets the name of this endpoint. */
  string getName() { result = name }

  /** Gets a textual rendering of the parameters of this endpoint. */
  abstract string getParameters();

  /** Gets whether this endpoint is considered supported. */
  abstract boolean getSupportedStatus();

  /** Gets a string describing in which way this endpoint is supported. */
  abstract string getSupportedType();

  /** Gets the kind of this endpoint. */
  abstract EndpointKind getKind();
}
/** Holds if `sourceModel` has a row whose first two columns are `type` and `path`; the other columns are ignored. */
predicate sourceModelPath(string type, string path) { sourceModel(type, path, _, _) }
/** `Util::FindModel` instantiated with the `(type, path)` pairs of source models. */
module FindSourceModel = Util::FindModel<sourceModelPath/2>;
/** Holds if `sinkModel` has a row whose first two columns are `type` and `path`; the other columns are ignored. */
predicate sinkModelPath(string type, string path) { sinkModel(type, path, _, _) }
/** `Util::FindModel` instantiated with the `(type, path)` pairs of sink models. */
module FindSinkModel = Util::FindModel<sinkModelPath/2>;
/** Holds if `summaryModel` has a row whose first two columns are `type` and `path`; the other columns are ignored. */
predicate summaryModelPath(string type, string path) { summaryModel(type, path, _, _, _, _) }
/** `Util::FindModel` instantiated with the `(type, path)` pairs of summary models. */
module FindSummaryModel = Util::FindModel<summaryModelPath/2>;
/** Holds if `neutralModel` has a row whose first two columns are `type` and `path`; the other column is ignored. */
predicate neutralModelPath(string type, string path) { neutralModel(type, path, _) }
/** `Util::FindModel` instantiated with the `(type, path)` pairs of neutral models. */
module FindNeutralModel = Util::FindModel<neutralModelPath/2>;
/**
 * A callable function or method from source code.
 */
class FunctionEndpoint extends Endpoint instanceof Function {
  /**
   * Gets the parameters of this endpoint, rendered as a parenthesized, comma-separated list.
   *
   * Positional parameters are listed first, by name and in declaration order.
   * Keyword-only parameters follow, sorted by name, each with a `:` suffix.
   */
  override string getParameters() {
    // For now, return the names of positional and keyword parameters. We don't always have type information, so we can't return type names.
    // We don't yet handle splat params or dict splat params.
    //
    // In Python, there are three types of parameters:
    // 1. Positional-only parameters: These are parameters that can only be passed by position and not by keyword.
    // 2. Positional-or-keyword parameters: These are parameters that can be passed by position or by keyword.
    // 3. Keyword-only parameters: These are parameters that can only be passed by keyword.
    //
    // The syntax for defining these parameters is as follows:
    // ```python
    // def f(a, /, b, *, c):
    //     pass
    // ```
    // In this example, `a` is a positional-only parameter, `b` is a positional-or-keyword parameter, and `c` is a keyword-only parameter.
    //
    // Positional-only parameters are not yet distinguished from positional-or-keyword ones (see the TODO below);
    // the plan is to mark them with a "/", reminiscent of the syntax above.
    // We handle keyword-only parameters by adding a ":" to the parameter name, to be consistent with the MaD syntax and the other languages.
    exists(Function f | f = this |
      result =
        "(" +
          concat(int group, int index, string paramName, string value |
            // TODO: Once we have information about positional-only parameters:
            // Handle positional-only parameters by adding a "/"
            //
            // Group 0: positional parameters, ordered by their integer index.
            // Ordering by the string form of the index would put "10" before "2".
            group = 0 and
            paramName = f.getArgName(index) and
            value = paramName
            or
            // Group 1: keyword-only parameters, ordered by name.
            group = 1 and
            index = 0 and
            exists(Name param | param = f.getAKeywordOnlyArg() |
              paramName = param.getId() and
              value = paramName + ":"
            )
          |
            value, "," order by group, index, paramName
          ) + ")"
    )
  }

  /** Holds if this API has a supported summary. */
  pragma[nomagic]
  predicate hasSummary() { FindSummaryModel::hasModel(this) }

  /** Holds if this API is a known source. */
  pragma[nomagic]
  predicate isSource() { FindSourceModel::hasModel(this) }

  /** Holds if this API is a known sink. */
  pragma[nomagic]
  predicate isSink() { FindSinkModel::hasModel(this) }

  /** Holds if this API is a known neutral. */
  pragma[nomagic]
  predicate isNeutral() { FindNeutralModel::hasModel(this) }

  /**
   * Holds if this API is supported by existing CodeQL libraries, that is, it is either a
   * recognized source, sink or neutral or it has a flow summary.
   */
  predicate isSupported() {
    this.hasSummary() or this.isSource() or this.isSink() or this.isNeutral()
  }

  /** Gets `true` if this API is supported (see `isSupported`), and `false` otherwise. */
  override boolean getSupportedStatus() {
    if this.isSupported() then result = true else result = false
  }

  /**
   * Gets the way in which this API is supported: `"sink"`, `"source"`, `"summary"` or
   * `"neutral"`, or the empty string if it is not supported at all.
   *
   * An API that is modeled in several ways has several results.
   */
  override string getSupportedType() {
    this.isSink() and result = "sink"
    or
    this.isSource() and result = "source"
    or
    this.hasSummary() and result = "summary"
    or
    this.isNeutral() and result = "neutral"
    or
    not this.isSupported() and result = ""
  }

  /**
   * Gets the kind of this endpoint: `"Function"` for non-methods, otherwise the specific
   * method kind, defaulting to `"InstanceMethod"` when no specific kind applies.
   */
  override EndpointKind getKind() {
    if this.(Function).isMethod()
    then
      result = this.methodKind()
      or
      not exists(this.methodKind()) and result = "InstanceMethod"
    else result = "Function"
  }

  /**
   * Gets the specific kind of this method, if it is a class method, a static method or
   * an init method. Has no result for other methods or for non-methods.
   */
  private EndpointKind methodKind() {
    this.(Function).isMethod() and
    (
      DP::isClassmethod(this) and result = "ClassMethod"
      or
      DP::isStaticmethod(this) and result = "StaticMethod"
      or
      this.(Function).isInitMethod() and result = "InitMethod"
    )
  }
}
/**
 * A class from source code, reported as an endpoint.
 *
 * For a class the whole path below the namespace is reported as the type, and the name
 * is left empty. Classes have no parameters and are always reported as unsupported.
 */
class ClassEndpoint extends Endpoint instanceof Class {
  /**
   * Gets the dotted path of this class below the namespace.
   *
   * When the class sits directly in the namespace module, `type` is empty and the
   * result is just the class name, avoiding a malformed path with a leading `.`.
   */
  override string getType() {
    if type = "" then result = name else result = type + "." + name
  }

  /** Gets the empty string; the class name is reported as part of `getType()`. */
  override string getName() { result = "" }

  /** Gets the empty string; no parameters are reported for classes. */
  override string getParameters() { result = "" }

  /** Gets `false`; classes are never reported as supported. */
  override boolean getSupportedStatus() { result = false }

  /** Gets the empty string; classes have no supported type. */
  override string getSupportedType() { result = "" }

  /** Gets `"Class"`. */
  override EndpointKind getKind() { result = "Class" }
}

View File

@@ -0,0 +1,93 @@
/**
* Contains utility methods and classes to assist with generating data extensions models.
*/
private import python
private import semmle.python.ApiGraphs
/**
 * A file that, judging by its path, probably contains tests.
 *
 * The relative path must contain `test`, `spec` or `examples` followed by at least one
 * more character.
 */
class TestFile extends File {
  TestFile() {
    // Files below a `/ql/test/` directory are exempt, which allows our own test cases to work.
    not this.getAbsolutePath().matches("%/ql/test/%") and
    this.getRelativePath().regexpMatch(".*(test|spec|examples).+")
  }
}
/**
 * A file that is relevant in the context of library modeling.
 *
 * In practice, this means a file that is neither test code nor part of the standard library.
 */
class RelevantFile extends File {
  RelevantFile() {
    not (
      this instanceof TestFile
      or
      this.inStdlib()
    )
  }
}
/**
 * Gets the dotted path of a scope.
 * Class scopes have a "!" suffix.
 *
 * A module contributes its package name if it is a package initializer and its own name
 * otherwise. Classes and functions append their name to the path of their enclosing
 * scope. Any other kind of scope yields a string starting with "unknown: ".
 */
string computeAnnotatedScopePath(Scope scope) {
  // base case: modules
  exists(Module mod | mod = scope |
    if mod.isPackageInit() then result = mod.getPackageName() else result = mod.getName()
  )
  or
  // recursive case: classes, marked with a trailing "!"
  exists(Class cls | cls = scope |
    result = computeAnnotatedScopePath(cls.getEnclosingScope()) + "." + cls.getName() + "!"
  )
  or
  // recursive case: functions
  exists(Function func | func = scope |
    result = computeAnnotatedScopePath(func.getEnclosingScope()) + "." + func.getName()
  )
  or
  // fallback for every other kind of scope
  not scope instanceof Module and
  not scope instanceof Class and
  not scope instanceof Function and
  result = "unknown: " + scope.toString()
}
/**
 * Gets the dotted path of a scope, without the "!" markers that
 * `computeAnnotatedScopePath` attaches to class scopes.
 */
string computeScopePath(Scope scope) {
  exists(string annotated | annotated = computeAnnotatedScopePath(scope) |
    result = annotated.replaceAll("!", "")
  )
}
/** The signature of a predicate that enumerates the `(type, path)` pairs of a set of models. */
signature predicate modelSig(string type, string path);

/**
 * Provides predicates for deciding whether a scope is covered by one of the models
 * enumerated by `model`.
 */
module FindModel<modelSig/2 model> {
  /**
   * Holds if some `(type, path)` pair of `model` refers to `scope`: `path` must contain a
   * `Member[...]` or `Method[...]` token naming `scope`, and `type` together with the
   * part of `path` before that token must evaluate to `scope`.
   */
  predicate hasModel(Scope scope) {
    exists(string type, string path, string searchPath | model(type, path) |
      searchPath = possibleMemberPathPrefix(path, scope.getName()) and
      pathToScope(scope, type, searchPath)
    )
  }

  /**
   * Gets a prefix of `path` that might be a path to `member`, that is, the text before
   * an occurrence of `Member[member]` or `Method[member]` in `path`.
   */
  bindingset[path, member]
  string possibleMemberPathPrefix(string path, string member) {
    // `member` must occur in `path` as a `Member[...]` or `Method[...]` token
    exists(int index | index = path.indexOf(["Member", "Method"] + "[" + member + "]") |
      result = path.prefix(index)
    )
  }

  /**
   * Holds if `(type, path)` evaluates to the given entity, when evaluated from a client of the current library.
   *
   * `path` is expected to be a prefix as produced by `possibleMemberPathPrefix`; the name
   * of `scope` is appended to it before comparing with the computed scope path.
   */
  bindingset[type, path]
  predicate pathToScope(Scope scope, string type, string path) {
    scope.getLocation().getFile() instanceof RelevantFile and
    scope.isPublic() and // only public methods are modeled
    computeScopePath(scope) =
      type.replaceAll("!", "") + "." +
        // Drop `Instance.` tokens BEFORE stripping the brackets. Once `]` is gone, a member
        // such as `Member[FooInstance].` reads `FooInstance.` and would be cut down to `Foo`.
        path.replaceAll("Instance.", "").replaceAll("Member[", "").replaceAll("]", "") +
        scope.getName()
  }
}

View File

@@ -9,6 +9,7 @@ private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.BarrierGuards
private import semmle.python.frameworks.data.ModelsAsData
/**
* Provides default sources, sinks and sanitizers for detecting
@@ -43,6 +44,10 @@ module CodeInjection {
CodeExecutionAsSink() { this = any(CodeExecution e).getCode() }
}
/**
 * A sink taken from the models: a node obtained via
 * `ModelOutput::getASinkNode("code-injection").asSink()`.
 */
private class SinkFromModel extends Sink {
SinkFromModel() { this = ModelOutput::getASinkNode("code-injection").asSink() }
}
/**
* A comparison with a constant string, considered as a sanitizer-guard.
*/

View File

@@ -9,6 +9,7 @@ private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.BarrierGuards
private import semmle.python.frameworks.data.ModelsAsData
/**
* Provides default sources, sinks and sanitizers for detecting
@@ -78,6 +79,10 @@ module CommandInjection {
}
}
/**
 * A sink taken from the models: a node obtained via
 * `ModelOutput::getASinkNode("command-injection").asSink()`.
 */
private class SinkFromModel extends Sink {
SinkFromModel() { this = ModelOutput::getASinkNode("command-injection").asSink() }
}
/**
* A comparison with a constant string, considered as a sanitizer-guard.
*/

View File

@@ -9,6 +9,7 @@ private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.BarrierGuards
private import semmle.python.frameworks.data.ModelsAsData
/**
* Provides default sources, sinks and sanitizers for detecting
@@ -71,6 +72,10 @@ module LogInjection {
}
}
/**
 * A sink taken from the models: a node obtained via
 * `ModelOutput::getASinkNode("log-injection").asSink()`.
 */
private class SinkFromModel extends Sink {
SinkFromModel() { this = ModelOutput::getASinkNode("log-injection").asSink() }
}
/**
* A comparison with a constant string, considered as a sanitizer-guard.
*/

View File

@@ -9,6 +9,7 @@ private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.BarrierGuards
private import semmle.python.frameworks.data.ModelsAsData
/**
* Provides default sources, sinks and sanitizers for detecting
@@ -89,6 +90,9 @@ module UrlRedirect {
}
}
/**
 * A sink taken from the models: a node obtained via
 * `ModelOutput::getASinkNode("url-redirection").asSink()`.
 */
private class SinkFromModel extends Sink {
SinkFromModel() { this = ModelOutput::getASinkNode("url-redirection").asSink() }
}
/**
* The right side of a string-concat, considered as a sanitizer.
*/

View File

@@ -0,0 +1,14 @@
/**
* @name Fetch endpoints for use in the model editor (framework mode)
* @description A list of endpoints accessible (methods and attributes) for consumers of the library. Excludes test and generated code.
* @kind table
* @id py/utils/modeleditor/framework-mode-endpoints
* @tags modeleditor endpoints framework-mode
*/
import modeling.ModelEditor
// One row per endpoint. Columns, in order: the endpoint itself, its namespace, type, name,
// parameters, supported status, file name, supported type, and kind.
from Endpoint endpoint
select endpoint, endpoint.getNamespace(), endpoint.getType(), endpoint.getName(),
endpoint.getParameters(), endpoint.getSupportedStatus(), endpoint.getFileName(),
endpoint.getSupportedType(), endpoint.getKind()

View File

@@ -0,0 +1,9 @@
| MyPackage/Foo.py:1:1:1:9 | Class C1 | MyPackage | Foo.C1 | | | false | Foo.py | | Class |
| MyPackage/Foo.py:2:5:2:17 | Function m1 | MyPackage | Foo.C1 | m1 | (self) | true | Foo.py | source | InstanceMethod |
| MyPackage/Foo.py:5:5:5:20 | Function m2 | MyPackage | Foo.C1 | m2 | (self,x) | true | Foo.py | source | InstanceMethod |
| MyPackage/Foo.py:9:5:9:14 | Function m3 | MyPackage | Foo.C1 | m3 | (x) | true | Foo.py | summary | StaticMethod |
| MyPackage/Foo.py:13:5:13:19 | Function m4 | MyPackage | Foo.C1 | m4 | (cls,x) | true | Foo.py | summary | ClassMethod |
| MyPackage/Foo.py:16:1:16:13 | Class C2 | MyPackage | Foo.C2 | | | false | Foo.py | | Class |
| MyPackage/Foo.py:17:5:17:17 | Function m1 | MyPackage | Foo.C2 | m1 | (self) | false | Foo.py | | InstanceMethod |
| MyPackage/Foo.py:20:5:20:27 | Function c2only_m1 | MyPackage | Foo.C2 | c2only_m1 | (self,x) | false | Foo.py | | InstanceMethod |
| TopLevel.py:3:1:3:38 | Function top_level_funciton | TopLevel | | top_level_funciton | (x,y,z:) | false | TopLevel.py | | Function |

View File

@@ -0,0 +1,14 @@
extensions:
- addsTo:
pack: codeql/python-all
extensible: sourceModel
data:
- ["MyPackage.Foo.C1","Member[m1].ReturnValue","remote"]
- ["MyPackage","Member[Foo].Member[C1].Instance.Member[m2].ReturnValue","remote"]
- addsTo:
pack: codeql/python-all
extensible: summaryModel
data:
- ["MyPackage.Foo.C1!","Member[m3]","Argument[0]","ReturnValue","value"]
- ["MyPackage","Member[Foo].Member[C1].Member[m4]","Argument[0]","ReturnValue","value"]

View File

@@ -0,0 +1 @@
utils/modeleditor/FrameworkModeEndpoints.ql

View File

@@ -0,0 +1,21 @@
class C1:
def m1(self):
print("C1.m1()")
def m2(self, x):
return x
@staticmethod
def m3(x):
return x
@classmethod
def m4(cls, x):
return x
class C2(C1):
def m1(self):
print("C2.m1()")
def c2only_m1(self, x):
return x

View File

@@ -0,0 +1,6 @@
from MyPackage import Foo
def top_level_funciton(x, /, y, *, z):
return [x, y, z]
top_level_value = Foo.C1()