python: Document MaD format

- add a few tests reflecting the documentation
- make the mentioned sink-kinds have an effect on relevant queries
This commit is contained in:
Rasmus Lerchedahl Petersen
2024-06-14 16:02:38 +02:00
parent 6dbdc9e17f
commit 5cb37f5c4c
9 changed files with 502 additions and 1 deletions

View File

@@ -9,6 +9,7 @@ private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.BarrierGuards
private import semmle.python.frameworks.data.ModelsAsData
/**
* Provides default sources, sinks and sanitizers for detecting
@@ -43,6 +44,10 @@ module CodeInjection {
CodeExecutionAsSink() { this = any(CodeExecution e).getCode() }
}
/**
 * A code-injection sink contributed by Models-as-Data: any data-flow node
 * that `ModelOutput` reports as a sink of kind `code-injection`.
 */
private class SinkFromModel extends Sink {
SinkFromModel() { this = ModelOutput::getASinkNode("code-injection").asSink() }
}
/**
* A comparison with a constant string, considered as a sanitizer-guard.
*/

View File

@@ -9,6 +9,7 @@ private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.BarrierGuards
private import semmle.python.frameworks.data.ModelsAsData
/**
* Provides default sources, sinks and sanitizers for detecting
@@ -78,6 +79,10 @@ module CommandInjection {
}
}
/**
 * A command-injection sink contributed by Models-as-Data: any data-flow node
 * that `ModelOutput` reports as a sink of kind `command-injection`.
 */
private class SinkFromModel extends Sink {
SinkFromModel() { this = ModelOutput::getASinkNode("command-injection").asSink() }
}
/**
* A comparison with a constant string, considered as a sanitizer-guard.
*/

View File

@@ -9,6 +9,7 @@ private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.BarrierGuards
private import semmle.python.frameworks.data.ModelsAsData
/**
* Provides default sources, sinks and sanitizers for detecting
@@ -71,6 +72,10 @@ module LogInjection {
}
}
/**
 * A log-injection sink contributed by Models-as-Data: any data-flow node
 * that `ModelOutput` reports as a sink of kind `log-injection`.
 */
private class SinkFromModel extends Sink {
SinkFromModel() { this = ModelOutput::getASinkNode("log-injection").asSink() }
}
/**
* A comparison with a constant string, considered as a sanitizer-guard.
*/

View File

@@ -9,6 +9,7 @@ private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.BarrierGuards
private import semmle.python.frameworks.data.ModelsAsData
/**
* Provides default sources, sinks and sanitizers for detecting
@@ -48,6 +49,10 @@ module UnsafeDeserialization {
}
}
/**
 * An unsafe-deserialization sink contributed by Models-as-Data: any data-flow
 * node that `ModelOutput` reports as a sink of kind `unsafe-deserialization`.
 */
private class SinkFromModel extends Sink {
SinkFromModel() { this = ModelOutput::getASinkNode("unsafe-deserialization").asSink() }
}
/**
* A comparison with a constant string, considered as a sanitizer-guard.
*/

View File

@@ -9,6 +9,7 @@ private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.BarrierGuards
private import semmle.python.frameworks.data.ModelsAsData
/**
* Provides default sources, sinks and sanitizers for detecting
@@ -89,6 +90,9 @@ module UrlRedirect {
}
}
/**
 * A URL-redirection sink contributed by Models-as-Data: any data-flow node
 * that `ModelOutput` reports as a sink of kind `url-redirection`.
 */
private class SinkFromModel extends Sink {
SinkFromModel() { this = ModelOutput::getASinkNode("url-redirection").asSink() }
}
/**
* The right side of a string-concat, considered as a sanitizer.
*/

View File

@@ -25,3 +25,12 @@ extensions:
- ["foo.MS_Class", "Member[instance_method]", "Argument[0]", "ReturnValue.TupleElement[1]", "value"]
- ["foo.MS_Class", "Member[explicit_self]", "Argument[self:]", "ReturnValue", "value"]
- ["json", "Member[MS_loads]", "Argument[0]", "ReturnValue", "taint"]
# Type models for the test library `foo`. Each row appears to be
# [type, starting package/type, access path]: values reached by following the
# path from the start are treated as having the given type (confirm against the
# MaD documentation).
- addsTo:
pack: codeql/python-all
extensible: typeModel
data:
# The value returned by `foo.get_instance()` is typed as `foo.MS_Class`.
- ["foo.MS_Class", "foo", "Member[get_instance].ReturnValue"]
# The value returned by `foo.get_class()` is typed as `foo.MS_Class!`
# (presumably the class object itself rather than an instance of it).
- ["foo.MS_Class!", "foo", "Member[get_class].ReturnValue"]
# Ideally this would be a consequence of the above line
# Until then, instances of the class returned by `foo.get_class()` are listed explicitly.
- ["foo.MS_Class", "foo", "Member[get_class].ReturnValue.Instance"]

View File

@@ -25,3 +25,12 @@ extensions:
- ["foo.MS_Class", "Member[instance_method]", "Argument[0]", "ReturnValue.TupleElement[1]", "value"]
- ["foo.MS_Class", "Member[explicit_self]", "Argument[self:]", "ReturnValue", "value"]
- ["json", "Member[MS_loads]", "Argument[0]", "ReturnValue", "taint"]
# Type models for the test library `foo`. Each row appears to be
# [type, starting package/type, access path]: values reached by following the
# path from the start are treated as having the given type (confirm against the
# MaD documentation).
- addsTo:
pack: codeql/python-all
extensible: typeModel
data:
# The value returned by `foo.get_instance()` is typed as `foo.MS_Class`.
- ["foo.MS_Class", "foo", "Member[get_instance].ReturnValue"]
# The value returned by `foo.get_class()` is typed as `foo.MS_Class!`
# (presumably the class object itself rather than an instance of it).
- ["foo.MS_Class!", "foo", "Member[get_class].ReturnValue"]
# Ideally this would be a consequence of the above line
# Until then, instances of the class returned by `foo.get_class()` are listed explicitly.
- ["foo.MS_Class", "foo", "Member[get_class].ReturnValue.Instance"]

View File

@@ -122,7 +122,7 @@ a, b = MS_spread_all(SOURCE)
SINK(a) # $ flow="SOURCE, l:-1 -> a"
SINK(b) # $ flow="SOURCE, l:-2 -> b"
from foo import MS_Class, MS_Class_transitive
from foo import MS_Class, MS_Class_transitive, get_instance, get_class
# Class summaries
class_via_positional = MS_Class(SOURCE)
@@ -175,6 +175,16 @@ SINK_F(MS_Class.explicit_self(SOURCE))
# Instead, `Argument[self:]` refers to a keyword argument named `self` (which you are allowed to do in Python)
SINK(c.explicit_self(self = SOURCE)) # $ flow="SOURCE -> c.explicit_self(..)"
# `get_instance()` is typed as `foo.MS_Class` by a typeModel row, so the
# `instance_method` summary should apply to the object it returns.
instance = get_instance()
SINK(instance.instance_method(SOURCE)[1]) # $ flow="SOURCE -> instance.instance_method(..)[1]"
# `get_class()` is typed as `foo.MS_Class!` by a typeModel row; the result is
# called here like the class, and its instances should pick up the same summaries.
returned_class = get_class()
SINK(returned_class(SOURCE).config) # $ flow="SOURCE -> returned_class(..).config"
SINK(returned_class().instance_method(SOURCE)[1]) # $ flow="SOURCE -> returned_class().instance_method(..)[1]"
# Modeled flow-summary is not value preserving
from json import MS_loads as json_loads