Merge pull request #16789 from yoff/python/document-models-as-data

python: Document MaD format
This commit is contained in:
yoff
2024-06-25 15:46:28 +02:00
committed by GitHub
13 changed files with 534 additions and 4 deletions

View File

@@ -0,0 +1,4 @@
---
category: minorAnalysis
---
* A number of Python queries now support sinks defined via data extensions. The format of data extensions for Python has been documented.

View File

@@ -29,8 +29,8 @@ private module FabricV1 {
// -------------------------------------------------------------------------
// fabric.api
// -------------------------------------------------------------------------
/** Gets a reference to the `fabric.api` module. */
API::Node api() { result = fabric().getMember("api") }
/**
* Gets a reference to the `fabric.api` module, also known as `fabric.operations`.
*
* Both member names are looked up on `fabric()`, so a reference obtained through
* either `fabric.api` or `fabric.operations` is returned by this predicate.
*/
API::Node api() { result = fabric().getMember(["api", "operations"]) }
/** Provides models for the `fabric.api` module */
module Api {

View File

@@ -9,6 +9,7 @@ private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.BarrierGuards
private import semmle.python.frameworks.data.ModelsAsData
/**
* Provides default sources, sinks and sanitizers for detecting
@@ -43,6 +44,10 @@ module CodeInjection {
CodeExecutionAsSink() { this = any(CodeExecution e).getCode() }
}
/**
* A sink contributed by a Models-as-Data model of kind `code-injection`,
* considered as a sink for this module.
*/
private class SinkFromModel extends Sink {
SinkFromModel() { this = ModelOutput::getASinkNode("code-injection").asSink() }
}
/**
* A comparison with a constant string, considered as a sanitizer-guard.
*/

View File

@@ -9,6 +9,7 @@ private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.BarrierGuards
private import semmle.python.frameworks.data.ModelsAsData
/**
* Provides default sources, sinks and sanitizers for detecting
@@ -78,6 +79,10 @@ module CommandInjection {
}
}
/**
* A sink contributed by a Models-as-Data model of kind `command-injection`,
* considered as a sink for this module.
*/
private class SinkFromModel extends Sink {
SinkFromModel() { this = ModelOutput::getASinkNode("command-injection").asSink() }
}
/**
* A comparison with a constant string, considered as a sanitizer-guard.
*/

View File

@@ -9,6 +9,7 @@ private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.BarrierGuards
private import semmle.python.frameworks.data.ModelsAsData
/**
* Provides default sources, sinks and sanitizers for detecting
@@ -71,6 +72,10 @@ module LogInjection {
}
}
/**
* A sink contributed by a Models-as-Data model of kind `log-injection`,
* considered as a sink for this module.
*/
private class SinkFromModel extends Sink {
SinkFromModel() { this = ModelOutput::getASinkNode("log-injection").asSink() }
}
/**
* A comparison with a constant string, considered as a sanitizer-guard.
*/

View File

@@ -9,6 +9,7 @@ private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.BarrierGuards
private import semmle.python.frameworks.data.ModelsAsData
/**
* Provides default sources, sinks and sanitizers for detecting
@@ -48,6 +49,10 @@ module UnsafeDeserialization {
}
}
/**
* A sink contributed by a Models-as-Data model of kind `unsafe-deserialization`,
* considered as a sink for this module.
*/
private class SinkFromModel extends Sink {
SinkFromModel() { this = ModelOutput::getASinkNode("unsafe-deserialization").asSink() }
}
/**
* A comparison with a constant string, considered as a sanitizer-guard.
*/

View File

@@ -9,6 +9,7 @@ private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.BarrierGuards
private import semmle.python.frameworks.data.ModelsAsData
/**
* Provides default sources, sinks and sanitizers for detecting
@@ -89,6 +90,10 @@ module UrlRedirect {
}
}
/**
* A sink contributed by a Models-as-Data model of kind `url-redirection`,
* considered as a sink for this module.
*/
private class SinkFromModel extends Sink {
SinkFromModel() { this = ModelOutput::getASinkNode("url-redirection").asSink() }
}
/**
* The right side of a string-concat, considered as a sanitizer.
*/

View File

@@ -25,3 +25,15 @@ extensions:
- ["foo.MS_Class", "Member[instance_method]", "Argument[0]", "ReturnValue.TupleElement[1]", "value"]
- ["foo.MS_Class", "Member[explicit_self]", "Argument[self:]", "ReturnValue", "value"]
- ["json", "Member[MS_loads]", "Argument[0]", "ReturnValue", "taint"]
- addsTo:
pack: codeql/python-all
extensible: typeModel
data:
- ["foo.MS_Class", "foo", "Member[get_instance].ReturnValue"]
- ["foo.MS_Class!", "foo", "Member[get_class].ReturnValue"]
# Ideally this would be a consequence of the above line
- ["foo.MS_Class", "foo", "Member[get_class].ReturnValue.Instance"]
- ["foo.MS_Class", "foo.MS_Factory!", "Member[get_instance].ReturnValue"]
- ["foo.MS_Class", "foo.MS_Factory", "Member[make].ReturnValue"]
- ["foo.MS_Class", "foo.Impl.MS_Class_Impl", ""]

View File

@@ -25,3 +25,15 @@ extensions:
- ["foo.MS_Class", "Member[instance_method]", "Argument[0]", "ReturnValue.TupleElement[1]", "value"]
- ["foo.MS_Class", "Member[explicit_self]", "Argument[self:]", "ReturnValue", "value"]
- ["json", "Member[MS_loads]", "Argument[0]", "ReturnValue", "taint"]
- addsTo:
pack: codeql/python-all
extensible: typeModel
data:
- ["foo.MS_Class", "foo", "Member[get_instance].ReturnValue"]
- ["foo.MS_Class!", "foo", "Member[get_class].ReturnValue"]
# Ideally this would be a consequence of the above line
- ["foo.MS_Class", "foo", "Member[get_class].ReturnValue.Instance"]
- ["foo.MS_Class", "foo.MS_Factory!", "Member[get_instance].ReturnValue"]
- ["foo.MS_Class", "foo.MS_Factory", "Member[make].ReturnValue"]
- ["foo.MS_Class", "foo.Impl.MS_Class_Impl", ""]

View File

@@ -30,7 +30,7 @@ def SINK_F(x):
ensure_tainted = ensure_not_tainted = print
TAINTED_STRING = "TAINTED_STRING"
from foo import MS_identity, MS_apply_lambda, MS_reversed, MS_list_map, MS_append_to_list, MS_spread, MS_spread_all
from foo import MS_identity, MS_apply_lambda, MS_reversed, MS_list_map, MS_append_to_list, MS_spread, MS_spread_all, Impl
# Simple summary
via_identity = MS_identity(SOURCE)
@@ -122,7 +122,7 @@ a, b = MS_spread_all(SOURCE)
SINK(a) # $ flow="SOURCE, l:-1 -> a"
SINK(b) # $ flow="SOURCE, l:-2 -> b"
from foo import MS_Class, MS_Class_transitive
from foo import MS_Class, MS_Class_transitive, get_instance, get_class, MS_Factory
# Class summaries
class_via_positional = MS_Class(SOURCE)
@@ -175,6 +175,24 @@ SINK_F(MS_Class.explicit_self(SOURCE))
# Instead, `Argument[self:]` refers to a keyword argument named `self` (which you are allowed to do in Python)
SINK(c.explicit_self(self = SOURCE)) # $ flow="SOURCE -> c.explicit_self(..)"
instance = get_instance()
SINK(instance.instance_method(SOURCE)[1]) # $ flow="SOURCE -> instance.instance_method(..)[1]"
returned_class = get_class()
SINK(returned_class(SOURCE).config) # $ flow="SOURCE -> returned_class(..).config"
SINK(returned_class().instance_method(SOURCE)[1]) # $flow="SOURCE -> returned_class().instance_method(..)[1]"
# Instance obtained from a method called on the factory class itself
# (exercises the `foo.MS_Factory!` type-model row).
factory_instance = MS_Factory.get_instance()
SINK(factory_instance.instance_method(SOURCE)[1]) # $ flow="SOURCE -> factory_instance.instance_method(..)[1]"
factory = MS_Factory()
SINK(factory.make().instance_method(SOURCE)[1]) # $ flow="SOURCE -> factory.make().instance_method(..)[1]"
also_instance = Impl.MS_Class_Impl()
SINK(also_instance.instance_method(SOURCE)[1]) # $ flow="SOURCE -> also_instance.instance_method(..)[1]"
# Modeled flow-summary is not value preserving
from json import MS_loads as json_loads

View File

@@ -12,3 +12,8 @@ sudo("cmd1; cmd2") # $getCommand="cmd1; cmd2"
local(command="cmd1; cmd2") # $getCommand="cmd1; cmd2"
run(command="cmd1; cmd2") # $getCommand="cmd1; cmd2"
sudo(command="cmd1; cmd2") # $getCommand="cmd1; cmd2"
from fabric import operations
operations.local("cmd1; cmd2") # $getCommand="cmd1; cmd2"
operations.local(command="cmd1; cmd2") # $getCommand="cmd1; cmd2"