From eaef783f4b7a24b726a7b9c824d90fa2fd5d8f2e Mon Sep 17 00:00:00 2001 From: Taus Date: Tue, 15 Oct 2024 11:58:47 +0000 Subject: [PATCH] Python: Add partial model for `copy.replace` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extends our modelling to partially cover the behaviour of `copy.replace`. In particular, we model this in two ways: Firstly, we extend the existing Models-as-Data row for `copy` and `deepcopy` to also cover `replace`. This means that we treat the result of `replace` as containing all of the fields of the original object. This is somewhat _more_ than we want, as strictly speaking the fields that are overwritten should _not_ propagate flow through the `replace` call, but currently we don't have a good way of modelling this blocking of flow. Secondly, we add a flow summary that adds flow from named arguments of the `replace` call to the corresponding fields on the base object. This ensures that we at least have the new flow arising from the `replace` call. Note that the flow summary adds this flow for _all_ named arguments of _all_ `replace` calls throughout the codebase. 
However, since any particular `replace` call will only populate a subset of these (the subset consisting of exactly those named arguments that are in that particular call), this does not cause any unwanted crosstalk between different `replace` calls.§ --- .../semmle/python/frameworks/Stdlib.model.yml | 2 +- .../lib/semmle/python/frameworks/Stdlib.qll | 26 +++++++++++++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib.model.yml b/python/ql/lib/semmle/python/frameworks/Stdlib.model.yml index 683b0aa9b3d..96e1a284f3e 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib.model.yml +++ b/python/ql/lib/semmle/python/frameworks/Stdlib.model.yml @@ -45,7 +45,7 @@ extensions: # See https://docs.python.org/3/library/contextlib.html#contextlib.ExitStack - ["contextlib.ExitStack", "Member[enter_context]", "Argument[0,cm:]", "ReturnValue", "taint"] # See https://docs.python.org/3/library/copy.html#copy.deepcopy - - ["copy", "Member[copy,deepcopy]", "Argument[0,x:]", "ReturnValue", "value"] + - ["copy", "Member[copy,deepcopy,replace]", "Argument[0,x:]", "ReturnValue", "value"] # See # - https://docs.python.org/3/library/ctypes.html#ctypes.create_string_buffer # - https://docs.python.org/3/library/ctypes.html#ctypes.create_unicode_buffer diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib.qll b/python/ql/lib/semmle/python/frameworks/Stdlib.qll index f208f19c74c..45878c8160b 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib.qll +++ b/python/ql/lib/semmle/python/frameworks/Stdlib.qll @@ -4549,6 +4549,32 @@ module StdlibPrivate { } } + /** A flow summary for `copy.replace`. 
*/ + class ReplaceSummary extends SummarizedCallable { + ReplaceSummary() { this = "copy.replace" } + + override DataFlow::CallCfgNode getACall() { + result = API::moduleImport("copy").getMember("replace").getACall() + } + + override DataFlow::ArgumentNode getACallback() { + result = API::moduleImport("copy").getMember("replace").getAValueReachableFromSource() + } + + override predicate propagatesFlow(string input, string output, boolean preservesValue) { + exists(CallNode c, string name, ControlFlowNode n, DataFlow::AttributeContent ac | + c.getFunction().(NameNode).getId() = "replace" or + c.getFunction().(AttrNode).getName() = "replace" + | + n = c.getArgByName(name) and + ac.getAttribute() = name and + input = "Argument[" + name + ":]" and + output = "ReturnValue." + ac.getMaDRepresentation() and + preservesValue = true + ) + } + } + /** * A flow summary for `pop` either for list or set. * This ignores the index if given, since content is