diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll index 04e8ad0587f..d91d51d0c66 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll @@ -1138,7 +1138,9 @@ predicate clearsContent(Node n, ContentSet cs) { * Holds if the value that is being tracked is expected to be stored inside content `c` * at node `n`. */ -predicate expectsContent(Node n, ContentSet c) { none() } +predicate expectsContent(Node n, ContentSet c) { + FlowSummaryImpl::Private::Steps::summaryExpectsContent(n.(FlowSummaryNode).getSummaryNode(), c) +} /** * Holds if values stored inside attribute `c` are cleared at node `n`. diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/FlowSummaryImpl.qll b/python/ql/lib/semmle/python/dataflow/new/internal/FlowSummaryImpl.qll index 0931fcca0dc..6d128776700 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/FlowSummaryImpl.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/FlowSummaryImpl.qll @@ -91,6 +91,8 @@ module Input implements InputSig cs.isAnyTupleOrDictionaryElement() and result = "AnyTupleOrDictionaryElement" and arg = "" } + string encodeWithContent(ContentSet c, string arg) { result = "With" + encodeContent(c, arg) } + bindingset[token] ParameterPosition decodeUnknownParameterPosition(AccessPath::AccessPathTokenBase token) { // needed to support `Argument[x..y]` ranges diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib.qll b/python/ql/lib/semmle/python/frameworks/Stdlib.qll index a9fd72530f5..df69a010fd8 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib.qll +++ b/python/ql/lib/semmle/python/frameworks/Stdlib.qll @@ -4199,11 +4199,9 @@ module StdlibPrivate { // The positional argument contains a mapping. // TODO: these values can be overwritten by keyword arguments // - dict mapping - exists(DataFlow::DictionaryElementContent dc, string key | key = dc.getKey() | - input = "Argument[0].DictionaryElement[" + key + "]" and - output = "ReturnValue.DictionaryElement[" + key + "]" and - preservesValue = true - ) + input = "Argument[0].WithAnyDictionaryElement" and + output = "ReturnValue" and + preservesValue = true or // - list-of-pairs mapping input = "Argument[0].ListElement.TupleElement[1]" and @@ -4240,9 +4238,7 @@ module StdlibPrivate { or input = "Argument[0].SetElement" or - exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() | - input = "Argument[0].TupleElement[" + i.toString() + "]" - ) + input = "Argument[0].AnyTupleElement" // TODO: Once we have DictKeyContent, we need to transform that into ListElementContent ) and // Element content is mutated into list element content @@ -4266,11 +4262,9 @@ module StdlibPrivate { } override predicate propagatesFlow(string input, string output, boolean preservesValue) { - exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() | - input = "Argument[0].TupleElement[" + i.toString() + "]" and - output = "ReturnValue.TupleElement[" + i.toString() + "]" and - preservesValue = true - ) + input = "Argument[0].WithAnyTupleElement" and + output = "ReturnValue" and + preservesValue = true or input = "Argument[0].ListElement" and output = "ReturnValue" and @@ -4294,9 +4288,7 @@ module StdlibPrivate { or input = "Argument[0].SetElement" or - exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() | - input = "Argument[0].TupleElement[" + i.toString() + "]" - ) + input = "Argument[0].AnyTupleElement" // TODO: Once we have DictKeyContent, we need to transform that into ListElementContent ) and output = "ReturnValue.SetElement" and @@ -4342,9 +4334,7 @@ module StdlibPrivate { or input = "Argument[0].SetElement" or - exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() | - input = "Argument[0].TupleElement[" + i.toString() + "]" - ) + input = "Argument[0].AnyTupleElement" // TODO: Once we have DictKeyContent, we need to transform that into ListElementContent ) and output = "ReturnValue.ListElement" and @@ -4372,9 +4362,7 @@ module StdlibPrivate { or content = "SetElement" or - exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() | - content = "TupleElement[" + i.toString() + "]" - ) + content = "AnyTupleElement" | // TODO: Once we have DictKeyContent, we need to transform that into ListElementContent input = "Argument[0]." + content and @@ -4404,9 +4392,7 @@ module StdlibPrivate { or input = "Argument[0].SetElement" or - exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() | - input = "Argument[0].TupleElement[" + i.toString() + "]" - ) + input = "Argument[0].AnyTupleElement" // TODO: Once we have DictKeyContent, we need to transform that into ListElementContent ) and output = "ReturnValue.ListElement" and @@ -4434,9 +4420,7 @@ module StdlibPrivate { or input = "Argument[0].SetElement" or - exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() | - input = "Argument[0].TupleElement[" + i.toString() + "]" - ) + input = "Argument[0].AnyTupleElement" // TODO: Once we have DictKeyContent, we need to transform that into ListElementContent ) and output = "ReturnValue" and @@ -4468,9 +4452,7 @@ module StdlibPrivate { // We reduce generality slightly by not tracking tuple contents on list arguments beyond the first, for performance. // TODO: Once we have TupleElementAny, this generality can be increased. i = 0 and - exists(DataFlow::TupleElementContent tc, int j | j = tc.getIndex() | - input = "Argument[1].TupleElement[" + j.toString() + "]" - ) + input = "Argument[1].AnyTupleElement" // TODO: Once we have DictKeyContent, we need to transform that into ListElementContent ) and output = "Argument[0].Parameter[" + i.toString() + "]" and @@ -4499,9 +4481,7 @@ module StdlibPrivate { or input = "Argument[1].SetElement" or - exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() | - input = "Argument[1].TupleElement[" + i.toString() + "]" - ) + input = "Argument[1].AnyTupleElement" // TODO: Once we have DictKeyContent, we need to transform that into ListElementContent ) and (output = "Argument[0].Parameter[0]" or output = "ReturnValue.ListElement") and @@ -4525,9 +4505,7 @@ module StdlibPrivate { or input = "Argument[0].SetElement" or - exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() | - input = "Argument[0].TupleElement[" + i.toString() + "]" - ) + input = "Argument[0].AnyTupleElement" // TODO: Once we have DictKeyContent, we need to transform that into ListElementContent ) and output = "ReturnValue.ListElement.TupleElement[1]" and @@ -4552,12 +4530,7 @@ module StdlibPrivate { or input = "Argument[" + i.toString() + "].SetElement" or - // We reduce generality slightly by not tracking tuple contents on arguments beyond the first two, for performance. - // TODO: Once we have TupleElementAny, this generality can be increased. - i in [0 .. 1] and - exists(DataFlow::TupleElementContent tc, int j | j = tc.getIndex() | - input = "Argument[" + i.toString() + "].TupleElement[" + j.toString() + "]" - ) + input = "Argument[" + i.toString() + "].AnyTupleElement" // TODO: Once we have DictKeyContent, we need to transform that into ListElementContent ) and output = "ReturnValue.ListElement.TupleElement[" + i.toString() + "]" and @@ -4580,12 +4553,6 @@ module StdlibPrivate { override DataFlow::ArgumentNode getACallback() { none() } override predicate propagatesFlow(string input, string output, boolean preservesValue) { - exists(DataFlow::Content c | - input = "Argument[self]." + c.getMaDRepresentation() and - output = "ReturnValue." + c.getMaDRepresentation() and - preservesValue = true - ) - or input = "Argument[self]" and output = "ReturnValue" and preservesValue = true @@ -4741,12 +4708,10 @@ module StdlibPrivate { override DataFlow::ArgumentNode getACallback() { none() } override predicate propagatesFlow(string input, string output, boolean preservesValue) { - exists(DataFlow::DictionaryElementContent dc, string key | key = dc.getKey() | - input = "Argument[self].DictionaryElement[" + key + "]" and - output = "ReturnValue.TupleElement[1]" and - preservesValue = true - // TODO: put `key` into "ReturnValue.TupleElement[0]" - ) + input = "Argument[self].AnyDictionaryElement" and + output = "ReturnValue.TupleElement[1]" and + preservesValue = true + // TODO: put `key` into "ReturnValue.TupleElement[0]" } } @@ -4825,11 +4790,9 @@ module StdlibPrivate { } override predicate propagatesFlow(string input, string output, boolean preservesValue) { - exists(DataFlow::DictionaryElementContent dc, string key | key = dc.getKey() | - input = "Argument[self].DictionaryElement[" + key + "]" and - output = "ReturnValue.ListElement" and - preservesValue = true - ) + input = "Argument[self].AnyDictionaryElement" and + output = "ReturnValue.ListElement" and + preservesValue = true or input = "Argument[self]" and output = "ReturnValue" and @@ -4876,11 +4839,9 @@ module StdlibPrivate { } override predicate propagatesFlow(string input, string output, boolean preservesValue) { - exists(DataFlow::DictionaryElementContent dc, string key | key = dc.getKey() | - input = "Argument[self].DictionaryElement[" + key + "]" and - output = "ReturnValue.ListElement.TupleElement[1]" and - preservesValue = true - ) + input = "Argument[self].AnyDictionaryElement" and + output = "ReturnValue.ListElement.TupleElement[1]" and + preservesValue = true or // TODO: Add the keys to output list input = "Argument[self]" and diff --git a/python/ql/test/library-tests/dataflow/coverage/test_builtins.py b/python/ql/test/library-tests/dataflow/coverage/test_builtins.py index 8e87e56dc2e..7ef7866ec17 100644 --- a/python/ql/test/library-tests/dataflow/coverage/test_builtins.py +++ b/python/ql/test/library-tests/dataflow/coverage/test_builtins.py @@ -589,11 +589,11 @@ def test_zip_tuple(): SINK(z[0][0]) # $ flow="SOURCE, l:-7 -> z[0][0]" SINK(z[0][1]) # $ flow="SOURCE, l:-7 -> z[0][1]" - SINK_F(z[0][2]) + SINK_F(z[0][2]) # $ SPURIOUS: flow="SOURCE, l:-7 -> z[0][2]" SINK_F(z[0][3]) SINK(z[1][0]) # $ flow="SOURCE, l:-11 -> z[1][0]" SINK_F(z[1][1]) # $ SPURIOUS: flow="SOURCE, l:-11 -> z[1][1]" - SINK(z[1][2]) # $ MISSING: flow="SOURCE, l:-11 -> z[1][2]" # Tuple contents are not tracked beyond the first two arguments for performance. + SINK(z[1][2]) # $ flow="SOURCE, l:-11 -> z[1][2]" SINK_F(z[1][3]) @expects(4) diff --git a/python/ql/test/library-tests/frameworks/django-orm/testapp/orm_tests.py b/python/ql/test/library-tests/frameworks/django-orm/testapp/orm_tests.py index 3e8ba31d019..7081f73b525 100644 --- a/python/ql/test/library-tests/frameworks/django-orm/testapp/orm_tests.py +++ b/python/ql/test/library-tests/frameworks/django-orm/testapp/orm_tests.py @@ -362,7 +362,7 @@ def test_load_in_bulk(): # see https://docs.djangoproject.com/en/4.0/ref/models/querysets/#in-bulk d = TestLoad.objects.in_bulk([1]) for val in d.values(): - SINK(val.text) # $ MISSING: flow + SINK(val.text) # $ flow="SOURCE, l:-65 -> val.text" SINK(d[1].text) # $ flow="SOURCE, l:-66 -> d[1].text"