Merge branch 'main' into bazookamusic/python-prompt-injection

This commit is contained in:
Sotiris Dragonas
2026-07-02 15:04:51 +03:00
committed by GitHub
362 changed files with 15469 additions and 15340 deletions

View File

@@ -0,0 +1 @@
print(0)

View File

@@ -0,0 +1 @@
| code/main.py:0:0:0:0 | code/main.py | |

View File

@@ -0,0 +1,5 @@
import python
from File f, string relative
where if exists(f.getRelativePath()) then relative = "relative" else relative = ""
select f, relative

View File

@@ -0,0 +1,7 @@
import runs_on
@runs_on.windows
def test(codeql, python, cwd, subst_drive):
drive = subst_drive(cwd / "code")
codeql.database.create(source_root=drive)

View File

@@ -529,7 +529,7 @@ predicate simpleLocalFlowStepForTypetracking(Node nodeFrom, Node nodeTo) {
}
private predicate summaryLocalStep(Node nodeFrom, Node nodeTo, string model) {
FlowSummaryImpl::Private::Steps::summaryLocalStep(nodeFrom.(FlowSummaryNode).getSummaryNode(),
FlowSummaryImpl::Private::Steps::summaryLocalStep(nodeFrom,
nodeTo.(FlowSummaryNode).getSummaryNode(), true, model)
}
@@ -1138,7 +1138,9 @@ predicate clearsContent(Node n, ContentSet cs) {
* Holds if the value that is being tracked is expected to be stored inside content `c`
* at node `n`.
*/
predicate expectsContent(Node n, ContentSet c) { none() }
predicate expectsContent(Node n, ContentSet c) {
FlowSummaryImpl::Private::Steps::summaryExpectsContent(n.(FlowSummaryNode).getSummaryNode(), c)
}
/**
* Holds if values stored inside attribute `c` are cleared at node `n`.

View File

@@ -20,6 +20,8 @@ module Input implements InputSig<Location, DataFlowImplSpecific::PythonDataFlow>
class SinkBase = Void;
class FlowSummaryCallBase = Void;
predicate callableFromSource(SummarizedCallableBase c) { none() }
ArgumentPosition callbackSelfParameterPosition() { result.isLambdaSelf() }
@@ -91,6 +93,8 @@ module Input implements InputSig<Location, DataFlowImplSpecific::PythonDataFlow>
cs.isAnyTupleOrDictionaryElement() and result = "AnyTupleOrDictionaryElement" and arg = ""
}
string encodeWithContent(ContentSet c, string arg) { result = "With" + encodeContent(c, arg) }
bindingset[token]
ParameterPosition decodeUnknownParameterPosition(AccessPath::AccessPathTokenBase token) {
// needed to support `Argument[x..y]` ranges
@@ -109,6 +113,10 @@ module Input implements InputSig<Location, DataFlowImplSpecific::PythonDataFlow>
private import Make<Location, DataFlowImplSpecific::PythonDataFlow, Input> as Impl
private module StepsInput implements Impl::Private::StepsInputSig {
Impl::Private::SummaryNode getSummaryNode(Node n) {
result = n.(FlowSummaryNode).getSummaryNode()
}
overlay[global]
DataFlowCall getACall(Public::SummarizedCallable sc) {
result =

View File

@@ -80,10 +80,8 @@ private module Cached {
) and
model = ""
or
FlowSummaryImpl::Private::Steps::summaryLocalStep(nodeFrom
.(DataFlowPrivate::FlowSummaryNode)
.getSummaryNode(), nodeTo.(DataFlowPrivate::FlowSummaryNode).getSummaryNode(), false,
model)
FlowSummaryImpl::Private::Steps::summaryLocalStep(nodeFrom,
nodeTo.(DataFlowPrivate::FlowSummaryNode).getSummaryNode(), false, model)
}
}

View File

@@ -4199,11 +4199,9 @@ module StdlibPrivate {
// The positional argument contains a mapping.
// TODO: these values can be overwritten by keyword arguments
// - dict mapping
exists(DataFlow::DictionaryElementContent dc, string key | key = dc.getKey() |
input = "Argument[0].DictionaryElement[" + key + "]" and
output = "ReturnValue.DictionaryElement[" + key + "]" and
preservesValue = true
)
input = "Argument[0].WithAnyDictionaryElement" and
output = "ReturnValue" and
preservesValue = true
or
// - list-of-pairs mapping
input = "Argument[0].ListElement.TupleElement[1]" and
@@ -4240,9 +4238,7 @@ module StdlibPrivate {
or
input = "Argument[0].SetElement"
or
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
input = "Argument[0].TupleElement[" + i.toString() + "]"
)
input = "Argument[0].AnyTupleElement"
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
) and
// Element content is mutated into list element content
@@ -4266,11 +4262,9 @@ module StdlibPrivate {
}
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
input = "Argument[0].TupleElement[" + i.toString() + "]" and
output = "ReturnValue.TupleElement[" + i.toString() + "]" and
preservesValue = true
)
input = "Argument[0].WithAnyTupleElement" and
output = "ReturnValue" and
preservesValue = true
or
input = "Argument[0].ListElement" and
output = "ReturnValue" and
@@ -4294,9 +4288,7 @@ module StdlibPrivate {
or
input = "Argument[0].SetElement"
or
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
input = "Argument[0].TupleElement[" + i.toString() + "]"
)
input = "Argument[0].AnyTupleElement"
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
) and
output = "ReturnValue.SetElement" and
@@ -4342,9 +4334,7 @@ module StdlibPrivate {
or
input = "Argument[0].SetElement"
or
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
input = "Argument[0].TupleElement[" + i.toString() + "]"
)
input = "Argument[0].AnyTupleElement"
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
) and
output = "ReturnValue.ListElement" and
@@ -4372,9 +4362,7 @@ module StdlibPrivate {
or
content = "SetElement"
or
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
content = "TupleElement[" + i.toString() + "]"
)
content = "AnyTupleElement"
|
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
input = "Argument[0]." + content and
@@ -4404,9 +4392,7 @@ module StdlibPrivate {
or
input = "Argument[0].SetElement"
or
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
input = "Argument[0].TupleElement[" + i.toString() + "]"
)
input = "Argument[0].AnyTupleElement"
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
) and
output = "ReturnValue.ListElement" and
@@ -4434,9 +4420,7 @@ module StdlibPrivate {
or
input = "Argument[0].SetElement"
or
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
input = "Argument[0].TupleElement[" + i.toString() + "]"
)
input = "Argument[0].AnyTupleElement"
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
) and
output = "ReturnValue" and
@@ -4468,9 +4452,7 @@ module StdlibPrivate {
// We reduce generality slightly by not tracking tuple contents on list arguments beyond the first, for performance.
// TODO: Once we have TupleElementAny, this generality can be increased.
i = 0 and
exists(DataFlow::TupleElementContent tc, int j | j = tc.getIndex() |
input = "Argument[1].TupleElement[" + j.toString() + "]"
)
input = "Argument[1].AnyTupleElement"
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
) and
output = "Argument[0].Parameter[" + i.toString() + "]" and
@@ -4499,9 +4481,7 @@ module StdlibPrivate {
or
input = "Argument[1].SetElement"
or
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
input = "Argument[1].TupleElement[" + i.toString() + "]"
)
input = "Argument[1].AnyTupleElement"
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
) and
(output = "Argument[0].Parameter[0]" or output = "ReturnValue.ListElement") and
@@ -4525,9 +4505,7 @@ module StdlibPrivate {
or
input = "Argument[0].SetElement"
or
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
input = "Argument[0].TupleElement[" + i.toString() + "]"
)
input = "Argument[0].AnyTupleElement"
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
) and
output = "ReturnValue.ListElement.TupleElement[1]" and
@@ -4552,12 +4530,7 @@ module StdlibPrivate {
or
input = "Argument[" + i.toString() + "].SetElement"
or
// We reduce generality slightly by not tracking tuple contents on arguments beyond the first two, for performance.
// TODO: Once we have TupleElementAny, this generality can be increased.
i in [0 .. 1] and
exists(DataFlow::TupleElementContent tc, int j | j = tc.getIndex() |
input = "Argument[" + i.toString() + "].TupleElement[" + j.toString() + "]"
)
input = "Argument[" + i.toString() + "].AnyTupleElement"
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
) and
output = "ReturnValue.ListElement.TupleElement[" + i.toString() + "]" and
@@ -4580,12 +4553,6 @@ module StdlibPrivate {
override DataFlow::ArgumentNode getACallback() { none() }
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
exists(DataFlow::Content c |
input = "Argument[self]." + c.getMaDRepresentation() and
output = "ReturnValue." + c.getMaDRepresentation() and
preservesValue = true
)
or
input = "Argument[self]" and
output = "ReturnValue" and
preservesValue = true
@@ -4741,12 +4708,10 @@ module StdlibPrivate {
override DataFlow::ArgumentNode getACallback() { none() }
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
exists(DataFlow::DictionaryElementContent dc, string key | key = dc.getKey() |
input = "Argument[self].DictionaryElement[" + key + "]" and
output = "ReturnValue.TupleElement[1]" and
preservesValue = true
// TODO: put `key` into "ReturnValue.TupleElement[0]"
)
input = "Argument[self].AnyDictionaryElement" and
output = "ReturnValue.TupleElement[1]" and
preservesValue = true
// TODO: put `key` into "ReturnValue.TupleElement[0]"
}
}
@@ -4825,11 +4790,9 @@ module StdlibPrivate {
}
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
exists(DataFlow::DictionaryElementContent dc, string key | key = dc.getKey() |
input = "Argument[self].DictionaryElement[" + key + "]" and
output = "ReturnValue.ListElement" and
preservesValue = true
)
input = "Argument[self].AnyDictionaryElement" and
output = "ReturnValue.ListElement" and
preservesValue = true
or
input = "Argument[self]" and
output = "ReturnValue" and
@@ -4876,11 +4839,9 @@ module StdlibPrivate {
}
override predicate propagatesFlow(string input, string output, boolean preservesValue) {
exists(DataFlow::DictionaryElementContent dc, string key | key = dc.getKey() |
input = "Argument[self].DictionaryElement[" + key + "]" and
output = "ReturnValue.ListElement.TupleElement[1]" and
preservesValue = true
)
input = "Argument[self].AnyDictionaryElement" and
output = "ReturnValue.ListElement.TupleElement[1]" and
preservesValue = true
or
// TODO: Add the keys to output list
input = "Argument[self]" and

View File

@@ -589,11 +589,11 @@ def test_zip_tuple():
SINK(z[0][0]) # $ flow="SOURCE, l:-7 -> z[0][0]"
SINK(z[0][1]) # $ flow="SOURCE, l:-7 -> z[0][1]"
SINK_F(z[0][2])
SINK_F(z[0][2]) # $ SPURIOUS: flow="SOURCE, l:-7 -> z[0][2]"
SINK_F(z[0][3])
SINK(z[1][0]) # $ flow="SOURCE, l:-11 -> z[1][0]"
SINK_F(z[1][1]) # $ SPURIOUS: flow="SOURCE, l:-11 -> z[1][1]"
SINK(z[1][2]) # $ MISSING: flow="SOURCE, l:-11 -> z[1][2]" # Tuple contents are not tracked beyond the first two arguments for performance.
SINK(z[1][2]) # $ flow="SOURCE, l:-11 -> z[1][2]"
SINK_F(z[1][3])
@expects(4)

View File

@@ -362,7 +362,7 @@ def test_load_in_bulk():
# see https://docs.djangoproject.com/en/4.0/ref/models/querysets/#in-bulk
d = TestLoad.objects.in_bulk([1])
for val in d.values():
SINK(val.text) # $ MISSING: flow
SINK(val.text) # $ flow="SOURCE, l:-65 -> val.text"
SINK(d[1].text) # $ flow="SOURCE, l:-66 -> d[1].text"