Merge pull request #16971 from RasmusWL/mad-dict-source

Python: Add MaD support for DictionaryElement/DictionaryElementAny for sources
This commit is contained in:
yoff
2024-07-31 13:40:07 +02:00
committed by GitHub
5 changed files with 50 additions and 4 deletions

View File

@@ -0,0 +1,4 @@
---
category: minorAnalysis
---
* Added support for `DictionaryElement[<key>]` and `DictionaryElementAny` in `sourceModel` access paths when customizing library models for Python (see https://codeql.github.com/docs/codeql-language-guides/customizing-library-models-for-python/).

View File

@@ -134,9 +134,25 @@ API::Node getExtraSuccessorFromNode(API::Node node, AccessPathTokenBase token) {
token.getAnArgument() = "any-named" and
result = node.getKeywordParameter(_)
)
or
// content based steps
//
// Note: if we want to migrate to `FlowSummaryImpl::Input::encodeContent`, as is done
// for Ruby, be aware that we currently don't create
// `DataFlow::DictionaryElementContent` just from seeing a subscript read, so we would
// need to add that. We would also need to handle tokens such as `DictionaryElementAny`,
// which have no value for `.getAnArgument()`.
(
token.getName() = "DictionaryElement" and
result = node.getSubscript(token.getAnArgument())
or
token.getName() = "DictionaryElementAny" and
result = node.getASubscript() and
not exists(token.getAnArgument())
// TODO: ListElement/SetElement/TupleElement
)
// Some features don't have MaD tokens yet; they would need to be added to API graphs first.
// - decorators ("DecoratedClass", "DecoratedMember", "DecoratedParameter")
// - Array/Map elements ("ArrayElement", "Element", "MapKey", "MapValue")
}
/**
@@ -242,7 +258,11 @@ InvokeNode getAnInvocationOf(API::Node node) { result = node.getACall() }
*/
bindingset[name]
predicate isExtraValidTokenNameInIdentifyingAccessPath(string name) {
// Holds when `name` is one of the token names listed below.
//
// NOTE(review): this hunk is shown without +/- markers. Going by the hunk header
// (7 lines before, 11 lines after), the single-line list is the removed version and
// the multi-line list, which additionally contains "DictionaryElement" and
// "DictionaryElementAny", is its replacement -- confirm against the raw diff.
name = ["Member", "Instance", "Awaited", "Call", "Method", "Subclass"]
name =
[
"Member", "Instance", "Awaited", "Call", "Method", "Subclass", "DictionaryElement",
"DictionaryElementAny"
]
}
/**
@@ -250,7 +270,7 @@ predicate isExtraValidTokenNameInIdentifyingAccessPath(string name) {
* in an identifying access path.
*/
predicate isExtraValidNoArgumentTokenInIdentifyingAccessPath(string name) {
// Holds when `name` is one of the token names listed below. Judging by the predicate
// name, these are the tokens that are accepted without an argument; the
// "DictionaryElementAny" step in `getExtraSuccessorFromNode` likewise requires that
// the token has no argument.
//
// NOTE(review): this hunk is shown without +/- markers. Going by the hunk header
// (7 lines before, 7 lines after), the first list is the removed line and the second
// list, which adds "DictionaryElementAny", is its replacement -- confirm against the
// raw diff.
name = ["Instance", "Awaited", "Call", "Subclass"]
name = ["Instance", "Awaited", "Call", "Subclass", "DictionaryElementAny"]
}
/**
@@ -259,7 +279,7 @@ predicate isExtraValidNoArgumentTokenInIdentifyingAccessPath(string name) {
*/
bindingset[name, argument]
predicate isExtraValidTokenArgumentInIdentifyingAccessPath(string name, string argument) {
name = ["Member", "Method"] and
name = ["Member", "Method", "DictionaryElement"] and
exists(argument)
or
name = ["Argument", "Parameter"] and

View File

@@ -106,6 +106,8 @@ isSource
| test.py:117:31:117:41 | ControlFlowNode for getSource() | test-source |
| test.py:118:35:118:45 | ControlFlowNode for getSource() | test-source |
| test.py:119:20:119:30 | ControlFlowNode for getSource() | test-source |
| test.py:124:1:124:33 | ControlFlowNode for Attribute() | test-source |
| test.py:126:11:126:43 | ControlFlowNode for Attribute() | test-source |
syntaxErrors
| Member[foo |
| Member[foo] .Member[bar] |

View File

@@ -23,6 +23,12 @@ extensions:
- ["testlib", "Member[ArgPos].Member[MyClass].Subclass.Member[otherSelfTest].Parameter[0]", "test-source"]
- ["testlib", "Member[ArgPos].Member[MyClass].Subclass.Member[anyParam].Parameter[any]", "test-source"]
- ["testlib", "Member[ArgPos].Member[MyClass].Subclass.Member[anyNamed].Parameter[any-named]", "test-source"]
# test steps through content
- ["testlib", "Member[source_dict].DictionaryElement[key].Member[func].ReturnValue", "test-source"]
- ["testlib", "Member[source_dict_any].DictionaryElementAny.Member[func].ReturnValue", "test-source"]
# TODO: Add support for lists/tuples
# - ["testlib", "Member[source_list].ListElement.Member[func].ReturnValue", "test-source"]
# - ["testlib", "Member[source_tuple].TupleElement[0].Member[func].ReturnValue", "test-source"]
- addsTo:
pack: codeql/python-all

View File

@@ -117,3 +117,17 @@ testlib.foo.bar.baz.fuzzyCall(getSource()) # NOT OK
testlib.foo().bar().fuzzyCall(getSource()) # NOT OK
testlib.foo(lambda x: x.fuzzyCall(getSource())) # NOT OK
otherlib.fuzzyCall(getSource()) # OK
# defining sources through content steps
# dictionaries
testlib.source_dict["key"].func() # source
testlib.source_dict["safe"].func() # not a source
lambda k: testlib.source_dict_any[k].func() # source
# TODO: implement support for lists
lambda i: testlib.source_list[i].func()
# TODO: implement support for tuples
testlib.source_tuple[0].func() # a source
testlib.source_tuple[1].func() # not a source