diff --git a/python/tools/recorded-call-graph-metrics/ql/lib/RecordedCalls.qll b/python/tools/recorded-call-graph-metrics/ql/lib/RecordedCalls.qll
index 5566b8d3bd2..93aa8b0a604 100644
--- a/python/tools/recorded-call-graph-metrics/ql/lib/RecordedCalls.qll
+++ b/python/tools/recorded-call-graph-metrics/ql/lib/RecordedCalls.qll
@@ -135,12 +135,8 @@ class XMLExternalCallee extends XMLCallee {
 
   Builtin getACallee() {
     exists(Builtin mod |
-      not this.get_module_data() = "None" and
       mod.isModule() and
       mod.getName() = this.get_module_data()
-      or
-      this.get_module_data() = "None" and
-      mod = Builtin::builtinModule()
     |
       result = traverse_qualname(mod, this.get_qualname_data())
     )
diff --git a/python/tools/recorded-call-graph-metrics/src/cg_trace/tracer.py b/python/tools/recorded-call-graph-metrics/src/cg_trace/tracer.py
index 6f960e835be..edd38cc8094 100644
--- a/python/tools/recorded-call-graph-metrics/src/cg_trace/tracer.py
+++ b/python/tools/recorded-call-graph-metrics/src/cg_trace/tracer.py
@@ -95,6 +95,48 @@ class Callee:
 
 
 BUILTIN_FUNCTION_OR_METHOD = type(print)
+METHOD_DESCRIPTOR_TYPE = type(dict.get)
+
+
+_unknown_module_fixup_cache = dict()
+
+
+def _unkown_module_fixup(func):
+    """Best-effort recovery of the defining module for a builtin bound method.
+
+    `func.__qualname__` must have the form `"<class>.<method>"`. Returns the
+    `__module__` of the single direct subclass of `object` that matches that
+    class name and exposes the method as a builtin, or `None` when there is not
+    exactly one such class. Results are cached per `(module, qualname)`.
+    """
+    # TODO: Doesn't work for everything (for example: `OrderedDict.fromkeys`, `object.__new__`)
+
+    module = func.__module__
+    qualname = func.__qualname__
+    cls_name, method_name = qualname.split(".")
+
+    key = (module, qualname)
+    if key in _unknown_module_fixup_cache:
+        return _unknown_module_fixup_cache[key]
+
+    matching_classes = list()
+    for klass in object.__subclasses__():
+        # type(dict.get) == METHOD_DESCRIPTOR_TYPE
+        # type(dict.__new__) == BUILTIN_FUNCTION_OR_METHOD
+        if klass.__qualname__ == cls_name and type(
+            getattr(klass, method_name, None)
+        ) in [BUILTIN_FUNCTION_OR_METHOD, METHOD_DESCRIPTOR_TYPE]:
+            matching_classes.append(klass)
+
+    if len(matching_classes) == 1:
+        klass = matching_classes[0]
+        ret = klass.__module__
+    else:
+        if DEBUG:
+            LOGGER.debug(f"Found {len(matching_classes)} matching classes for {module} {qualname}, expected exactly one")
+        ret = None
+    _unknown_module_fixup_cache[key] = ret
+    return ret
 
 
 @better_compare_for_dataclass
@@ -109,9 +151,19 @@ class ExternalCallee(Callee):
 
     @classmethod
     def from_arg(cls, func):
+        # builtin bound methods seem to always report `None` for __module__, but we
+        # might be able to recover the lost information by looking through all classes.
+        # For example, `dict().get.__module__ is None` and `dict().get.__qualname__ ==
+        # "dict.get"`
+
+        module = func.__module__
+        qualname = func.__qualname__
+        if module is None and qualname.count(".") == 1:
+            module = _unkown_module_fixup(func)
+
         return cls(
-            module=func.__module__,
-            qualname=func.__qualname__,
+            module=module,
+            qualname=qualname,
             is_builtin=type(func) == BUILTIN_FUNCTION_OR_METHOD,
         )
 
diff --git a/python/tools/recorded-call-graph-metrics/tests/python-src/dict-get.py b/python/tools/recorded-call-graph-metrics/tests/python-src/dict-get.py
new file mode 100644
index 00000000000..0c436495fce
--- /dev/null
+++ b/python/tools/recorded-call-graph-metrics/tests/python-src/dict-get.py
@@ -0,0 +1,3 @@
+d = dict()
+
+d.get("foo") or d.get("bar")
diff --git a/python/tools/recorded-call-graph-metrics/tests/python-src/getsockname.py b/python/tools/recorded-call-graph-metrics/tests/python-src/getsockname.py
new file mode 100644
index 00000000000..8570782d529
--- /dev/null
+++ b/python/tools/recorded-call-graph-metrics/tests/python-src/getsockname.py
@@ -0,0 +1,4 @@
+import socket
+
+sock = socket.socket()
+print(sock.getsockname())
diff --git a/python/tools/recorded-call-graph-metrics/tests/python-src/io-builtin.py b/python/tools/recorded-call-graph-metrics/tests/python-src/io-builtin.py
new file mode 100644
index 00000000000..daf3b0069ea
--- /dev/null
+++ b/python/tools/recorded-call-graph-metrics/tests/python-src/io-builtin.py
@@ -0,0 +1,4 @@
+import io
+
+# the `io.open` is just an alias for `_io.open`, but we record the external callee as `io.open` :|
+io.open("foo")