mirror of
https://github.com/github/codeql.git
synced 2025-12-16 16:53:25 +01:00
Python: CG trace: Cache calls seen
This improved runtime from ~10 seconds to 1 seconds when running one of the tests fo wcwidth
This commit is contained in:
@@ -1,3 +1,4 @@
|
||||
import itertools
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
@@ -23,15 +24,18 @@ def record_calls(code, globals):
|
||||
sys.stdout = real_stdout
|
||||
sys.stderr = real_stderr
|
||||
|
||||
return sorted(cgt.recorded_calls), captured_stdout, captured_stderr, exit_status
|
||||
all_calls_sorted = sorted(
|
||||
itertools.chain(cgt.python_calls.values(), cgt.external_calls.values())
|
||||
)
|
||||
|
||||
return all_calls_sorted, captured_stdout, captured_stderr, exit_status
|
||||
|
||||
|
||||
def main(args=None) -> int:
|
||||
logging.basicConfig(stream=sys.stderr, level=logging.INFO)
|
||||
logging.basicConfig(stream=sys.stderr, level=logging.DEBUG)
|
||||
|
||||
from . import bytecode_reconstructor
|
||||
|
||||
logging.getLogger(bytecode_reconstructor.__name__).setLevel(logging.INFO)
|
||||
# from . import bytecode_reconstructor
|
||||
# logging.getLogger(bytecode_reconstructor.__name__).setLevel(logging.INFO)
|
||||
|
||||
if args is None:
|
||||
# first element in argv is program name
|
||||
|
||||
@@ -3,7 +3,7 @@ import logging
|
||||
import os
|
||||
import sys
|
||||
from types import FrameType
|
||||
from typing import Optional
|
||||
from typing import Any, Optional, Tuple
|
||||
|
||||
from cg_trace.bytecode_reconstructor import BytecodeExpr, expr_from_frame
|
||||
|
||||
@@ -65,6 +65,15 @@ class Call:
|
||||
bytecode_expr=bytecode_expr,
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def hash_key(frame: FrameType) -> Tuple[str, int, int]:
|
||||
code = frame.f_code
|
||||
return (
|
||||
canonic_filename(code.co_filename),
|
||||
frame.f_lineno,
|
||||
frame.f_lasti,
|
||||
)
|
||||
|
||||
|
||||
def better_compare_for_dataclass(cls):
|
||||
"""When dataclass is used with `order=True`, the comparison methods is only implemented for
|
||||
@@ -162,10 +171,11 @@ class CallGraphTracer:
|
||||
use-case, this is not a problem.
|
||||
"""
|
||||
|
||||
recorded_calls: set
|
||||
|
||||
def __init__(self):
|
||||
self.recorded_calls = set()
|
||||
# Performing `Call.from_frame` can be expensive, so we cache (call, callee)
|
||||
# pairs we have already seen to avoid double procressing.
|
||||
self.python_calls = dict()
|
||||
self.external_calls = dict()
|
||||
|
||||
def run(self, code, globals, locals):
|
||||
self.exec_call_seen = False
|
||||
@@ -205,18 +215,35 @@ class CallGraphTracer:
|
||||
|
||||
LOGGER.debug(f"profilefunc {event=}")
|
||||
if event == "call":
|
||||
assert frame.f_back is not None
|
||||
# in call, the `frame` argument is new the frame for entering the callee
|
||||
assert frame.f_back is not None
|
||||
|
||||
callee = PythonCallee.from_frame(frame)
|
||||
|
||||
key = (Call.hash_key(frame.f_back), callee)
|
||||
if key in self.python_calls:
|
||||
LOGGER.debug(f"ignoring already seen call {key[0]} --> {callee}")
|
||||
return
|
||||
|
||||
LOGGER.debug(f"callee={callee}")
|
||||
call = Call.from_frame(frame.f_back)
|
||||
|
||||
self.python_calls[key] = (call, callee)
|
||||
|
||||
if event == "c_call":
|
||||
# in c_call, the `frame` argument is frame where the call happens, and the
|
||||
# `arg` argument is the C function object.
|
||||
|
||||
callee = ExternalCallee.from_arg(arg)
|
||||
|
||||
key = (Call.hash_key(frame), callee)
|
||||
if key in self.external_calls:
|
||||
LOGGER.debug(f"ignoring already seen call {key[0]} --> {callee}")
|
||||
return
|
||||
|
||||
LOGGER.debug(f"callee={callee}")
|
||||
call = Call.from_frame(frame)
|
||||
|
||||
self.external_calls[key] = (call, callee)
|
||||
|
||||
LOGGER.debug(f"{call} --> {callee}")
|
||||
self.recorded_calls.add((call, callee))
|
||||
|
||||
Reference in New Issue
Block a user