mirror of
https://github.com/github/codeql.git
synced 2025-12-17 01:03:14 +01:00
Python: CG trace: Cache calls seen
This improved runtime from ~10 seconds to 1 seconds when running one of the tests fo wcwidth
This commit is contained in:
@@ -1,3 +1,4 @@
|
|||||||
|
import itertools
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
@@ -23,15 +24,18 @@ def record_calls(code, globals):
|
|||||||
sys.stdout = real_stdout
|
sys.stdout = real_stdout
|
||||||
sys.stderr = real_stderr
|
sys.stderr = real_stderr
|
||||||
|
|
||||||
return sorted(cgt.recorded_calls), captured_stdout, captured_stderr, exit_status
|
all_calls_sorted = sorted(
|
||||||
|
itertools.chain(cgt.python_calls.values(), cgt.external_calls.values())
|
||||||
|
)
|
||||||
|
|
||||||
|
return all_calls_sorted, captured_stdout, captured_stderr, exit_status
|
||||||
|
|
||||||
|
|
||||||
def main(args=None) -> int:
|
def main(args=None) -> int:
|
||||||
logging.basicConfig(stream=sys.stderr, level=logging.INFO)
|
logging.basicConfig(stream=sys.stderr, level=logging.DEBUG)
|
||||||
|
|
||||||
from . import bytecode_reconstructor
|
# from . import bytecode_reconstructor
|
||||||
|
# logging.getLogger(bytecode_reconstructor.__name__).setLevel(logging.INFO)
|
||||||
logging.getLogger(bytecode_reconstructor.__name__).setLevel(logging.INFO)
|
|
||||||
|
|
||||||
if args is None:
|
if args is None:
|
||||||
# first element in argv is program name
|
# first element in argv is program name
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ import logging
|
|||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
from types import FrameType
|
from types import FrameType
|
||||||
from typing import Optional
|
from typing import Any, Optional, Tuple
|
||||||
|
|
||||||
from cg_trace.bytecode_reconstructor import BytecodeExpr, expr_from_frame
|
from cg_trace.bytecode_reconstructor import BytecodeExpr, expr_from_frame
|
||||||
|
|
||||||
@@ -65,6 +65,15 @@ class Call:
|
|||||||
bytecode_expr=bytecode_expr,
|
bytecode_expr=bytecode_expr,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def hash_key(frame: FrameType) -> Tuple[str, int, int]:
|
||||||
|
code = frame.f_code
|
||||||
|
return (
|
||||||
|
canonic_filename(code.co_filename),
|
||||||
|
frame.f_lineno,
|
||||||
|
frame.f_lasti,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def better_compare_for_dataclass(cls):
|
def better_compare_for_dataclass(cls):
|
||||||
"""When dataclass is used with `order=True`, the comparison methods is only implemented for
|
"""When dataclass is used with `order=True`, the comparison methods is only implemented for
|
||||||
@@ -162,10 +171,11 @@ class CallGraphTracer:
|
|||||||
use-case, this is not a problem.
|
use-case, this is not a problem.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
recorded_calls: set
|
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.recorded_calls = set()
|
# Performing `Call.from_frame` can be expensive, so we cache (call, callee)
|
||||||
|
# pairs we have already seen to avoid double procressing.
|
||||||
|
self.python_calls = dict()
|
||||||
|
self.external_calls = dict()
|
||||||
|
|
||||||
def run(self, code, globals, locals):
|
def run(self, code, globals, locals):
|
||||||
self.exec_call_seen = False
|
self.exec_call_seen = False
|
||||||
@@ -205,18 +215,35 @@ class CallGraphTracer:
|
|||||||
|
|
||||||
LOGGER.debug(f"profilefunc {event=}")
|
LOGGER.debug(f"profilefunc {event=}")
|
||||||
if event == "call":
|
if event == "call":
|
||||||
assert frame.f_back is not None
|
|
||||||
# in call, the `frame` argument is new the frame for entering the callee
|
# in call, the `frame` argument is new the frame for entering the callee
|
||||||
|
assert frame.f_back is not None
|
||||||
|
|
||||||
callee = PythonCallee.from_frame(frame)
|
callee = PythonCallee.from_frame(frame)
|
||||||
|
|
||||||
|
key = (Call.hash_key(frame.f_back), callee)
|
||||||
|
if key in self.python_calls:
|
||||||
|
LOGGER.debug(f"ignoring already seen call {key[0]} --> {callee}")
|
||||||
|
return
|
||||||
|
|
||||||
LOGGER.debug(f"callee={callee}")
|
LOGGER.debug(f"callee={callee}")
|
||||||
call = Call.from_frame(frame.f_back)
|
call = Call.from_frame(frame.f_back)
|
||||||
|
|
||||||
|
self.python_calls[key] = (call, callee)
|
||||||
|
|
||||||
if event == "c_call":
|
if event == "c_call":
|
||||||
# in c_call, the `frame` argument is frame where the call happens, and the
|
# in c_call, the `frame` argument is frame where the call happens, and the
|
||||||
# `arg` argument is the C function object.
|
# `arg` argument is the C function object.
|
||||||
|
|
||||||
callee = ExternalCallee.from_arg(arg)
|
callee = ExternalCallee.from_arg(arg)
|
||||||
|
|
||||||
|
key = (Call.hash_key(frame), callee)
|
||||||
|
if key in self.external_calls:
|
||||||
|
LOGGER.debug(f"ignoring already seen call {key[0]} --> {callee}")
|
||||||
|
return
|
||||||
|
|
||||||
LOGGER.debug(f"callee={callee}")
|
LOGGER.debug(f"callee={callee}")
|
||||||
call = Call.from_frame(frame)
|
call = Call.from_frame(frame)
|
||||||
|
|
||||||
|
self.external_calls[key] = (call, callee)
|
||||||
|
|
||||||
LOGGER.debug(f"{call} --> {callee}")
|
LOGGER.debug(f"{call} --> {callee}")
|
||||||
self.recorded_calls.add((call, callee))
|
|
||||||
|
|||||||
Reference in New Issue
Block a user