mirror of
https://github.com/github/codeql.git
synced 2025-12-16 16:53:25 +01:00
Python: CG trace: Improve performance by only logging when needed
Seems like a 2x performance overall wcwidth: - DEBUG=True 5.78 seconds - DEBUG=False 2.70 seconds youtube-dl - DEBUG=True 238.90 seconds - DEBUG=False 120.70 seconds
This commit is contained in:
@@ -5,6 +5,7 @@ from dis import Instruction
|
||||
from types import FrameType
|
||||
from typing import Any, List
|
||||
|
||||
from cg_trace.settings import DEBUG, FAIL_ON_UNKNOWN_BYTECODE
|
||||
from cg_trace.utils import better_compare_for_dataclass
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
@@ -155,9 +156,10 @@ def expr_that_added_elem_to_stack(
|
||||
immediately. (since correctly process the bytecode when faced with jumps is not as
|
||||
straight forward).
|
||||
"""
|
||||
LOGGER.debug(
|
||||
f"find_inst_that_added_elem_to_stack start_index={start_index} stack_pos={stack_pos}"
|
||||
)
|
||||
if DEBUG:
|
||||
LOGGER.debug(
|
||||
f"find_inst_that_added_elem_to_stack start_index={start_index} stack_pos={stack_pos}"
|
||||
)
|
||||
assert stack_pos >= 0
|
||||
for inst in reversed(instructions[: start_index + 1]):
|
||||
# Return immediately if faced with a jump
|
||||
@@ -165,13 +167,15 @@ def expr_that_added_elem_to_stack(
|
||||
return SomethingInvolvingScaryBytecodeJump(inst.opname)
|
||||
|
||||
if stack_pos == 0:
|
||||
LOGGER.debug(f"Found it: {inst}")
|
||||
if DEBUG:
|
||||
LOGGER.debug(f"Found it: {inst}")
|
||||
found_index = instructions.index(inst)
|
||||
break
|
||||
old = stack_pos
|
||||
stack_pos -= dis.stack_effect(inst.opcode, inst.arg)
|
||||
new = stack_pos
|
||||
LOGGER.debug(f"Skipping ({old} -> {new}) {inst}")
|
||||
if DEBUG:
|
||||
LOGGER.debug(f"Skipping ({old} -> {new}) {inst}")
|
||||
else:
|
||||
raise Exception("inst_index_for_stack_diff failed")
|
||||
|
||||
@@ -181,7 +185,8 @@ def expr_that_added_elem_to_stack(
|
||||
def expr_from_instruction(instructions: List[Instruction], index: int) -> BytecodeExpr:
|
||||
inst = instructions[index]
|
||||
|
||||
LOGGER.debug(f"expr_from_instruction: {inst} index={index}")
|
||||
if DEBUG:
|
||||
LOGGER.debug(f"expr_from_instruction: {inst} index={index}")
|
||||
|
||||
if inst.opname in ["LOAD_GLOBAL", "LOAD_FAST", "LOAD_NAME", "LOAD_DEREF"]:
|
||||
return BytecodeVariableName(inst.argval)
|
||||
@@ -247,24 +252,23 @@ def expr_from_instruction(instructions: List[Instruction], index: int) -> Byteco
|
||||
# - LOAD_BUILD_CLASS: Called when constructing a class.
|
||||
# - IMPORT_NAME: Observed to result in a call to filename='<frozen
|
||||
# importlib._bootstrap>', linenum=389, funcname='parent'
|
||||
if inst.opname not in ["LOAD_BUILD_CLASS", "IMPORT_NAME"] + WITH_OPNAMES:
|
||||
LOGGER.warning(
|
||||
f"Don't know how to handle this type of instruction: {inst.opname}"
|
||||
)
|
||||
# Uncomment to stop execution when encountering non-ignored unknown instruction
|
||||
# class MyBytecodeException(BaseException):
|
||||
# pass
|
||||
#
|
||||
# raise MyBytecodeException()
|
||||
if FAIL_ON_UNKNOWN_BYTECODE:
|
||||
if inst.opname not in ["LOAD_BUILD_CLASS", "IMPORT_NAME"] + WITH_OPNAMES:
|
||||
LOGGER.warning(
|
||||
f"Don't know how to handle this type of instruction: {inst.opname}"
|
||||
)
|
||||
raise BaseException()
|
||||
|
||||
return BytecodeUnknown(inst.opname)
|
||||
|
||||
|
||||
def expr_from_frame(frame: FrameType) -> BytecodeExpr:
|
||||
bytecode = dis.Bytecode(frame.f_code, current_offset=frame.f_lasti)
|
||||
|
||||
LOGGER.debug(
|
||||
f"{frame.f_code.co_filename}:{frame.f_lineno}: bytecode: \n{bytecode.dis()}"
|
||||
)
|
||||
if DEBUG:
|
||||
LOGGER.debug(
|
||||
f"{frame.f_code.co_filename}:{frame.f_lineno}: bytecode: \n{bytecode.dis()}"
|
||||
)
|
||||
|
||||
instructions = list(iter(bytecode))
|
||||
last_instruction_index = [inst.offset for inst in instructions].index(frame.f_lasti)
|
||||
|
||||
@@ -4,6 +4,10 @@ import argparse
|
||||
def parse(args):
|
||||
parser = argparse.ArgumentParser()
|
||||
|
||||
parser.add_argument(
|
||||
"--debug", action="store_true", default=False, help="Enable debug logging"
|
||||
)
|
||||
|
||||
parser.add_argument("--xml")
|
||||
|
||||
parser.add_argument(
|
||||
|
||||
@@ -6,7 +6,7 @@ import time
|
||||
from datetime import datetime
|
||||
from io import StringIO
|
||||
|
||||
from cg_trace import __version__, cmdline, tracer
|
||||
from cg_trace import __version__, cmdline, settings, tracer
|
||||
from cg_trace.exporter import XMLExporter
|
||||
|
||||
|
||||
@@ -31,18 +31,17 @@ def record_calls(code, globals):
|
||||
return all_calls_sorted, captured_stdout, captured_stderr, exit_status
|
||||
|
||||
|
||||
def setup_logging():
|
||||
def setup_logging(debug):
|
||||
# code we run can also set up logging, so we need to set the level directly on our
|
||||
# own pacakge
|
||||
sh = logging.StreamHandler(stream=sys.stderr)
|
||||
|
||||
pkg_logger = logging.getLogger("cg_trace")
|
||||
pkg_logger.addHandler(sh)
|
||||
pkg_logger.setLevel(logging.INFO)
|
||||
pkg_logger.setLevel(logging.CRITICAL if debug else logging.INFO)
|
||||
|
||||
|
||||
def main(args=None) -> int:
|
||||
setup_logging()
|
||||
|
||||
# from . import bytecode_reconstructor
|
||||
# logging.getLogger(bytecode_reconstructor.__name__).setLevel(logging.INFO)
|
||||
@@ -53,6 +52,9 @@ def main(args=None) -> int:
|
||||
|
||||
opts = cmdline.parse(args)
|
||||
|
||||
settings.DEBUG = opts.debug
|
||||
setup_logging(opts.debug)
|
||||
|
||||
# These details of setting up the program to be run is very much inspired by `trace`
|
||||
# from the standard library
|
||||
if opts.module:
|
||||
|
||||
@@ -0,0 +1,6 @@
|
||||
# Whether to run the call graph tracer with debugging enabled. Turning off
|
||||
# `if DEBUG: LOGGER.debug()` code completely yielded massive performance improvements.
|
||||
DEBUG = False
|
||||
|
||||
|
||||
FAIL_ON_UNKNOWN_BYTECODE = False
|
||||
@@ -6,6 +6,7 @@ from types import FrameType
|
||||
from typing import Any, Optional, Tuple
|
||||
|
||||
from cg_trace.bytecode_reconstructor import BytecodeExpr, expr_from_frame
|
||||
from cg_trace.settings import DEBUG
|
||||
from cg_trace.utils import better_compare_for_dataclass
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
@@ -233,7 +234,8 @@ class CallGraphTracer:
|
||||
if event not in ["call", "c_call"]:
|
||||
return
|
||||
|
||||
LOGGER.debug(f"profilefunc event={event}")
|
||||
if DEBUG:
|
||||
LOGGER.debug(f"profilefunc event={event}")
|
||||
if event == "call":
|
||||
# in call, the `frame` argument is new the frame for entering the callee
|
||||
assert frame.f_back is not None
|
||||
@@ -242,10 +244,12 @@ class CallGraphTracer:
|
||||
|
||||
key = (Call.hash_key(frame.f_back), callee)
|
||||
if key in self.python_calls:
|
||||
LOGGER.debug(f"ignoring already seen call {key[0]} --> {callee}")
|
||||
if DEBUG:
|
||||
LOGGER.debug(f"ignoring already seen call {key[0]} --> {callee}")
|
||||
return
|
||||
|
||||
LOGGER.debug(f"callee={callee}")
|
||||
if DEBUG:
|
||||
LOGGER.debug(f"callee={callee}")
|
||||
call = Call.from_frame(frame.f_back)
|
||||
|
||||
self.python_calls[key] = (call, callee)
|
||||
@@ -258,12 +262,15 @@ class CallGraphTracer:
|
||||
|
||||
key = (Call.hash_key(frame), callee)
|
||||
if key in self.external_calls:
|
||||
LOGGER.debug(f"ignoring already seen call {key[0]} --> {callee}")
|
||||
if DEBUG:
|
||||
LOGGER.debug(f"ignoring already seen call {key[0]} --> {callee}")
|
||||
return
|
||||
|
||||
LOGGER.debug(f"callee={callee}")
|
||||
if DEBUG:
|
||||
LOGGER.debug(f"callee={callee}")
|
||||
call = Call.from_frame(frame)
|
||||
|
||||
self.external_calls[key] = (call, callee)
|
||||
|
||||
LOGGER.debug(f"{call} --> {callee}")
|
||||
if DEBUG:
|
||||
LOGGER.debug(f"{call} --> {callee}")
|
||||
|
||||
Reference in New Issue
Block a user