mirror of
https://github.com/github/codeql.git
synced 2025-12-16 16:53:25 +01:00
Python: CG trace: Improve performance by only logging when needed
Seems like a 2x performance overall wcwidth: - DEBUG=True 5.78 seconds - DEBUG=False 2.70 seconds youtube-dl - DEBUG=True 238.90 seconds - DEBUG=False 120.70 seconds
This commit is contained in:
@@ -5,6 +5,7 @@ from dis import Instruction
|
|||||||
from types import FrameType
|
from types import FrameType
|
||||||
from typing import Any, List
|
from typing import Any, List
|
||||||
|
|
||||||
|
from cg_trace.settings import DEBUG, FAIL_ON_UNKNOWN_BYTECODE
|
||||||
from cg_trace.utils import better_compare_for_dataclass
|
from cg_trace.utils import better_compare_for_dataclass
|
||||||
|
|
||||||
LOGGER = logging.getLogger(__name__)
|
LOGGER = logging.getLogger(__name__)
|
||||||
@@ -155,9 +156,10 @@ def expr_that_added_elem_to_stack(
|
|||||||
immediately. (since correctly process the bytecode when faced with jumps is not as
|
immediately. (since correctly process the bytecode when faced with jumps is not as
|
||||||
straight forward).
|
straight forward).
|
||||||
"""
|
"""
|
||||||
LOGGER.debug(
|
if DEBUG:
|
||||||
f"find_inst_that_added_elem_to_stack start_index={start_index} stack_pos={stack_pos}"
|
LOGGER.debug(
|
||||||
)
|
f"find_inst_that_added_elem_to_stack start_index={start_index} stack_pos={stack_pos}"
|
||||||
|
)
|
||||||
assert stack_pos >= 0
|
assert stack_pos >= 0
|
||||||
for inst in reversed(instructions[: start_index + 1]):
|
for inst in reversed(instructions[: start_index + 1]):
|
||||||
# Return immediately if faced with a jump
|
# Return immediately if faced with a jump
|
||||||
@@ -165,13 +167,15 @@ def expr_that_added_elem_to_stack(
|
|||||||
return SomethingInvolvingScaryBytecodeJump(inst.opname)
|
return SomethingInvolvingScaryBytecodeJump(inst.opname)
|
||||||
|
|
||||||
if stack_pos == 0:
|
if stack_pos == 0:
|
||||||
LOGGER.debug(f"Found it: {inst}")
|
if DEBUG:
|
||||||
|
LOGGER.debug(f"Found it: {inst}")
|
||||||
found_index = instructions.index(inst)
|
found_index = instructions.index(inst)
|
||||||
break
|
break
|
||||||
old = stack_pos
|
old = stack_pos
|
||||||
stack_pos -= dis.stack_effect(inst.opcode, inst.arg)
|
stack_pos -= dis.stack_effect(inst.opcode, inst.arg)
|
||||||
new = stack_pos
|
new = stack_pos
|
||||||
LOGGER.debug(f"Skipping ({old} -> {new}) {inst}")
|
if DEBUG:
|
||||||
|
LOGGER.debug(f"Skipping ({old} -> {new}) {inst}")
|
||||||
else:
|
else:
|
||||||
raise Exception("inst_index_for_stack_diff failed")
|
raise Exception("inst_index_for_stack_diff failed")
|
||||||
|
|
||||||
@@ -181,7 +185,8 @@ def expr_that_added_elem_to_stack(
|
|||||||
def expr_from_instruction(instructions: List[Instruction], index: int) -> BytecodeExpr:
|
def expr_from_instruction(instructions: List[Instruction], index: int) -> BytecodeExpr:
|
||||||
inst = instructions[index]
|
inst = instructions[index]
|
||||||
|
|
||||||
LOGGER.debug(f"expr_from_instruction: {inst} index={index}")
|
if DEBUG:
|
||||||
|
LOGGER.debug(f"expr_from_instruction: {inst} index={index}")
|
||||||
|
|
||||||
if inst.opname in ["LOAD_GLOBAL", "LOAD_FAST", "LOAD_NAME", "LOAD_DEREF"]:
|
if inst.opname in ["LOAD_GLOBAL", "LOAD_FAST", "LOAD_NAME", "LOAD_DEREF"]:
|
||||||
return BytecodeVariableName(inst.argval)
|
return BytecodeVariableName(inst.argval)
|
||||||
@@ -247,24 +252,23 @@ def expr_from_instruction(instructions: List[Instruction], index: int) -> Byteco
|
|||||||
# - LOAD_BUILD_CLASS: Called when constructing a class.
|
# - LOAD_BUILD_CLASS: Called when constructing a class.
|
||||||
# - IMPORT_NAME: Observed to result in a call to filename='<frozen
|
# - IMPORT_NAME: Observed to result in a call to filename='<frozen
|
||||||
# importlib._bootstrap>', linenum=389, funcname='parent'
|
# importlib._bootstrap>', linenum=389, funcname='parent'
|
||||||
if inst.opname not in ["LOAD_BUILD_CLASS", "IMPORT_NAME"] + WITH_OPNAMES:
|
if FAIL_ON_UNKNOWN_BYTECODE:
|
||||||
LOGGER.warning(
|
if inst.opname not in ["LOAD_BUILD_CLASS", "IMPORT_NAME"] + WITH_OPNAMES:
|
||||||
f"Don't know how to handle this type of instruction: {inst.opname}"
|
LOGGER.warning(
|
||||||
)
|
f"Don't know how to handle this type of instruction: {inst.opname}"
|
||||||
# Uncomment to stop execution when encountering non-ignored unknown instruction
|
)
|
||||||
# class MyBytecodeException(BaseException):
|
raise BaseException()
|
||||||
# pass
|
|
||||||
#
|
|
||||||
# raise MyBytecodeException()
|
|
||||||
return BytecodeUnknown(inst.opname)
|
return BytecodeUnknown(inst.opname)
|
||||||
|
|
||||||
|
|
||||||
def expr_from_frame(frame: FrameType) -> BytecodeExpr:
|
def expr_from_frame(frame: FrameType) -> BytecodeExpr:
|
||||||
bytecode = dis.Bytecode(frame.f_code, current_offset=frame.f_lasti)
|
bytecode = dis.Bytecode(frame.f_code, current_offset=frame.f_lasti)
|
||||||
|
|
||||||
LOGGER.debug(
|
if DEBUG:
|
||||||
f"{frame.f_code.co_filename}:{frame.f_lineno}: bytecode: \n{bytecode.dis()}"
|
LOGGER.debug(
|
||||||
)
|
f"{frame.f_code.co_filename}:{frame.f_lineno}: bytecode: \n{bytecode.dis()}"
|
||||||
|
)
|
||||||
|
|
||||||
instructions = list(iter(bytecode))
|
instructions = list(iter(bytecode))
|
||||||
last_instruction_index = [inst.offset for inst in instructions].index(frame.f_lasti)
|
last_instruction_index = [inst.offset for inst in instructions].index(frame.f_lasti)
|
||||||
|
|||||||
@@ -4,6 +4,10 @@ import argparse
|
|||||||
def parse(args):
|
def parse(args):
|
||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
|
|
||||||
|
parser.add_argument(
|
||||||
|
"--debug", action="store_true", default=False, help="Enable debug logging"
|
||||||
|
)
|
||||||
|
|
||||||
parser.add_argument("--xml")
|
parser.add_argument("--xml")
|
||||||
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
|
|||||||
@@ -6,7 +6,7 @@ import time
|
|||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from io import StringIO
|
from io import StringIO
|
||||||
|
|
||||||
from cg_trace import __version__, cmdline, tracer
|
from cg_trace import __version__, cmdline, settings, tracer
|
||||||
from cg_trace.exporter import XMLExporter
|
from cg_trace.exporter import XMLExporter
|
||||||
|
|
||||||
|
|
||||||
@@ -31,18 +31,17 @@ def record_calls(code, globals):
|
|||||||
return all_calls_sorted, captured_stdout, captured_stderr, exit_status
|
return all_calls_sorted, captured_stdout, captured_stderr, exit_status
|
||||||
|
|
||||||
|
|
||||||
def setup_logging():
|
def setup_logging(debug):
|
||||||
# code we run can also set up logging, so we need to set the level directly on our
|
# code we run can also set up logging, so we need to set the level directly on our
|
||||||
# own pacakge
|
# own pacakge
|
||||||
sh = logging.StreamHandler(stream=sys.stderr)
|
sh = logging.StreamHandler(stream=sys.stderr)
|
||||||
|
|
||||||
pkg_logger = logging.getLogger("cg_trace")
|
pkg_logger = logging.getLogger("cg_trace")
|
||||||
pkg_logger.addHandler(sh)
|
pkg_logger.addHandler(sh)
|
||||||
pkg_logger.setLevel(logging.INFO)
|
pkg_logger.setLevel(logging.CRITICAL if debug else logging.INFO)
|
||||||
|
|
||||||
|
|
||||||
def main(args=None) -> int:
|
def main(args=None) -> int:
|
||||||
setup_logging()
|
|
||||||
|
|
||||||
# from . import bytecode_reconstructor
|
# from . import bytecode_reconstructor
|
||||||
# logging.getLogger(bytecode_reconstructor.__name__).setLevel(logging.INFO)
|
# logging.getLogger(bytecode_reconstructor.__name__).setLevel(logging.INFO)
|
||||||
@@ -53,6 +52,9 @@ def main(args=None) -> int:
|
|||||||
|
|
||||||
opts = cmdline.parse(args)
|
opts = cmdline.parse(args)
|
||||||
|
|
||||||
|
settings.DEBUG = opts.debug
|
||||||
|
setup_logging(opts.debug)
|
||||||
|
|
||||||
# These details of setting up the program to be run is very much inspired by `trace`
|
# These details of setting up the program to be run is very much inspired by `trace`
|
||||||
# from the standard library
|
# from the standard library
|
||||||
if opts.module:
|
if opts.module:
|
||||||
|
|||||||
@@ -0,0 +1,6 @@
|
|||||||
|
# Whether to run the call graph tracer with debugging enabled. Turning off
|
||||||
|
# `if DEBUG: LOGGER.debug()` code completely yielded massive performance improvements.
|
||||||
|
DEBUG = False
|
||||||
|
|
||||||
|
|
||||||
|
FAIL_ON_UNKNOWN_BYTECODE = False
|
||||||
@@ -6,6 +6,7 @@ from types import FrameType
|
|||||||
from typing import Any, Optional, Tuple
|
from typing import Any, Optional, Tuple
|
||||||
|
|
||||||
from cg_trace.bytecode_reconstructor import BytecodeExpr, expr_from_frame
|
from cg_trace.bytecode_reconstructor import BytecodeExpr, expr_from_frame
|
||||||
|
from cg_trace.settings import DEBUG
|
||||||
from cg_trace.utils import better_compare_for_dataclass
|
from cg_trace.utils import better_compare_for_dataclass
|
||||||
|
|
||||||
LOGGER = logging.getLogger(__name__)
|
LOGGER = logging.getLogger(__name__)
|
||||||
@@ -233,7 +234,8 @@ class CallGraphTracer:
|
|||||||
if event not in ["call", "c_call"]:
|
if event not in ["call", "c_call"]:
|
||||||
return
|
return
|
||||||
|
|
||||||
LOGGER.debug(f"profilefunc event={event}")
|
if DEBUG:
|
||||||
|
LOGGER.debug(f"profilefunc event={event}")
|
||||||
if event == "call":
|
if event == "call":
|
||||||
# in call, the `frame` argument is new the frame for entering the callee
|
# in call, the `frame` argument is new the frame for entering the callee
|
||||||
assert frame.f_back is not None
|
assert frame.f_back is not None
|
||||||
@@ -242,10 +244,12 @@ class CallGraphTracer:
|
|||||||
|
|
||||||
key = (Call.hash_key(frame.f_back), callee)
|
key = (Call.hash_key(frame.f_back), callee)
|
||||||
if key in self.python_calls:
|
if key in self.python_calls:
|
||||||
LOGGER.debug(f"ignoring already seen call {key[0]} --> {callee}")
|
if DEBUG:
|
||||||
|
LOGGER.debug(f"ignoring already seen call {key[0]} --> {callee}")
|
||||||
return
|
return
|
||||||
|
|
||||||
LOGGER.debug(f"callee={callee}")
|
if DEBUG:
|
||||||
|
LOGGER.debug(f"callee={callee}")
|
||||||
call = Call.from_frame(frame.f_back)
|
call = Call.from_frame(frame.f_back)
|
||||||
|
|
||||||
self.python_calls[key] = (call, callee)
|
self.python_calls[key] = (call, callee)
|
||||||
@@ -258,12 +262,15 @@ class CallGraphTracer:
|
|||||||
|
|
||||||
key = (Call.hash_key(frame), callee)
|
key = (Call.hash_key(frame), callee)
|
||||||
if key in self.external_calls:
|
if key in self.external_calls:
|
||||||
LOGGER.debug(f"ignoring already seen call {key[0]} --> {callee}")
|
if DEBUG:
|
||||||
|
LOGGER.debug(f"ignoring already seen call {key[0]} --> {callee}")
|
||||||
return
|
return
|
||||||
|
|
||||||
LOGGER.debug(f"callee={callee}")
|
if DEBUG:
|
||||||
|
LOGGER.debug(f"callee={callee}")
|
||||||
call = Call.from_frame(frame)
|
call = Call.from_frame(frame)
|
||||||
|
|
||||||
self.external_calls[key] = (call, callee)
|
self.external_calls[key] = (call, callee)
|
||||||
|
|
||||||
LOGGER.debug(f"{call} --> {callee}")
|
if DEBUG:
|
||||||
|
LOGGER.debug(f"{call} --> {callee}")
|
||||||
|
|||||||
Reference in New Issue
Block a user