mirror of
https://github.com/github/codeql.git
synced 2025-12-17 01:03:14 +01:00
Python: CG trace: reconstruct call expr from bytecode
So we can differentiate multiple calls in one line.
This commit is contained in:
@@ -1,6 +1,105 @@
|
|||||||
<root>
|
<root>
|
||||||
<recorded_call call_filename="/home/rasmus/code/ql/python/tools/recorded-call-graph-metrics/example/builtins.py" call_linenum="1" call_inst_index="4" externalcallee_module="builtins" externalcallee_qualname="print" externalcallee_is_builtin="True"/>
|
<info>
|
||||||
<recorded_call call_filename="/home/rasmus/code/ql/python/tools/recorded-call-graph-metrics/example/builtins.py" call_linenum="2" call_inst_index="12" externalcallee_module="builtins" externalcallee_qualname="len" externalcallee_is_builtin="True"/>
|
<cg_trace_version>0.0.2</cg_trace_version>
|
||||||
<recorded_call call_filename="/home/rasmus/code/ql/python/tools/recorded-call-graph-metrics/example/builtins.py" call_linenum="4" call_inst_index="28" externalcallee_module="None" externalcallee_qualname="list.append" externalcallee_is_builtin="True"/>
|
<args>--xml example-traces/builtins.xml example/builtins.py</args>
|
||||||
<recorded_call call_filename="/home/rasmus/code/ql/python/tools/recorded-call-graph-metrics/example/builtins.py" call_linenum="7" call_inst_index="44" externalcallee_module="sys" externalcallee_qualname="getdefaultencoding" externalcallee_is_builtin="True"/>
|
<exit_status>completed</exit_status>
|
||||||
|
<elapsed>0.00 seconds</elapsed>
|
||||||
|
<utctimestamp>2020-07-20T09:27:00</utctimestamp>
|
||||||
|
</info>
|
||||||
|
<recorded_calls>
|
||||||
|
<recorded_call>
|
||||||
|
<Call>
|
||||||
|
<filename>/home/rasmus/code/ql/python/tools/recorded-call-graph-metrics/example/builtins.py</filename>
|
||||||
|
<linenum>1</linenum>
|
||||||
|
<inst_index>4</inst_index>
|
||||||
|
<bytecode_expr>
|
||||||
|
<BytecodeCall>
|
||||||
|
<function>
|
||||||
|
<BytecodeVariableName>
|
||||||
|
<name>print</name>
|
||||||
|
</BytecodeVariableName>
|
||||||
|
</function>
|
||||||
|
</BytecodeCall>
|
||||||
|
</bytecode_expr>
|
||||||
|
</Call>
|
||||||
|
<ExternalCallee>
|
||||||
|
<module>builtins</module>
|
||||||
|
<qualname>print</qualname>
|
||||||
|
<is_builtin>True</is_builtin>
|
||||||
|
</ExternalCallee>
|
||||||
|
</recorded_call>
|
||||||
|
<recorded_call>
|
||||||
|
<Call>
|
||||||
|
<filename>/home/rasmus/code/ql/python/tools/recorded-call-graph-metrics/example/builtins.py</filename>
|
||||||
|
<linenum>2</linenum>
|
||||||
|
<inst_index>12</inst_index>
|
||||||
|
<bytecode_expr>
|
||||||
|
<BytecodeCall>
|
||||||
|
<function>
|
||||||
|
<BytecodeVariableName>
|
||||||
|
<name>len</name>
|
||||||
|
</BytecodeVariableName>
|
||||||
|
</function>
|
||||||
|
</BytecodeCall>
|
||||||
|
</bytecode_expr>
|
||||||
|
</Call>
|
||||||
|
<ExternalCallee>
|
||||||
|
<module>builtins</module>
|
||||||
|
<qualname>len</qualname>
|
||||||
|
<is_builtin>True</is_builtin>
|
||||||
|
</ExternalCallee>
|
||||||
|
</recorded_call>
|
||||||
|
<recorded_call>
|
||||||
|
<Call>
|
||||||
|
<filename>/home/rasmus/code/ql/python/tools/recorded-call-graph-metrics/example/builtins.py</filename>
|
||||||
|
<linenum>4</linenum>
|
||||||
|
<inst_index>28</inst_index>
|
||||||
|
<bytecode_expr>
|
||||||
|
<BytecodeCall>
|
||||||
|
<function>
|
||||||
|
<BytecodeAttribute>
|
||||||
|
<attr_name>append</attr_name>
|
||||||
|
<object>
|
||||||
|
<BytecodeVariableName>
|
||||||
|
<name>l</name>
|
||||||
|
</BytecodeVariableName>
|
||||||
|
</object>
|
||||||
|
</BytecodeAttribute>
|
||||||
|
</function>
|
||||||
|
</BytecodeCall>
|
||||||
|
</bytecode_expr>
|
||||||
|
</Call>
|
||||||
|
<ExternalCallee>
|
||||||
|
<module>None</module>
|
||||||
|
<qualname>list.append</qualname>
|
||||||
|
<is_builtin>True</is_builtin>
|
||||||
|
</ExternalCallee>
|
||||||
|
</recorded_call>
|
||||||
|
<recorded_call>
|
||||||
|
<Call>
|
||||||
|
<filename>/home/rasmus/code/ql/python/tools/recorded-call-graph-metrics/example/builtins.py</filename>
|
||||||
|
<linenum>7</linenum>
|
||||||
|
<inst_index>44</inst_index>
|
||||||
|
<bytecode_expr>
|
||||||
|
<BytecodeCall>
|
||||||
|
<function>
|
||||||
|
<BytecodeAttribute>
|
||||||
|
<attr_name>getdefaultencoding</attr_name>
|
||||||
|
<object>
|
||||||
|
<BytecodeVariableName>
|
||||||
|
<name>sys</name>
|
||||||
|
</BytecodeVariableName>
|
||||||
|
</object>
|
||||||
|
</BytecodeAttribute>
|
||||||
|
</function>
|
||||||
|
</BytecodeCall>
|
||||||
|
</bytecode_expr>
|
||||||
|
</Call>
|
||||||
|
<ExternalCallee>
|
||||||
|
<module>sys</module>
|
||||||
|
<qualname>getdefaultencoding</qualname>
|
||||||
|
<is_builtin>True</is_builtin>
|
||||||
|
</ExternalCallee>
|
||||||
|
</recorded_call>
|
||||||
|
</recorded_calls>
|
||||||
</root>
|
</root>
|
||||||
|
|||||||
@@ -1,8 +1,137 @@
|
|||||||
<root>
|
<root>
|
||||||
<recorded_call call_filename="/home/rasmus/code/ql/python/tools/recorded-call-graph-metrics/example/simple.py" call_linenum="2" call_inst_index="4" externalcallee_module="builtins" externalcallee_qualname="print" externalcallee_is_builtin="True"/>
|
<info>
|
||||||
<recorded_call call_filename="/home/rasmus/code/ql/python/tools/recorded-call-graph-metrics/example/simple.py" call_linenum="5" call_inst_index="4" externalcallee_module="builtins" externalcallee_qualname="print" externalcallee_is_builtin="True"/>
|
<cg_trace_version>0.0.2</cg_trace_version>
|
||||||
<recorded_call call_filename="/home/rasmus/code/ql/python/tools/recorded-call-graph-metrics/example/simple.py" call_linenum="7" call_inst_index="18" pythoncallee_filename="/home/rasmus/code/ql/python/tools/recorded-call-graph-metrics/example/simple.py" pythoncallee_linenum="1" pythoncallee_funcname="foo"/>
|
<args>--xml example-traces/simple.xml example/simple.py</args>
|
||||||
<recorded_call call_filename="/home/rasmus/code/ql/python/tools/recorded-call-graph-metrics/example/simple.py" call_linenum="8" call_inst_index="24" pythoncallee_filename="/home/rasmus/code/ql/python/tools/recorded-call-graph-metrics/example/simple.py" pythoncallee_linenum="4" pythoncallee_funcname="bar"/>
|
<exit_status>completed</exit_status>
|
||||||
<recorded_call call_filename="/home/rasmus/code/ql/python/tools/recorded-call-graph-metrics/example/simple.py" call_linenum="10" call_inst_index="30" pythoncallee_filename="/home/rasmus/code/ql/python/tools/recorded-call-graph-metrics/example/simple.py" pythoncallee_linenum="1" pythoncallee_funcname="foo"/>
|
<elapsed>0.00 seconds</elapsed>
|
||||||
<recorded_call call_filename="/home/rasmus/code/ql/python/tools/recorded-call-graph-metrics/example/simple.py" call_linenum="10" call_inst_index="36" pythoncallee_filename="/home/rasmus/code/ql/python/tools/recorded-call-graph-metrics/example/simple.py" pythoncallee_linenum="4" pythoncallee_funcname="bar"/>
|
<utctimestamp>2020-07-20T09:27:01</utctimestamp>
|
||||||
|
</info>
|
||||||
|
<recorded_calls>
|
||||||
|
<recorded_call>
|
||||||
|
<Call>
|
||||||
|
<filename>/home/rasmus/code/ql/python/tools/recorded-call-graph-metrics/example/simple.py</filename>
|
||||||
|
<linenum>2</linenum>
|
||||||
|
<inst_index>4</inst_index>
|
||||||
|
<bytecode_expr>
|
||||||
|
<BytecodeCall>
|
||||||
|
<function>
|
||||||
|
<BytecodeVariableName>
|
||||||
|
<name>print</name>
|
||||||
|
</BytecodeVariableName>
|
||||||
|
</function>
|
||||||
|
</BytecodeCall>
|
||||||
|
</bytecode_expr>
|
||||||
|
</Call>
|
||||||
|
<ExternalCallee>
|
||||||
|
<module>builtins</module>
|
||||||
|
<qualname>print</qualname>
|
||||||
|
<is_builtin>True</is_builtin>
|
||||||
|
</ExternalCallee>
|
||||||
|
</recorded_call>
|
||||||
|
<recorded_call>
|
||||||
|
<Call>
|
||||||
|
<filename>/home/rasmus/code/ql/python/tools/recorded-call-graph-metrics/example/simple.py</filename>
|
||||||
|
<linenum>5</linenum>
|
||||||
|
<inst_index>4</inst_index>
|
||||||
|
<bytecode_expr>
|
||||||
|
<BytecodeCall>
|
||||||
|
<function>
|
||||||
|
<BytecodeVariableName>
|
||||||
|
<name>print</name>
|
||||||
|
</BytecodeVariableName>
|
||||||
|
</function>
|
||||||
|
</BytecodeCall>
|
||||||
|
</bytecode_expr>
|
||||||
|
</Call>
|
||||||
|
<ExternalCallee>
|
||||||
|
<module>builtins</module>
|
||||||
|
<qualname>print</qualname>
|
||||||
|
<is_builtin>True</is_builtin>
|
||||||
|
</ExternalCallee>
|
||||||
|
</recorded_call>
|
||||||
|
<recorded_call>
|
||||||
|
<Call>
|
||||||
|
<filename>/home/rasmus/code/ql/python/tools/recorded-call-graph-metrics/example/simple.py</filename>
|
||||||
|
<linenum>7</linenum>
|
||||||
|
<inst_index>18</inst_index>
|
||||||
|
<bytecode_expr>
|
||||||
|
<BytecodeCall>
|
||||||
|
<function>
|
||||||
|
<BytecodeVariableName>
|
||||||
|
<name>foo</name>
|
||||||
|
</BytecodeVariableName>
|
||||||
|
</function>
|
||||||
|
</BytecodeCall>
|
||||||
|
</bytecode_expr>
|
||||||
|
</Call>
|
||||||
|
<PythonCallee>
|
||||||
|
<filename>/home/rasmus/code/ql/python/tools/recorded-call-graph-metrics/example/simple.py</filename>
|
||||||
|
<linenum>1</linenum>
|
||||||
|
<funcname>foo</funcname>
|
||||||
|
</PythonCallee>
|
||||||
|
</recorded_call>
|
||||||
|
<recorded_call>
|
||||||
|
<Call>
|
||||||
|
<filename>/home/rasmus/code/ql/python/tools/recorded-call-graph-metrics/example/simple.py</filename>
|
||||||
|
<linenum>8</linenum>
|
||||||
|
<inst_index>24</inst_index>
|
||||||
|
<bytecode_expr>
|
||||||
|
<BytecodeCall>
|
||||||
|
<function>
|
||||||
|
<BytecodeVariableName>
|
||||||
|
<name>bar</name>
|
||||||
|
</BytecodeVariableName>
|
||||||
|
</function>
|
||||||
|
</BytecodeCall>
|
||||||
|
</bytecode_expr>
|
||||||
|
</Call>
|
||||||
|
<PythonCallee>
|
||||||
|
<filename>/home/rasmus/code/ql/python/tools/recorded-call-graph-metrics/example/simple.py</filename>
|
||||||
|
<linenum>4</linenum>
|
||||||
|
<funcname>bar</funcname>
|
||||||
|
</PythonCallee>
|
||||||
|
</recorded_call>
|
||||||
|
<recorded_call>
|
||||||
|
<Call>
|
||||||
|
<filename>/home/rasmus/code/ql/python/tools/recorded-call-graph-metrics/example/simple.py</filename>
|
||||||
|
<linenum>10</linenum>
|
||||||
|
<inst_index>30</inst_index>
|
||||||
|
<bytecode_expr>
|
||||||
|
<BytecodeCall>
|
||||||
|
<function>
|
||||||
|
<BytecodeVariableName>
|
||||||
|
<name>foo</name>
|
||||||
|
</BytecodeVariableName>
|
||||||
|
</function>
|
||||||
|
</BytecodeCall>
|
||||||
|
</bytecode_expr>
|
||||||
|
</Call>
|
||||||
|
<PythonCallee>
|
||||||
|
<filename>/home/rasmus/code/ql/python/tools/recorded-call-graph-metrics/example/simple.py</filename>
|
||||||
|
<linenum>1</linenum>
|
||||||
|
<funcname>foo</funcname>
|
||||||
|
</PythonCallee>
|
||||||
|
</recorded_call>
|
||||||
|
<recorded_call>
|
||||||
|
<Call>
|
||||||
|
<filename>/home/rasmus/code/ql/python/tools/recorded-call-graph-metrics/example/simple.py</filename>
|
||||||
|
<linenum>10</linenum>
|
||||||
|
<inst_index>36</inst_index>
|
||||||
|
<bytecode_expr>
|
||||||
|
<BytecodeCall>
|
||||||
|
<function>
|
||||||
|
<BytecodeVariableName>
|
||||||
|
<name>bar</name>
|
||||||
|
</BytecodeVariableName>
|
||||||
|
</function>
|
||||||
|
</BytecodeCall>
|
||||||
|
</bytecode_expr>
|
||||||
|
</Call>
|
||||||
|
<PythonCallee>
|
||||||
|
<filename>/home/rasmus/code/ql/python/tools/recorded-call-graph-metrics/example/simple.py</filename>
|
||||||
|
<linenum>4</linenum>
|
||||||
|
<funcname>bar</funcname>
|
||||||
|
</PythonCallee>
|
||||||
|
</recorded_call>
|
||||||
|
</recorded_calls>
|
||||||
</root>
|
</root>
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ from setuptools import find_packages, setup
|
|||||||
|
|
||||||
setup(
|
setup(
|
||||||
name="cg_trace",
|
name="cg_trace",
|
||||||
version="0.0.1",
|
version="0.0.2", # Remember to update src/cg_trace/__init__.py
|
||||||
description="Call graph tracing",
|
description="Call graph tracing",
|
||||||
packages=find_packages("src"),
|
packages=find_packages("src"),
|
||||||
package_dir={"": "src"},
|
package_dir={"": "src"},
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
import sys
|
import sys
|
||||||
|
|
||||||
__version__ = "0.0.1"
|
__version__ = "0.0.2" # remember to update setup.py
|
||||||
|
|
||||||
# Since the virtual machine opcodes changed in 3.6, not going to attempt to support
|
# Since the virtual machine opcodes changed in 3.6, not going to attempt to support
|
||||||
# anything before that. Using dataclasses, which is a new feature in Python 3.7
|
# anything before that. Using dataclasses, which is a new feature in Python 3.7
|
||||||
|
|||||||
@@ -0,0 +1,135 @@
|
|||||||
|
import dataclasses
|
||||||
|
import dis
|
||||||
|
import logging
|
||||||
|
from dis import Instruction
|
||||||
|
from types import FrameType
|
||||||
|
from typing import List
|
||||||
|
|
||||||
|
LOGGER = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# See https://docs.python.org/3/library/dis.html#python-bytecode-instructions for
|
||||||
|
# details on the bytecode instructions
|
||||||
|
|
||||||
|
# TODO: read https://opensource.com/article/18/4/introduction-python-bytecode
|
||||||
|
|
||||||
|
|
||||||
|
class BytecodeExpr:
|
||||||
|
"""An expression reconstructed from Python bytecode
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
@dataclasses.dataclass(frozen=True, eq=True, order=True)
|
||||||
|
class BytecodeVariableName(BytecodeExpr):
|
||||||
|
name: str
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
return self.name
|
||||||
|
|
||||||
|
|
||||||
|
@dataclasses.dataclass(frozen=True, eq=True, order=True)
|
||||||
|
class BytecodeAttribute(BytecodeExpr):
|
||||||
|
attr_name: str
|
||||||
|
object: BytecodeExpr
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
return f"{self.object}.{self.attr_name}"
|
||||||
|
|
||||||
|
|
||||||
|
@dataclasses.dataclass(frozen=True, eq=True, order=True)
|
||||||
|
class BytecodeCall(BytecodeExpr):
|
||||||
|
function: BytecodeExpr
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
return f"{self.function}()"
|
||||||
|
|
||||||
|
|
||||||
|
@dataclasses.dataclass(frozen=True, eq=True, order=True)
|
||||||
|
class BytecodeUnknown(BytecodeExpr):
|
||||||
|
opname: str
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
return f"<{self.opname}>"
|
||||||
|
|
||||||
|
|
||||||
|
def find_inst_that_added_elem_to_stack(
|
||||||
|
instructions: List[Instruction], start_index: int, stack_pos: int
|
||||||
|
):
|
||||||
|
"""Backwards traverse instructions
|
||||||
|
|
||||||
|
Backwards traverse the instructions starting at `start_index` until we find the
|
||||||
|
instruction that added the element at stack position `stack_pos` (where 0 means top
|
||||||
|
of stack). For example, if the instructions are:
|
||||||
|
|
||||||
|
```
|
||||||
|
0: LOAD_GLOBAL 0 (func)
|
||||||
|
1: LOAD_CONST 1 (42)
|
||||||
|
2: CALL_FUNCTION 1
|
||||||
|
```
|
||||||
|
|
||||||
|
We can look for the function that is called by invoking this function with
|
||||||
|
`start_index = 1` and `stack_pos = 1`. It will see that `LOAD_CONST` added the top
|
||||||
|
element to the stack, and find that `LOAD_GLOBAL` was the instruction to add element
|
||||||
|
in stack position 1 to the stack -- so the index 0 is returned.
|
||||||
|
|
||||||
|
It is assumed that if `stack_pos == 0` then the instruction you are looking for is
|
||||||
|
the one at `instructions[start_index]`. This might not hold, in case of using `NOP`
|
||||||
|
instructions.
|
||||||
|
"""
|
||||||
|
LOGGER.debug(f"find_inst_that_added_elem_to_stack {start_index=} {stack_pos=}")
|
||||||
|
assert stack_pos >= 0
|
||||||
|
for inst in reversed(instructions[: start_index + 1]):
|
||||||
|
if stack_pos == 0:
|
||||||
|
LOGGER.debug(f"Found it: {inst}")
|
||||||
|
return instructions.index(inst)
|
||||||
|
LOGGER.debug(f"Skipping {inst}")
|
||||||
|
stack_pos -= dis.stack_effect(inst.opcode, inst.arg)
|
||||||
|
|
||||||
|
raise Exception("inst_index_for_stack_diff failed")
|
||||||
|
|
||||||
|
|
||||||
|
def expr_from_instruction(instructions: List[Instruction], index: int) -> BytecodeExpr:
|
||||||
|
inst = instructions[index]
|
||||||
|
|
||||||
|
LOGGER.debug(f"expr_from_instruction: {inst} {index=}")
|
||||||
|
|
||||||
|
if inst.opname in ["LOAD_GLOBAL", "LOAD_FAST", "LOAD_NAME"]:
|
||||||
|
return BytecodeVariableName(inst.argval)
|
||||||
|
|
||||||
|
# https://docs.python.org/3/library/dis.html#opcode-LOAD_METHOD
|
||||||
|
# https://docs.python.org/3/library/dis.html#opcode-LOAD_ATTR
|
||||||
|
elif inst.opname in ["LOAD_METHOD", "LOAD_ATTR"]:
|
||||||
|
attr_name = inst.argval
|
||||||
|
obj_index = find_inst_that_added_elem_to_stack(instructions, index - 1, 0)
|
||||||
|
obj_expr = expr_from_instruction(instructions, obj_index)
|
||||||
|
return BytecodeAttribute(attr_name=attr_name, object=obj_expr)
|
||||||
|
|
||||||
|
# https://docs.python.org/3/library/dis.html#opcode-CALL_FUNCTION
|
||||||
|
elif inst.opname in ["CALL_FUNCTION", "CALL_METHOD", "CALL_FUNCTION_KW"]:
|
||||||
|
assert index > 0
|
||||||
|
assert isinstance(inst.arg, int)
|
||||||
|
if inst.opname in ["CALL_FUNCTION", "CALL_METHOD"]:
|
||||||
|
num_stack_elems = inst.arg
|
||||||
|
elif inst.opname == "CALL_FUNCTION_KW":
|
||||||
|
num_stack_elems = inst.arg + 1
|
||||||
|
|
||||||
|
func_index = find_inst_that_added_elem_to_stack(
|
||||||
|
instructions, index - 1, num_stack_elems
|
||||||
|
)
|
||||||
|
func_expr = expr_from_instruction(instructions, func_index)
|
||||||
|
return BytecodeCall(function=func_expr)
|
||||||
|
|
||||||
|
else:
|
||||||
|
# LOAD_BUILD_CLASS is included here intentionally for now, since I don't really
|
||||||
|
# know what to do about it.
|
||||||
|
LOGGER.warning(f"Don't know how to handle this type of instruction: {inst}")
|
||||||
|
return BytecodeUnknown(inst.opname)
|
||||||
|
|
||||||
|
|
||||||
|
def expr_from_frame(frame: FrameType) -> BytecodeExpr:
|
||||||
|
bytecode = dis.Bytecode(frame.f_code, current_offset=frame.f_lasti)
|
||||||
|
|
||||||
|
LOGGER.debug(f"bytecode: \n{bytecode.dis()}")
|
||||||
|
|
||||||
|
instructions = list(iter(bytecode))
|
||||||
|
last_instruction_index = [inst.offset for inst in instructions].index(frame.f_lasti)
|
||||||
|
return expr_from_instruction(instructions, last_instruction_index)
|
||||||
@@ -4,7 +4,6 @@ import argparse
|
|||||||
def parse(args):
|
def parse(args):
|
||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
|
|
||||||
parser.add_argument("--csv")
|
|
||||||
parser.add_argument("--xml")
|
parser.add_argument("--xml")
|
||||||
|
|
||||||
parser.add_argument("progname", help="file to run as main program")
|
parser.add_argument("progname", help="file to run as main program")
|
||||||
|
|||||||
@@ -1,57 +1,40 @@
|
|||||||
import csv
|
|
||||||
import dataclasses
|
import dataclasses
|
||||||
|
from typing import Dict
|
||||||
|
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
|
|
||||||
|
|
||||||
class Exporter:
|
def dataclass_to_xml(obj, parent):
|
||||||
|
obj_elem = etree.SubElement(parent, obj.__class__.__name__)
|
||||||
|
for field in dataclasses.fields(obj):
|
||||||
|
field_elem = etree.SubElement(obj_elem, field.name)
|
||||||
|
value = getattr(obj, field.name)
|
||||||
|
if isinstance(value, (str, int)) or value is None:
|
||||||
|
field_elem.text = str(value)
|
||||||
|
elif dataclasses.is_dataclass(value):
|
||||||
|
dataclass_to_xml(value, field_elem)
|
||||||
|
else:
|
||||||
|
raise ValueError(
|
||||||
|
f"Can't export key {field.name!r} with value {value!r} (type {type(value)}"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class XMLExporter:
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def export(recorded_calls, outfile_path):
|
def export(outfile_path, recorded_calls, info: Dict[str, str]):
|
||||||
raise NotImplementedError()
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def dataclass_to_dict(obj):
|
|
||||||
d = dataclasses.asdict(obj)
|
|
||||||
prefix = obj.__class__.__name__.lower()
|
|
||||||
return {f"{prefix}_{key}": val for (key, val) in d.items()}
|
|
||||||
|
|
||||||
|
|
||||||
class CSVExporter(Exporter):
|
|
||||||
@staticmethod
|
|
||||||
def export(recorded_calls, outfile_path):
|
|
||||||
with open(outfile_path, "w", newline="") as csv_file:
|
|
||||||
writer = None
|
|
||||||
for (call, callee) in sorted(recorded_calls):
|
|
||||||
data = {
|
|
||||||
**Exporter.dataclass_to_dict(call),
|
|
||||||
**Exporter.dataclass_to_dict(callee),
|
|
||||||
}
|
|
||||||
|
|
||||||
if writer is None:
|
|
||||||
writer = csv.DictWriter(csv_file, fieldnames=data.keys())
|
|
||||||
writer.writeheader()
|
|
||||||
|
|
||||||
writer.writerow(data)
|
|
||||||
|
|
||||||
print(f"output written to {outfile_path}")
|
|
||||||
|
|
||||||
|
|
||||||
class XMLExporter(Exporter):
|
|
||||||
@staticmethod
|
|
||||||
def export(recorded_calls, outfile_path):
|
|
||||||
|
|
||||||
root = etree.Element("root")
|
root = etree.Element("root")
|
||||||
|
|
||||||
for (call, callee) in sorted(recorded_calls):
|
info_elem = etree.SubElement(root, "info")
|
||||||
data = {
|
for k, v in info.items():
|
||||||
**Exporter.dataclass_to_dict(call),
|
etree.SubElement(info_elem, k).text = v
|
||||||
**Exporter.dataclass_to_dict(callee),
|
|
||||||
}
|
|
||||||
|
|
||||||
rc = etree.SubElement(root, "recorded_call")
|
rcs = etree.SubElement(root, "recorded_calls")
|
||||||
for k, v in data.items():
|
|
||||||
# xml library only supports serializing attributes that have string values
|
for (call, callee) in sorted(recorded_calls):
|
||||||
rc.set(k, str(v))
|
rc = etree.SubElement(rcs, "recorded_call")
|
||||||
|
dataclass_to_xml(call, rc)
|
||||||
|
dataclass_to_xml(callee, rc)
|
||||||
|
|
||||||
tree = etree.ElementTree(root)
|
tree = etree.ElementTree(root)
|
||||||
tree.write(outfile_path, encoding="utf-8", pretty_print=True)
|
tree.write(outfile_path, encoding="utf-8", pretty_print=True)
|
||||||
|
|||||||
@@ -1,10 +1,12 @@
|
|||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
|
import time
|
||||||
|
from datetime import datetime
|
||||||
from io import StringIO
|
from io import StringIO
|
||||||
|
|
||||||
from cg_trace import cmdline, tracer
|
from cg_trace import __version__, cmdline, tracer
|
||||||
from cg_trace.exporter import CSVExporter, XMLExporter
|
from cg_trace.exporter import XMLExporter
|
||||||
|
|
||||||
|
|
||||||
def record_calls(code, globals):
|
def record_calls(code, globals):
|
||||||
@@ -17,15 +19,19 @@ def record_calls(code, globals):
|
|||||||
sys.stderr = captured_stderr
|
sys.stderr = captured_stderr
|
||||||
|
|
||||||
cgt = tracer.CallGraphTracer()
|
cgt = tracer.CallGraphTracer()
|
||||||
cgt.run(code, globals, globals)
|
exit_status = cgt.run(code, globals, globals)
|
||||||
sys.stdout = real_stdout
|
sys.stdout = real_stdout
|
||||||
sys.stderr = real_stderr
|
sys.stderr = real_stderr
|
||||||
|
|
||||||
return sorted(cgt.recorded_calls), captured_stdout, captured_stderr
|
return sorted(cgt.recorded_calls), captured_stdout, captured_stderr, exit_status
|
||||||
|
|
||||||
|
|
||||||
def main(args=None) -> int:
|
def main(args=None) -> int:
|
||||||
logging.basicConfig(stream=sys.stderr, level=logging.DEBUG)
|
logging.basicConfig(stream=sys.stderr, level=logging.INFO)
|
||||||
|
|
||||||
|
from . import bytecode_reconstructor
|
||||||
|
|
||||||
|
logging.getLogger(bytecode_reconstructor.__name__).setLevel(logging.INFO)
|
||||||
|
|
||||||
if args is None:
|
if args is None:
|
||||||
# first element in argv is program name
|
# first element in argv is program name
|
||||||
@@ -49,14 +55,27 @@ def main(args=None) -> int:
|
|||||||
"__cached__": None,
|
"__cached__": None,
|
||||||
}
|
}
|
||||||
|
|
||||||
recorded_calls, captured_stdout, captured_stderr = record_calls(code, globs)
|
start = time.time()
|
||||||
|
recorded_calls, captured_stdout, captured_stderr, exit_status = record_calls(
|
||||||
|
code, globs
|
||||||
|
)
|
||||||
|
end = time.time()
|
||||||
|
elapsed_formatted = f"{end-start:.2f} seconds"
|
||||||
|
|
||||||
if opts.csv:
|
if opts.xml:
|
||||||
CSVExporter.export(recorded_calls, opts.csv)
|
XMLExporter.export(
|
||||||
elif opts.xml:
|
opts.xml,
|
||||||
XMLExporter.export(recorded_calls, opts.xml)
|
recorded_calls,
|
||||||
|
info={
|
||||||
|
"cg_trace_version": __version__,
|
||||||
|
"args": " ".join(args),
|
||||||
|
"exit_status": exit_status,
|
||||||
|
"elapsed": elapsed_formatted,
|
||||||
|
"utctimestamp": datetime.utcnow().replace(microsecond=0).isoformat(),
|
||||||
|
},
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
print("Recorded calls:")
|
print(f"--- Recorded calls (in {elapsed_formatted}) ---")
|
||||||
for (call, callee) in recorded_calls:
|
for (call, callee) in recorded_calls:
|
||||||
print(f"{call} --> {callee}")
|
print(f"{call} --> {callee}")
|
||||||
|
|
||||||
|
|||||||
@@ -1,11 +1,12 @@
|
|||||||
import dataclasses
|
import dataclasses
|
||||||
import dis
|
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
from types import FrameType
|
from types import FrameType
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
|
from cg_trace.bytecode_reconstructor import BytecodeExpr, expr_from_frame
|
||||||
|
|
||||||
LOGGER = logging.getLogger(__name__)
|
LOGGER = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
@@ -42,18 +43,26 @@ class Call:
|
|||||||
filename: str
|
filename: str
|
||||||
linenum: int
|
linenum: int
|
||||||
inst_index: int
|
inst_index: int
|
||||||
|
bytecode_expr: BytecodeExpr
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
d = dataclasses.asdict(self)
|
||||||
|
del d["bytecode_expr"]
|
||||||
|
normal_fields = ", ".join(f"{k}={v!r}" for k, v in d.items())
|
||||||
|
|
||||||
|
return f"{type(self).__name__}({normal_fields}, bytecode_expr≈{repr(str(self.bytecode_expr))})"
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_frame(cls, frame: FrameType):
|
def from_frame(cls, frame: FrameType):
|
||||||
code = frame.f_code
|
code = frame.f_code
|
||||||
|
|
||||||
b = dis.Bytecode(frame.f_code, current_offset=frame.f_lasti)
|
bytecode_expr = expr_from_frame(frame)
|
||||||
LOGGER.debug(f"bytecode: \n{b.dis()}")
|
|
||||||
|
|
||||||
return cls(
|
return cls(
|
||||||
filename=canonic_filename(code.co_filename),
|
filename=canonic_filename(code.co_filename),
|
||||||
linenum=frame.f_lineno,
|
linenum=frame.f_lineno,
|
||||||
inst_index=frame.f_lasti,
|
inst_index=frame.f_lasti,
|
||||||
|
bytecode_expr=bytecode_expr,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -165,9 +174,11 @@ class CallGraphTracer:
|
|||||||
sys.setprofile(self.profilefunc)
|
sys.setprofile(self.profilefunc)
|
||||||
exec(code, globals, locals)
|
exec(code, globals, locals)
|
||||||
sys.setprofile(None)
|
sys.setprofile(None)
|
||||||
|
return "completed"
|
||||||
except Exception:
|
except Exception:
|
||||||
sys.setprofile(None)
|
sys.setprofile(None)
|
||||||
LOGGER.info("Exception occurred while running program:", exc_info=True)
|
LOGGER.info("Exception occurred while running program:", exc_info=True)
|
||||||
|
return "exception occurred"
|
||||||
|
|
||||||
def profilefunc(self, frame: FrameType, event: str, arg):
|
def profilefunc(self, frame: FrameType, event: str, arg):
|
||||||
# ignore everything until the first call, since that is `exec` from the `run`
|
# ignore everything until the first call, since that is `exec` from the `run`
|
||||||
@@ -191,6 +202,7 @@ class CallGraphTracer:
|
|||||||
|
|
||||||
LOGGER.debug(f"profilefunc {event=}")
|
LOGGER.debug(f"profilefunc {event=}")
|
||||||
if event == "call":
|
if event == "call":
|
||||||
|
assert frame.f_back is not None
|
||||||
# in call, the `frame` argument is new the frame for entering the callee
|
# in call, the `frame` argument is new the frame for entering the callee
|
||||||
call = Call.from_frame(frame.f_back)
|
call = Call.from_frame(frame.f_back)
|
||||||
callee = PythonCallee.from_frame(frame)
|
callee = PythonCallee.from_frame(frame)
|
||||||
|
|||||||
Reference in New Issue
Block a user