diff --git a/python/tools/recorded-call-graph-metrics/example-traces/builtins.xml b/python/tools/recorded-call-graph-metrics/example-traces/builtins.xml
index 60703c50cf2..c9dacc3159d 100644
--- a/python/tools/recorded-call-graph-metrics/example-traces/builtins.xml
+++ b/python/tools/recorded-call-graph-metrics/example-traces/builtins.xml
@@ -1,6 +1,105 @@
-
-
-
-
+
+ 0.0.2
+ --xml example-traces/builtins.xml example/builtins.py
+ completed
+ 0.00 seconds
+ 2020-07-20T09:27:00
+
+
+
+
+ /home/rasmus/code/ql/python/tools/recorded-call-graph-metrics/example/builtins.py
+ 1
+ 4
+
+
+
+
+ print
+
+
+
+
+
+
+ builtins
+ print
+ True
+
+
+
+
+ /home/rasmus/code/ql/python/tools/recorded-call-graph-metrics/example/builtins.py
+ 2
+ 12
+
+
+
+
+ len
+
+
+
+
+
+
+ builtins
+ len
+ True
+
+
+
+
+ /home/rasmus/code/ql/python/tools/recorded-call-graph-metrics/example/builtins.py
+ 4
+ 28
+
+
+
+
+ append
+
+
+
+
+
+
+
+ None
+ list.append
+ True
+
+
+
+
+ /home/rasmus/code/ql/python/tools/recorded-call-graph-metrics/example/builtins.py
+ 7
+ 44
+
+
+
+
+ getdefaultencoding
+
+
+
+
+
+
+
+ sys
+ getdefaultencoding
+ True
+
+
+
diff --git a/python/tools/recorded-call-graph-metrics/example-traces/simple.xml b/python/tools/recorded-call-graph-metrics/example-traces/simple.xml
index 25f494aec86..7aa14f62728 100644
--- a/python/tools/recorded-call-graph-metrics/example-traces/simple.xml
+++ b/python/tools/recorded-call-graph-metrics/example-traces/simple.xml
@@ -1,8 +1,137 @@
-
-
-
-
-
-
+
+ 0.0.2
+ --xml example-traces/simple.xml example/simple.py
+ completed
+ 0.00 seconds
+ 2020-07-20T09:27:01
+
+
+
+
+ /home/rasmus/code/ql/python/tools/recorded-call-graph-metrics/example/simple.py
+ 2
+ 4
+
+
+
+
+ print
+
+
+
+
+
+
+ builtins
+ print
+ True
+
+
+
+
+ /home/rasmus/code/ql/python/tools/recorded-call-graph-metrics/example/simple.py
+ 5
+ 4
+
+
+
+
+ print
+
+
+
+
+
+
+ builtins
+ print
+ True
+
+
+
+
+ /home/rasmus/code/ql/python/tools/recorded-call-graph-metrics/example/simple.py
+ 7
+ 18
+
+
+
+
+ foo
+
+
+
+
+
+
+ /home/rasmus/code/ql/python/tools/recorded-call-graph-metrics/example/simple.py
+ 1
+ foo
+
+
+
+
+ /home/rasmus/code/ql/python/tools/recorded-call-graph-metrics/example/simple.py
+ 8
+ 24
+
+
+
+
+ bar
+
+
+
+
+
+
+ /home/rasmus/code/ql/python/tools/recorded-call-graph-metrics/example/simple.py
+ 4
+ bar
+
+
+
+
+ /home/rasmus/code/ql/python/tools/recorded-call-graph-metrics/example/simple.py
+ 10
+ 30
+
+
+
+
+ foo
+
+
+
+
+
+
+ /home/rasmus/code/ql/python/tools/recorded-call-graph-metrics/example/simple.py
+ 1
+ foo
+
+
+
+
+ /home/rasmus/code/ql/python/tools/recorded-call-graph-metrics/example/simple.py
+ 10
+ 36
+
+
+
+
+ bar
+
+
+
+
+
+
+ /home/rasmus/code/ql/python/tools/recorded-call-graph-metrics/example/simple.py
+ 4
+ bar
+
+
+
diff --git a/python/tools/recorded-call-graph-metrics/setup.py b/python/tools/recorded-call-graph-metrics/setup.py
index 9e41f1e0e4c..ef212fac8f1 100644
--- a/python/tools/recorded-call-graph-metrics/setup.py
+++ b/python/tools/recorded-call-graph-metrics/setup.py
@@ -4,7 +4,7 @@ from setuptools import find_packages, setup
setup(
name="cg_trace",
- version="0.0.1",
+ version="0.0.2", # Remember to update src/cg_trace/__init__.py
description="Call graph tracing",
packages=find_packages("src"),
package_dir={"": "src"},
diff --git a/python/tools/recorded-call-graph-metrics/src/cg_trace/__init__.py b/python/tools/recorded-call-graph-metrics/src/cg_trace/__init__.py
index 9432512b4e7..582f75fa50d 100644
--- a/python/tools/recorded-call-graph-metrics/src/cg_trace/__init__.py
+++ b/python/tools/recorded-call-graph-metrics/src/cg_trace/__init__.py
@@ -1,6 +1,6 @@
import sys
-__version__ = "0.0.1"
+__version__ = "0.0.2" # remember to update setup.py
# Since the virtual machine opcodes changed in 3.6, not going to attempt to support
# anything before that. Using dataclasses, which is a new feature in Python 3.7
diff --git a/python/tools/recorded-call-graph-metrics/src/cg_trace/bytecode_reconstructor.py b/python/tools/recorded-call-graph-metrics/src/cg_trace/bytecode_reconstructor.py
new file mode 100644
index 00000000000..9ab26bb8bfb
--- /dev/null
+++ b/python/tools/recorded-call-graph-metrics/src/cg_trace/bytecode_reconstructor.py
@@ -0,0 +1,135 @@
+import dataclasses
+import dis
+import logging
+from dis import Instruction
+from types import FrameType
+from typing import List
+
+LOGGER = logging.getLogger(__name__)
+
+# See https://docs.python.org/3/library/dis.html#python-bytecode-instructions for
+# details on the bytecode instructions
+
+# TODO: read https://opensource.com/article/18/4/introduction-python-bytecode
+
+
+class BytecodeExpr:
+ """An expression reconstructed from Python bytecode
+ """
+
+
+@dataclasses.dataclass(frozen=True, eq=True, order=True)
+class BytecodeVariableName(BytecodeExpr):
+ name: str
+
+ def __str__(self):
+ return self.name
+
+
+@dataclasses.dataclass(frozen=True, eq=True, order=True)
+class BytecodeAttribute(BytecodeExpr):
+ attr_name: str
+ object: BytecodeExpr
+
+ def __str__(self):
+ return f"{self.object}.{self.attr_name}"
+
+
+@dataclasses.dataclass(frozen=True, eq=True, order=True)
+class BytecodeCall(BytecodeExpr):
+ function: BytecodeExpr
+
+ def __str__(self):
+ return f"{self.function}()"
+
+
+@dataclasses.dataclass(frozen=True, eq=True, order=True)
+class BytecodeUnknown(BytecodeExpr):
+ opname: str
+
+ def __str__(self):
+ return f"<{self.opname}>"
+
+
+def find_inst_that_added_elem_to_stack(
+ instructions: List[Instruction], start_index: int, stack_pos: int
+):
+ """Backwards traverse instructions
+
+ Backwards traverse the instructions starting at `start_index` until we find the
+ instruction that added the element at stack position `stack_pos` (where 0 means top
+ of stack). For example, if the instructions are:
+
+ ```
+ 0: LOAD_GLOBAL 0 (func)
+ 1: LOAD_CONST 1 (42)
+ 2: CALL_FUNCTION 1
+ ```
+
+ We can look for the function that is called by invoking this function with
+ `start_index = 1` and `stack_pos = 1`. It will see that `LOAD_CONST` added the top
+ element to the stack, and find that `LOAD_GLOBAL` was the instruction to add element
+ in stack position 1 to the stack -- so the index 0 is returned.
+
+ It is assumed that if `stack_pos == 0` then the instruction you are looking for is
+ the one at `instructions[start_index]`. This might not hold, in case of using `NOP`
+ instructions.
+ """
+ LOGGER.debug(f"find_inst_that_added_elem_to_stack {start_index=} {stack_pos=}")
+ assert stack_pos >= 0
+ for inst in reversed(instructions[: start_index + 1]):
+ if stack_pos == 0:
+ LOGGER.debug(f"Found it: {inst}")
+ return instructions.index(inst)
+ LOGGER.debug(f"Skipping {inst}")
+ stack_pos -= dis.stack_effect(inst.opcode, inst.arg)
+
+ raise Exception("inst_index_for_stack_diff failed")
+
+
+def expr_from_instruction(instructions: List[Instruction], index: int) -> BytecodeExpr:
+ inst = instructions[index]
+
+ LOGGER.debug(f"expr_from_instruction: {inst} {index=}")
+
+ if inst.opname in ["LOAD_GLOBAL", "LOAD_FAST", "LOAD_NAME"]:
+ return BytecodeVariableName(inst.argval)
+
+ # https://docs.python.org/3/library/dis.html#opcode-LOAD_METHOD
+ # https://docs.python.org/3/library/dis.html#opcode-LOAD_ATTR
+ elif inst.opname in ["LOAD_METHOD", "LOAD_ATTR"]:
+ attr_name = inst.argval
+ obj_index = find_inst_that_added_elem_to_stack(instructions, index - 1, 0)
+ obj_expr = expr_from_instruction(instructions, obj_index)
+ return BytecodeAttribute(attr_name=attr_name, object=obj_expr)
+
+ # https://docs.python.org/3/library/dis.html#opcode-CALL_FUNCTION
+ elif inst.opname in ["CALL_FUNCTION", "CALL_METHOD", "CALL_FUNCTION_KW"]:
+ assert index > 0
+ assert isinstance(inst.arg, int)
+ if inst.opname in ["CALL_FUNCTION", "CALL_METHOD"]:
+ num_stack_elems = inst.arg
+ elif inst.opname == "CALL_FUNCTION_KW":
+ num_stack_elems = inst.arg + 1
+
+ func_index = find_inst_that_added_elem_to_stack(
+ instructions, index - 1, num_stack_elems
+ )
+ func_expr = expr_from_instruction(instructions, func_index)
+ return BytecodeCall(function=func_expr)
+
+ else:
+ # LOAD_BUILD_CLASS is included here intentionally for now, since I don't really
+ # know what to do about it.
+ LOGGER.warning(f"Don't know how to handle this type of instruction: {inst}")
+ return BytecodeUnknown(inst.opname)
+
+
+def expr_from_frame(frame: FrameType) -> BytecodeExpr:
+ bytecode = dis.Bytecode(frame.f_code, current_offset=frame.f_lasti)
+
+ LOGGER.debug(f"bytecode: \n{bytecode.dis()}")
+
+ instructions = list(iter(bytecode))
+ last_instruction_index = [inst.offset for inst in instructions].index(frame.f_lasti)
+ return expr_from_instruction(instructions, last_instruction_index)
diff --git a/python/tools/recorded-call-graph-metrics/src/cg_trace/cmdline.py b/python/tools/recorded-call-graph-metrics/src/cg_trace/cmdline.py
index aaf047315d3..ddbeeaf50dc 100644
--- a/python/tools/recorded-call-graph-metrics/src/cg_trace/cmdline.py
+++ b/python/tools/recorded-call-graph-metrics/src/cg_trace/cmdline.py
@@ -4,7 +4,6 @@ import argparse
def parse(args):
parser = argparse.ArgumentParser()
- parser.add_argument("--csv")
parser.add_argument("--xml")
parser.add_argument("progname", help="file to run as main program")
diff --git a/python/tools/recorded-call-graph-metrics/src/cg_trace/exporter.py b/python/tools/recorded-call-graph-metrics/src/cg_trace/exporter.py
index bee0d5dc0f6..b6051a01a16 100644
--- a/python/tools/recorded-call-graph-metrics/src/cg_trace/exporter.py
+++ b/python/tools/recorded-call-graph-metrics/src/cg_trace/exporter.py
@@ -1,57 +1,40 @@
-import csv
import dataclasses
+from typing import Dict
from lxml import etree
-class Exporter:
+def dataclass_to_xml(obj, parent):
+ obj_elem = etree.SubElement(parent, obj.__class__.__name__)
+ for field in dataclasses.fields(obj):
+ field_elem = etree.SubElement(obj_elem, field.name)
+ value = getattr(obj, field.name)
+ if isinstance(value, (str, int)) or value is None:
+ field_elem.text = str(value)
+ elif dataclasses.is_dataclass(value):
+ dataclass_to_xml(value, field_elem)
+ else:
+ raise ValueError(
+ f"Can't export key {field.name!r} with value {value!r} (type {type(value)}"
+ )
+
+
+class XMLExporter:
@staticmethod
- def export(recorded_calls, outfile_path):
- raise NotImplementedError()
-
- @staticmethod
- def dataclass_to_dict(obj):
- d = dataclasses.asdict(obj)
- prefix = obj.__class__.__name__.lower()
- return {f"{prefix}_{key}": val for (key, val) in d.items()}
-
-
-class CSVExporter(Exporter):
- @staticmethod
- def export(recorded_calls, outfile_path):
- with open(outfile_path, "w", newline="") as csv_file:
- writer = None
- for (call, callee) in sorted(recorded_calls):
- data = {
- **Exporter.dataclass_to_dict(call),
- **Exporter.dataclass_to_dict(callee),
- }
-
- if writer is None:
- writer = csv.DictWriter(csv_file, fieldnames=data.keys())
- writer.writeheader()
-
- writer.writerow(data)
-
- print(f"output written to {outfile_path}")
-
-
-class XMLExporter(Exporter):
- @staticmethod
- def export(recorded_calls, outfile_path):
+ def export(outfile_path, recorded_calls, info: Dict[str, str]):
root = etree.Element("root")
- for (call, callee) in sorted(recorded_calls):
- data = {
- **Exporter.dataclass_to_dict(call),
- **Exporter.dataclass_to_dict(callee),
- }
+ info_elem = etree.SubElement(root, "info")
+ for k, v in info.items():
+ etree.SubElement(info_elem, k).text = v
- rc = etree.SubElement(root, "recorded_call")
- for k, v in data.items():
- # xml library only supports serializing attributes that have string values
- rc.set(k, str(v))
+ rcs = etree.SubElement(root, "recorded_calls")
+
+ for (call, callee) in sorted(recorded_calls):
+ rc = etree.SubElement(rcs, "recorded_call")
+ dataclass_to_xml(call, rc)
+ dataclass_to_xml(callee, rc)
tree = etree.ElementTree(root)
tree.write(outfile_path, encoding="utf-8", pretty_print=True)
diff --git a/python/tools/recorded-call-graph-metrics/src/cg_trace/main.py b/python/tools/recorded-call-graph-metrics/src/cg_trace/main.py
index 77aed2146c9..3691fe4149b 100644
--- a/python/tools/recorded-call-graph-metrics/src/cg_trace/main.py
+++ b/python/tools/recorded-call-graph-metrics/src/cg_trace/main.py
@@ -1,10 +1,12 @@
import logging
import os
import sys
+import time
+from datetime import datetime
from io import StringIO
-from cg_trace import cmdline, tracer
-from cg_trace.exporter import CSVExporter, XMLExporter
+from cg_trace import __version__, cmdline, tracer
+from cg_trace.exporter import XMLExporter
def record_calls(code, globals):
@@ -17,15 +19,19 @@ def record_calls(code, globals):
sys.stderr = captured_stderr
cgt = tracer.CallGraphTracer()
- cgt.run(code, globals, globals)
+ exit_status = cgt.run(code, globals, globals)
sys.stdout = real_stdout
sys.stderr = real_stderr
- return sorted(cgt.recorded_calls), captured_stdout, captured_stderr
+ return sorted(cgt.recorded_calls), captured_stdout, captured_stderr, exit_status
def main(args=None) -> int:
- logging.basicConfig(stream=sys.stderr, level=logging.DEBUG)
+ logging.basicConfig(stream=sys.stderr, level=logging.INFO)
+
+ from . import bytecode_reconstructor
+
+ logging.getLogger(bytecode_reconstructor.__name__).setLevel(logging.INFO)
if args is None:
# first element in argv is program name
@@ -49,14 +55,27 @@ def main(args=None) -> int:
"__cached__": None,
}
- recorded_calls, captured_stdout, captured_stderr = record_calls(code, globs)
+ start = time.time()
+ recorded_calls, captured_stdout, captured_stderr, exit_status = record_calls(
+ code, globs
+ )
+ end = time.time()
+ elapsed_formatted = f"{end-start:.2f} seconds"
- if opts.csv:
- CSVExporter.export(recorded_calls, opts.csv)
- elif opts.xml:
- XMLExporter.export(recorded_calls, opts.xml)
+ if opts.xml:
+ XMLExporter.export(
+ opts.xml,
+ recorded_calls,
+ info={
+ "cg_trace_version": __version__,
+ "args": " ".join(args),
+ "exit_status": exit_status,
+ "elapsed": elapsed_formatted,
+ "utctimestamp": datetime.utcnow().replace(microsecond=0).isoformat(),
+ },
+ )
else:
- print("Recorded calls:")
+ print(f"--- Recorded calls (in {elapsed_formatted}) ---")
for (call, callee) in recorded_calls:
print(f"{call} --> {callee}")
diff --git a/python/tools/recorded-call-graph-metrics/src/cg_trace/tracer.py b/python/tools/recorded-call-graph-metrics/src/cg_trace/tracer.py
index 87835c4bf9e..25a49943214 100644
--- a/python/tools/recorded-call-graph-metrics/src/cg_trace/tracer.py
+++ b/python/tools/recorded-call-graph-metrics/src/cg_trace/tracer.py
@@ -1,11 +1,12 @@
import dataclasses
-import dis
import logging
import os
import sys
from types import FrameType
from typing import Optional
+from cg_trace.bytecode_reconstructor import BytecodeExpr, expr_from_frame
+
LOGGER = logging.getLogger(__name__)
@@ -42,18 +43,26 @@ class Call:
filename: str
linenum: int
inst_index: int
+ bytecode_expr: BytecodeExpr
+
+ def __str__(self):
+ d = dataclasses.asdict(self)
+ del d["bytecode_expr"]
+ normal_fields = ", ".join(f"{k}={v!r}" for k, v in d.items())
+
+ return f"{type(self).__name__}({normal_fields}, bytecode_expr≈{repr(str(self.bytecode_expr))})"
@classmethod
def from_frame(cls, frame: FrameType):
code = frame.f_code
- b = dis.Bytecode(frame.f_code, current_offset=frame.f_lasti)
- LOGGER.debug(f"bytecode: \n{b.dis()}")
+ bytecode_expr = expr_from_frame(frame)
return cls(
filename=canonic_filename(code.co_filename),
linenum=frame.f_lineno,
inst_index=frame.f_lasti,
+ bytecode_expr=bytecode_expr,
)
@@ -165,9 +174,11 @@ class CallGraphTracer:
sys.setprofile(self.profilefunc)
exec(code, globals, locals)
sys.setprofile(None)
+ return "completed"
except Exception:
sys.setprofile(None)
LOGGER.info("Exception occurred while running program:", exc_info=True)
+ return "exception occurred"
def profilefunc(self, frame: FrameType, event: str, arg):
# ignore everything until the first call, since that is `exec` from the `run`
@@ -191,6 +202,7 @@ class CallGraphTracer:
LOGGER.debug(f"profilefunc {event=}")
if event == "call":
+ assert frame.f_back is not None
# in call, the `frame` argument is new the frame for entering the callee
call = Call.from_frame(frame.f_back)
callee = PythonCallee.from_frame(frame)