Add hybrid AST / CFG graph for illustration

This commit is contained in:
Michael Hohn
2025-03-19 19:36:29 -07:00
committed by =Michael Hohn
parent 39ba0713b8
commit cc088b2d9e
4 changed files with 1984 additions and 32 deletions

View File

@@ -0,0 +1,266 @@
digraph {
compound=true;
// Original AST nodes and edges
//
// Node declarations for the AST of copy_mem (node 118). Each label has the
// form "[NodeKind] description". The groups below follow the getStmt(i)
// edges leaving the function body block (node 113) in the AST edge list.

// getStmt(0): first memcpy call statement.
//   2-6   first argument chain  (input, 0, array access, ptr, buf)
//   7-11  second argument chain (input, 1, array access, ptr, buf)
//   12    cast reached from the call via getArgument(1).getFullyConverted()
//   13-17 third argument chain  (input, 1, array access, ptr, size)
0[label="[ExprStmt] ExprStmt"; ];
1[label="[FunctionCall] call to memcpy"; ];
2[label="[VariableAccess] input"; ];
3[label="[Literal] 0"; ];
4[label="[ArrayExpr] access to array"; ];
5[label="[ValueFieldAccess] ptr"; ];
6[label="[ValueFieldAccess] buf"; ];
7[label="[VariableAccess] input"; ];
8[label="[Literal] 1"; ];
9[label="[ArrayExpr] access to array"; ];
10[label="[ValueFieldAccess] ptr"; ];
11[label="[ValueFieldAccess] buf"; ];
12[label="[CStyleCast] (const void *)..."; ];
13[label="[VariableAccess] input"; ];
14[label="[Literal] 1"; ];
15[label="[ArrayExpr] access to array"; ];
16[label="[ValueFieldAccess] ptr"; ];
17[label="[ValueFieldAccess] size"; ];
// getStmt(1): call to copy_mem_nested with a single argument (node 20).
18[label="[ExprStmt] ExprStmt"; ];
19[label="[FunctionCall] call to copy_mem_nested"; ];
20[label="[VariableAccess] input"; ];
// getStmt(2): if statement; condition is the != node 28, then-branch is
// block 29, which has no getStmt edges of its own.
21[label="[IfStmt] if (...) ... "; ];
22[label="[VariableAccess] input_types"; ];
23[label="[FunctionCall] call to DYN_INPUT_TYPE"; ];
24[label="[Literal] 1"; ];
25[label="[CStyleCast] (unsigned int)..."; ];
26[label="[Literal] 1"; ];
27[label="[CStyleCast] (unsigned int)..."; ];
28[label="[NEExpr] ... != ..."; ];
29[label="[BlockStmt] { ... }"; ];
// getStmt(3): second memcpy call statement (same shape as nodes 0-17).
30[label="[ExprStmt] ExprStmt"; ];
31[label="[FunctionCall] call to memcpy"; ];
32[label="[VariableAccess] input"; ];
33[label="[Literal] 0"; ];
34[label="[ArrayExpr] access to array"; ];
35[label="[ValueFieldAccess] ptr"; ];
36[label="[ValueFieldAccess] buf"; ];
37[label="[VariableAccess] input"; ];
38[label="[Literal] 1"; ];
39[label="[ArrayExpr] access to array"; ];
40[label="[ValueFieldAccess] ptr"; ];
41[label="[ValueFieldAccess] buf"; ];
42[label="[CStyleCast] (const void *)..."; ];
43[label="[VariableAccess] input"; ];
44[label="[Literal] 1"; ];
45[label="[ArrayExpr] access to array"; ];
46[label="[ValueFieldAccess] ptr"; ];
47[label="[ValueFieldAccess] size"; ];
// getStmt(4): second call to copy_mem_nested.
48[label="[ExprStmt] ExprStmt"; ];
49[label="[FunctionCall] call to copy_mem_nested"; ];
50[label="[VariableAccess] input"; ];
// getStmt(5): if statement; condition is the == node 59 (DYN_INPUT_TYPE call
// vs. literal 100), then-branch is block 78, whose getStmt(0) is the memcpy
// statement at node 60.
51[label="[IfStmt] if (...) ... "; ];
52[label="[FunctionCall] call to DYN_INPUT_TYPE"; ];
53[label="[Literal] 1"; ];
54[label="[CStyleCast] (unsigned int)..."; ];
55[label="[Literal] 1"; ];
56[label="[CStyleCast] (unsigned int)..."; ];
57[label="[Literal] 100"; ];
58[label="[CStyleCast] (unsigned int)..."; ];
59[label="[EQExpr] ... == ..."; ];
60[label="[ExprStmt] ExprStmt"; ];
61[label="[FunctionCall] call to memcpy"; ];
62[label="[VariableAccess] input"; ];
63[label="[Literal] 0"; ];
64[label="[ArrayExpr] access to array"; ];
65[label="[ValueFieldAccess] ptr"; ];
66[label="[ValueFieldAccess] buf"; ];
67[label="[VariableAccess] input"; ];
68[label="[Literal] 1"; ];
69[label="[ArrayExpr] access to array"; ];
70[label="[ValueFieldAccess] ptr"; ];
71[label="[ValueFieldAccess] buf"; ];
72[label="[CStyleCast] (const void *)..."; ];
73[label="[VariableAccess] input"; ];
74[label="[Literal] 1"; ];
75[label="[ArrayExpr] access to array"; ];
76[label="[ValueFieldAccess] ptr"; ];
77[label="[ValueFieldAccess] size"; ];
78[label="[BlockStmt] { ... }"; ];
// getStmt(6): if statement; condition is the != node 86, then-branch is
// block 89, whose getStmt(0) is the return statement at node 87.
79[label="[IfStmt] if (...) ... "; ];
80[label="[VariableAccess] input_types"; ];
81[label="[FunctionCall] call to DYN_INPUT_TYPE"; ];
82[label="[Literal] 1"; ];
83[label="[CStyleCast] (unsigned int)..."; ];
84[label="[Literal] 1"; ];
85[label="[CStyleCast] (unsigned int)..."; ];
86[label="[NEExpr] ... != ..."; ];
87[label="[ReturnStmt] return ..."; ];
88[label="[Literal] 1"; ];
89[label="[BlockStmt] { ... }"; ];
// getStmt(7): last memcpy call statement (same shape as nodes 0-17).
90[label="[ExprStmt] ExprStmt"; ];
91[label="[FunctionCall] call to memcpy"; ];
92[label="[VariableAccess] input"; ];
93[label="[Literal] 0"; ];
94[label="[ArrayExpr] access to array"; ];
95[label="[ValueFieldAccess] ptr"; ];
96[label="[ValueFieldAccess] buf"; ];
97[label="[VariableAccess] input"; ];
98[label="[Literal] 1"; ];
99[label="[ArrayExpr] access to array"; ];
100[label="[ValueFieldAccess] ptr"; ];
101[label="[ValueFieldAccess] buf"; ];
102[label="[CStyleCast] (const void *)..."; ];
103[label="[VariableAccess] input"; ];
104[label="[Literal] 1"; ];
105[label="[ArrayExpr] access to array"; ];
106[label="[ValueFieldAccess] ptr"; ];
107[label="[ValueFieldAccess] size"; ];
// getStmt(8): last call to copy_mem_nested.
108[label="[ExprStmt] ExprStmt"; ];
109[label="[FunctionCall] call to copy_mem_nested"; ];
110[label="[VariableAccess] input"; ];
// getStmt(9): final return statement with literal 0.
111[label="[ReturnStmt] return ..."; ];
112[label="[Literal] 0"; ];
// Function body block: source of all getStmt(0..9) edges.
113[label="[BlockStmt] { ... }"; ];
// The three parameters, attached through node 117 via getParameter(0..2).
114[label="[Parameter] unused"; ];
115[label="[Parameter] input"; ];
116[label="[Parameter] input_types"; ];
// Unlabeled node: target of the "<params>" edge from 118 and source of the
// getParameter(i) edges, so it renders with its id (117) as its text.
117[];
// The function itself; its edges lead to node 117 ("<params>") and to the
// body block 113 (getEntryPoint()).
118[label="[TopLevelFunction] int copy_mem(unsigned int, dyn_input_t*, unsigned int)"; ];
// AST edges
//
// Each edge runs parent -> child and is labeled with the accessor that
// relates them (getExpr(), getArgument(i), getQualifier(), ...).
// The list is not grouped by parent: it comes in passes, where each pass
// holds the next outgoing edge of every node that still has one. The
// children of one parent (e.g. call node 1, or body block 113) are therefore
// spread over several passes.

// Pass 1: first outgoing edge of each parent.
0 -> 1[label="getExpr()"; ];
1 -> 6[label="getArgument(0)"; ];
4 -> 2[label="getArrayBase()"; ];
5 -> 4[label="getQualifier()"; ];
6 -> 5[label="getQualifier()"; ];
9 -> 7[label="getArrayBase()"; ];
10 -> 9[label="getQualifier()"; ];
11 -> 10[label="getQualifier()"; ];
15 -> 13[label="getArrayBase()"; ];
16 -> 15[label="getQualifier()"; ];
17 -> 16[label="getQualifier()"; ];
18 -> 19[label="getExpr()"; ];
19 -> 20[label="getArgument(0)"; ];
21 -> 28[label="getCondition()"; ];
23 -> 24[label="getArgument(0)"; ];
28 -> 22[label="getLeftOperand()"; ];
30 -> 31[label="getExpr()"; ];
31 -> 36[label="getArgument(0)"; ];
34 -> 32[label="getArrayBase()"; ];
35 -> 34[label="getQualifier()"; ];
36 -> 35[label="getQualifier()"; ];
39 -> 37[label="getArrayBase()"; ];
40 -> 39[label="getQualifier()"; ];
41 -> 40[label="getQualifier()"; ];
45 -> 43[label="getArrayBase()"; ];
46 -> 45[label="getQualifier()"; ];
47 -> 46[label="getQualifier()"; ];
48 -> 49[label="getExpr()"; ];
49 -> 50[label="getArgument(0)"; ];
51 -> 59[label="getCondition()"; ];
52 -> 53[label="getArgument(0)"; ];
59 -> 52[label="getLeftOperand()"; ];
60 -> 61[label="getExpr()"; ];
61 -> 66[label="getArgument(0)"; ];
64 -> 62[label="getArrayBase()"; ];
65 -> 64[label="getQualifier()"; ];
66 -> 65[label="getQualifier()"; ];
69 -> 67[label="getArrayBase()"; ];
70 -> 69[label="getQualifier()"; ];
71 -> 70[label="getQualifier()"; ];
75 -> 73[label="getArrayBase()"; ];
76 -> 75[label="getQualifier()"; ];
77 -> 76[label="getQualifier()"; ];
78 -> 60[label="getStmt(0)"; ];
79 -> 86[label="getCondition()"; ];
81 -> 82[label="getArgument(0)"; ];
86 -> 80[label="getLeftOperand()"; ];
87 -> 88[label="getExpr()"; ];
89 -> 87[label="getStmt(0)"; ];
90 -> 91[label="getExpr()"; ];
91 -> 96[label="getArgument(0)"; ];
94 -> 92[label="getArrayBase()"; ];
95 -> 94[label="getQualifier()"; ];
96 -> 95[label="getQualifier()"; ];
99 -> 97[label="getArrayBase()"; ];
100 -> 99[label="getQualifier()"; ];
101 -> 100[label="getQualifier()"; ];
105 -> 103[label="getArrayBase()"; ];
106 -> 105[label="getQualifier()"; ];
107 -> 106[label="getQualifier()"; ];
108 -> 109[label="getExpr()"; ];
109 -> 110[label="getArgument(0)"; ];
111 -> 112[label="getExpr()"; ];
113 -> 0[label="getStmt(0)"; ];
118 -> 117[label="<params>"; ];
117 -> 114[label="getParameter(0)"; ];
// Pass 2: second outgoing edge of each parent that has one.
1 -> 11[label="getArgument(1)"; ];
4 -> 3[label="getArrayOffset()"; ];
9 -> 8[label="getArrayOffset()"; ];
15 -> 14[label="getArrayOffset()"; ];
21 -> 29[label="getThen()"; ];
23 -> 26[label="getArgument(1)"; ];
28 -> 23[label="getRightOperand()"; ];
31 -> 41[label="getArgument(1)"; ];
34 -> 33[label="getArrayOffset()"; ];
39 -> 38[label="getArrayOffset()"; ];
45 -> 44[label="getArrayOffset()"; ];
51 -> 78[label="getThen()"; ];
52 -> 55[label="getArgument(1)"; ];
59 -> 57[label="getRightOperand()"; ];
61 -> 71[label="getArgument(1)"; ];
64 -> 63[label="getArrayOffset()"; ];
69 -> 68[label="getArrayOffset()"; ];
75 -> 74[label="getArrayOffset()"; ];
79 -> 89[label="getThen()"; ];
81 -> 84[label="getArgument(1)"; ];
86 -> 81[label="getRightOperand()"; ];
91 -> 101[label="getArgument(1)"; ];
94 -> 93[label="getArrayOffset()"; ];
99 -> 98[label="getArrayOffset()"; ];
105 -> 104[label="getArrayOffset()"; ];
113 -> 18[label="getStmt(1)"; ];
118 -> 113[label="getEntryPoint()"; ];
117 -> 115[label="getParameter(1)"; ];
// Pass 3: third outgoing edge. The ".getFullyConverted()" labels lead to
// the CStyleCast nodes rather than to another operand.
1 -> 17[label="getArgument(2)"; ];
23 -> 25[label="getArgument(0).getFullyConverted()"; ];
31 -> 47[label="getArgument(2)"; ];
52 -> 54[label="getArgument(0).getFullyConverted()"; ];
59 -> 58[label="getRightOperand().getFullyConverted()"; ];
61 -> 77[label="getArgument(2)"; ];
81 -> 83[label="getArgument(0).getFullyConverted()"; ];
91 -> 107[label="getArgument(2)"; ];
113 -> 21[label="getStmt(2)"; ];
117 -> 116[label="getParameter(2)"; ];
// Pass 4: fourth outgoing edge (the converted second argument of each call).
1 -> 12[label="getArgument(1).getFullyConverted()"; ];
23 -> 27[label="getArgument(1).getFullyConverted()"; ];
31 -> 42[label="getArgument(1).getFullyConverted()"; ];
52 -> 56[label="getArgument(1).getFullyConverted()"; ];
61 -> 72[label="getArgument(1).getFullyConverted()"; ];
81 -> 85[label="getArgument(1).getFullyConverted()"; ];
91 -> 102[label="getArgument(1).getFullyConverted()"; ];
113 -> 30[label="getStmt(3)"; ];
// Remaining passes: only the function body block 113 still has children.
113 -> 48[label="getStmt(4)"; ];
113 -> 51[label="getStmt(5)"; ];
113 -> 79[label="getStmt(6)"; ];
113 -> 90[label="getStmt(7)"; ];
113 -> 108[label="getStmt(8)"; ];
113 -> 111[label="getStmt(9)"; ];
// First 17 CFG edges overlaid on the AST nodes, labeled in CFG order.
// They trace the evaluation of the first statement (the memcpy call rooted
// at node 0) and the step on to the next statement (node 18).
//
// The edge defaults below only affect edges declared after this line, so the
// AST edges above keep the default styling. constraint=false keeps the
// overlay from influencing the ranking of the AST layout.
// (rankdir is a graph-level attribute and has no meaning on edges, so it is
// not set here.)
edge [color=blue, constraint=false];
// First argument: nodes 2..6
0 -> 2 [color=blue, label="1"]; // ExprStmt → input
2 -> 3 [color=blue, label="2"]; // input → 0
3 -> 4 [color=blue, label="3"]; // 0 → access to array
4 -> 5 [color=blue, label="4"]; // access to array → ptr
5 -> 6 [color=blue, label="5"]; // ptr → buf
// Second argument: nodes 7..11
6 -> 7 [color=blue, label="6"]; // buf → input
7 -> 8 [color=blue, label="7"]; // input → 1
8 -> 9 [color=blue, label="8"]; // 1 → access to array
9 -> 10 [color=blue, label="9"]; // access to array → ptr
10 -> 11 [color=blue, label="10"]; // ptr → buf
// Third argument: nodes 13..17. Node 12 is the CStyleCast attached to the
// call via getArgument(1).getFullyConverted(); it is not the `input` access,
// so step 11 goes from buf (11) directly to input (13).
11 -> 13 [color=blue, label="11"]; // buf → input (third argument)
13 -> 14 [color=blue, label="12"]; // input → 1
14 -> 15 [color=blue, label="13"]; // 1 → access to array
15 -> 16 [color=blue, label="14"]; // access to array → ptr
16 -> 17 [color=blue, label="15"]; // ptr → size
// All three arguments evaluated: reach the call, then the next statement.
17 -> 1 [color=blue, label="16"]; // size → call to memcpy
1 -> 18 [color=blue, label="17"]; // call to memcpy → next ExprStmt
}

Binary file not shown.

File diff suppressed because it is too large Load Diff

After

Width:  |  Height:  |  Size: 90 KiB

View File

@@ -59,37 +59,6 @@
open cfg.dot/cpp/print-cfg.pdf open cfg.dot/cpp/print-cfg.pdf
#+END_SRC #+END_SRC
* Original source code
#+BEGIN_SRC c++
int copy_mem(unsigned int unused, dyn_input_t *input,
unsigned int input_types) {
memcpy(input[0].ptr.buf, input[1].ptr.buf,
input[1].ptr.size); // NON_COMPLIANT - type not checked
copy_mem_nested(input); // NON_COMPLIANT - type not checked
if (input_types != DYN_INPUT_TYPE(DYN_INPUT_TYPE_MEM, DYN_INPUT_TYPE_MEM)) {
}
memcpy(input[0].ptr.buf, input[1].ptr.buf,
input[1].ptr.size); // NON_COMPLIANT - guard doesn't control all paths
copy_mem_nested(input); // NON_COMPLIANT - guard doesn't control all paths
if (DYN_INPUT_TYPE(DYN_INPUT_TYPE_MEM, DYN_INPUT_TYPE_MEM) == 100) {
memcpy(input[0].ptr.buf, input[1].ptr.buf,
input[1].ptr.size); // NON_COMPLIANT - useless type check
}
if (input_types != DYN_INPUT_TYPE(DYN_INPUT_TYPE_MEM, DYN_INPUT_TYPE_MEM)) {
return 1;
}
memcpy(input[0].ptr.buf, input[1].ptr.buf,
input[1].ptr.size); // COMPLIANT - type checked
copy_mem_nested(input); // COMPLIANT - type checked
return 0;
}
#+END_SRC
* AST * AST
The ast is inlined here. For better viewing, open the The ast is inlined here. For better viewing, open the
pdf ([[./ast.dot/cpp/print-ast.pdf]]) separately. pdf ([[./ast.dot/cpp/print-ast.pdf]]) separately.
@@ -102,4 +71,50 @@
#+ATTR_HTML: :class scrollable-svg #+ATTR_HTML: :class scrollable-svg
[[./cfg.dot/cpp/print-cfg.svg]] [[./cfg.dot/cpp/print-cfg.svg]]
* GPTs
A GPT was used to overlay the first 17 CFG edges on the AST; adding more resulted in a
very confusing graph. The hybrid is in =cfg.dot/cpp/ast-cfg-hybrid.dot=
* Render via dot
The hybrid is rendered via dot. The other renderers (twopi, circo) produced very
spread-out layouts.
#+BEGIN_SRC sh
# Convert dot to pdf
# twopi and circo are alternative layout engines. All three commands write
# the same pdf, so when run in sequence only the dot output (last) remains.
twopi -Tpdf < cfg.dot/cpp/ast-cfg-hybrid.dot > cfg.dot/cpp/ast-cfg-hybrid.pdf
circo -Tpdf < cfg.dot/cpp/ast-cfg-hybrid.dot > cfg.dot/cpp/ast-cfg-hybrid.pdf
dot -Tpdf < cfg.dot/cpp/ast-cfg-hybrid.dot > cfg.dot/cpp/ast-cfg-hybrid.pdf
# Same dot layout, written as svg
dot -Tsvg < cfg.dot/cpp/ast-cfg-hybrid.dot > cfg.dot/cpp/ast-cfg-hybrid.svg
# View the graph
open -a skim cfg.dot/cpp/ast-cfg-hybrid.pdf
#+END_SRC
* AST-CFG HYBRID
The ast-cfg hybrid is inlined here. For better viewing, open the
pdf ([[./cfg.dot/cpp/ast-cfg-hybrid.pdf]]) separately.
#+ATTR_HTML: :width 100%
[[./cfg.dot/cpp/ast-cfg-hybrid.svg]]
* Hybrid portion of source code
The part of the source code corresponding to the hybrid portion, with space for
adding edges:
#+BEGIN_SRC c++
int copy_mem(unsigned int unused, dyn_input_t *input,
unsigned int input_types) {
memcpy(input[0].ptr.buf, input[1].ptr.buf,
input[1].ptr.size);
copy_mem_nested(input);
...;
}
#+END_SRC