convert_otel_2_perfetto.py 12.4 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
import argparse
import bisect
import json
import time
from collections import defaultdict
from pathlib import Path
from typing import Any, Dict, Iterable, List, Tuple

# Command-line interface. Parsing happens at import time; the resulting
# `args` namespace is read as a module-level global further down the file.
parser = argparse.ArgumentParser(
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    description="Convert SGLang OTEL trace files to Perfetto format.",
)
parser.add_argument(
    "-i",
    "--input",
    type=str,
    required=True,
    dest="input_file",
    help="Path to the input OTEL trace file (JSON or JSONL format).",
)
parser.add_argument(
    "-o",
    "--output",
    type=str,
    dest="output_file",
    default="sglang_trace_perfetto.json",
    help="Path to the output Perfetto JSON file.",
)
parser.add_argument(
    "-f",
    "--torch-file",
    dest="torch_file",
    help="specify torch profile file",
)

args = parser.parse_args()

# Without a torch profile, timestamps are left un-shifted (baseline 0) and
# there is no document to merge into. With one, the loaded profile is kept in
# `perfetto_data` and its "baseTimeNanoseconds" value becomes the baseline.
baseline = 0
perfetto_data = None
if args.torch_file:
    with open(args.torch_file, "r", encoding="utf-8") as file:
        perfetto_data = json.load(file)
    baseline = perfetto_data["baseTimeNanoseconds"]


def id_generator():
    """Yield the integers 0, 1, 2, ... without end."""
    value = -1
    while True:
        value += 1
        yield value


# Module-wide counter; generate_perfetto_links draws new correlation ids from it.
relation_id_gen = id_generator()


class SpanLayoutContainer:
    """Sorted collection of ``(start, end)`` intervals placed on one track.

    ``intervals`` is kept sorted by ``(start, end)``. A parallel list,
    ``_max_ends``, stores for every position the largest ``end`` seen among
    the intervals up to and including that position. It lets
    ``check_overlap`` notice an earlier interval that is still open even when
    a shorter (for example zero-length) interval was inserted after it.
    """

    def __init__(self):
        self.intervals = []
        # _max_ends[i] == max(end for (_, end) in self.intervals[: i + 1])
        self._max_ends = []

    def check_overlap(self, start, end):
        """Return True if ``(start, end)`` overlaps any stored interval.

        Intervals that merely touch (one ends exactly where the other starts)
        are not considered overlapping.
        """
        # First stored interval whose start is >= ``start``.
        idx = bisect.bisect_left(self.intervals, (start, float("-inf")))

        # Everything before idx starts earlier than ``start``; it overlaps
        # iff one of those intervals ends after ``start``. Looking only at
        # intervals[idx - 1] would miss a long interval hidden behind a
        # shorter one that starts later.
        if idx > 0 and self._max_ends[idx - 1] > start:
            return True

        # Everything from idx on starts at or after ``start``; the first one
        # has the smallest start, so it is the only one that needs checking.
        if idx < len(self.intervals) and self.intervals[idx][0] < end:
            return True
        return False

    def insert_span(self, start, end):
        """Insert ``(start, end)`` at its sorted position (no overlap check)."""
        idx = bisect.bisect_left(self.intervals, (start, end))
        self.intervals.insert(idx, (start, end))

        running = end if idx == 0 else max(self._max_ends[idx - 1], end)
        self._max_ends.insert(idx, running)
        # Propagate the new end to later positions. _max_ends is
        # non-decreasing, so stop at the first entry already >= end.
        for pos in range(idx + 1, len(self._max_ends)):
            if self._max_ends[pos] >= end:
                break
            self._max_ends[pos] = end


def new_metadata_level1(name: str, pid):
    """Build the ``process_name`` metadata ("M") event labelling ``pid`` as ``name``."""
    event = {"name": "process_name", "ph": "M", "pid": pid}
    event["args"] = {"name": name}
    return event


def new_metadata_level2(name: str, pid, slot_seq):
    """Build the ``thread_name`` metadata ("M") event for track ``slot_seq`` of ``pid``."""
    event = dict(name="thread_name", ph="M", pid=pid, tid=slot_seq)
    event["args"] = dict(name=name)
    return event


def __find_line(graph, trans_graph_status, slot_meta_data, pid, start, end):
    """Pick the line (track number) of ``pid`` on which ``(start, end)`` is placed.

    Args:
        graph: dict ``pid -> {line: SpanLayoutContainer}``; extended in place
            when a new pid or a new line is needed.
        trans_graph_status: dict ``pid -> line`` remembering the line last
            chosen through this dict; updated in place.
        slot_meta_data: list that receives a "slot" thread-name metadata
            event each time a line is created.
        pid: process id whose lines are searched.
        start, end: interval to place.

    Returns:
        The chosen line number (lines are numbered from 1).

    The interval itself is not inserted here; the caller does that.
    """
    # Prefer the remembered line: zero-length intervals reuse it
    # unconditionally, others only if it is free.
    if pid in trans_graph_status:
        remembered = trans_graph_status[pid]
        if start == end or not graph[pid][remembered].check_overlap(start, end):
            return remembered

    if pid in graph:
        lanes = graph[pid]
        # First existing line, in insertion order, with room for the interval.
        chosen = next(
            (
                number
                for number, lane in lanes.items()
                if not lane.check_overlap(start, end)
            ),
            None,
        )
        if chosen is None:
            # Every existing line is occupied: open a new one.
            chosen = len(lanes) + 1
            lanes[chosen] = SpanLayoutContainer()
            slot_meta_data.append(new_metadata_level2("slot", pid, chosen))
    else:
        # First interval ever seen for this pid.
        chosen = 1
        graph[pid] = {chosen: SpanLayoutContainer()}
        slot_meta_data.append(new_metadata_level2("slot", pid, chosen))

    trans_graph_status[pid] = chosen
    return chosen


# Type alias for a span held as a plain string-keyed dict.
# NOTE(review): not referenced by any annotation in the visible code.
OtelSpan = Dict[str, Any]


def load_otel_data(path: str | Path):
    p = Path(path)
    with p.open("rt", encoding="utf-8") as f:
        first = f.read(1)
        f.seek(0)
        if first == "[":
            data = json.load(f)  # JSON array
        else:
            data = [json.loads(line) for line in f if line.strip()]  # JSONL
    return data


def extract_all_otel_spans(otel_data):
    """Collect every span from the loaded records into one flat list.

    Each record is walked as ``resourceSpans -> scopeSpans -> spans``. The
    span dicts are modified in place: their ``"attributes"`` list of
    ``{"key": ..., "value": {<type>: <val>}}`` entries is replaced by a plain
    ``{key: val}`` dict (``{}`` when the span has no attributes).

    Returns:
        The list of span dicts, in encounter order.
    """

    def _flatten(raw_attributes):
        # Keep the first value found in each "value" wrapper, or None if empty.
        return {
            entry.get("key"): next(iter(entry.get("value", {}).values()), None)
            for entry in raw_attributes
        }

    collected = []
    for record in otel_data:
        for resource_block in record["resourceSpans"]:
            for scope_block in resource_block["scopeSpans"]:
                for span in scope_block["spans"]:
                    span["attributes"] = _flatten(span.get("attributes", ()))
                    collected.append(span)
    return collected


def build_otel_span_tree(otel_spans):
    """Link spans into parent/child trees and resolve their links.

    Every span dict is modified in place:

    * ``span["child"]`` becomes the list of spans whose ``parentSpanId``
      equals this span's ``spanId``.
    * if the span has ``"links"``, that list is replaced by the linked span
      dicts themselves; links to unknown span ids are dropped.

    Args:
        otel_spans: flat list of span dicts, each with a ``"spanId"``.

    Returns:
        The spans with a missing or empty ``parentSpanId``, in input order.
        Spans whose parent id is not present in ``otel_spans`` are neither
        returned nor attached to any parent.
    """
    span_id_map = {span["spanId"]: span for span in otel_spans}
    for span in otel_spans:
        span["child"] = []

    bootstrap_room_spans = []

    for span in otel_spans:
        parent_span_id = span.get("parentSpanId", "")
        if parent_span_id == "":
            # Every parentless span counts as a root; no attribute-based
            # filtering is applied here.
            bootstrap_room_spans.append(span)
        elif parent_span_id in span_id_map:
            span_id_map[parent_span_id]["child"].append(span)

        if "links" in span:
            link_spans = []
            for link in span["links"]:
                link_span = span_id_map.get(link["spanId"])
                if link_span:
                    link_spans.append(link_span)
            span["links"] = link_spans

    return bootstrap_room_spans


def generate_perfetto_span(otel_bootstrap_room_spans, thread_meta_data):
    """Attach a Perfetto complete ("X") event to every fourth-level span.

    The tree is walked as root -> request node -> thread -> span. For each
    span under a thread span this function, in place:

    * copies ``bootstrap_room`` (from the root) and ``rid`` (from the request
      node) into the span's attributes,
    * stores ``host_id`` and ``pid`` (from the thread span) on the span,
    * converts ``startTimeUnixNano`` / ``endTimeUnixNano`` to ``int``,
    * stores the event under ``span["perfetto_span"]`` with ``tid`` 0, and
    * appends the span to the root's ``"spans"`` list.

    Args:
        otel_bootstrap_room_spans: root spans with ``"child"`` lists and
            flattened ``"attributes"`` dicts.
        thread_meta_data: dict ``pid -> process_name metadata event``; an
            entry is added the first time a pid is seen.

    Raises:
        KeyError: if a required attribute (``bootstrap_room``, ``rid``,
            ``pid``, ``host_id``, ``thread_label``) or timestamp is missing.
    """
    for bootstrap_room_span in otel_bootstrap_room_spans:
        bootstrap_room = bootstrap_room_span["attributes"]["bootstrap_room"]
        bootstrap_room_span["spans"] = []

        for node_req_span in bootstrap_room_span["child"]:
            rid = node_req_span["attributes"]["rid"]

            for thread_span in node_req_span["child"]:
                thread_attrs = thread_span["attributes"]
                pid = int(thread_attrs["pid"])
                host_id = thread_attrs["host_id"]

                # Label: first 8 chars of the host id, the thread label and,
                # when present, the TP rank.
                thread_name = f'{host_id[:8]}:{thread_attrs["thread_label"]}'
                if "tp_rank" in thread_attrs:
                    thread_name += f"-TP{thread_attrs['tp_rank']}"

                if pid not in thread_meta_data:
                    thread_meta_data[pid] = new_metadata_level1(thread_name, pid)

                for span in thread_span["child"]:
                    span["attributes"]["bootstrap_room"] = bootstrap_room
                    span["attributes"]["rid"] = rid
                    span["host_id"] = host_id
                    span["pid"] = pid

                    span["startTimeUnixNano"] = int(span["startTimeUnixNano"])
                    span["endTimeUnixNano"] = int(span["endTimeUnixNano"])
                    ts = span["startTimeUnixNano"]
                    dur = span["endTimeUnixNano"] - ts

                    perfetto_span = {
                        "ph": "X",
                        "name": span.get("name", "unknown"),
                        "cat": "sglang",
                        # Nanoseconds -> microseconds, relative to baseline.
                        "ts": (ts - baseline) / 1000.0,
                        # 1 us is shaved off the duration (presumably so
                        # back-to-back spans do not touch -- TODO confirm).
                        # Clamp at 0 so spans shorter than 1 us, including
                        # zero-length ones, never get a negative duration.
                        "dur": max(dur - 1000, 0) / 1000.0,
                        "pid": pid,
                        "tid": 0,  # real track is assigned by the layout pass
                        "args": span["attributes"],
                    }

                    span["perfetto_span"] = perfetto_span
                    bootstrap_room_span["spans"].append(span)


def generate_perfetto_span_layout(otel_bootstrap_room_spans, slot_meta_data):
    """Assign every span a track (``tid``) so spans on one track do not collide.

    Each root's ``"spans"`` list is sorted in place by start time. Roots are
    then processed in order of their earliest span; for every span a line is
    obtained from ``__find_line``, the span's interval is recorded on that
    line, and the line number is written to ``span["perfetto_span"]["tid"]``.

    Args:
        otel_bootstrap_room_spans: roots carrying a ``"spans"`` list whose
            items have ``perfetto_span``, ``startTimeUnixNano`` and
            ``endTimeUnixNano``.
        slot_meta_data: list that receives a metadata event for every line
            that gets created.
    """
    for bootstrap_room_span in otel_bootstrap_room_spans:
        bootstrap_room_span["spans"] = sorted(
            bootstrap_room_span["spans"], key=lambda x: int(x["startTimeUnixNano"])
        )

    # A root without any spans has nothing to place, and indexing spans[0]
    # on it would raise IndexError, so such roots are left out of the walk.
    ordered_roots = sorted(
        (root for root in otel_bootstrap_room_spans if root["spans"]),
        key=lambda x: int(x["spans"][0]["startTimeUnixNano"]),
    )

    graph = {}  # pid -> {line: SpanLayoutContainer}, shared by all roots
    for bootstrap_room_span in ordered_roots:
        # pid -> line last used within this root; starts empty for each root.
        req_thread_status = {}
        for span in bootstrap_room_span["spans"]:
            pid = span["perfetto_span"]["pid"]
            line = __find_line(
                graph,
                req_thread_status,
                slot_meta_data,
                pid,
                span["startTimeUnixNano"],
                span["endTimeUnixNano"],
            )
            graph[pid][line].insert_span(
                span["startTimeUnixNano"], span["endTimeUnixNano"]
            )
            span["perfetto_span"]["tid"] = line
            span["perfetto_span"]["tid"] = line


def generate_perfetto_events(otel_bootstrap_room_spans):
    """Turn each span's events into Perfetto instant ("i") events.

    For every span in every root's ``"spans"`` list, ``span["perfetto_events"]``
    is set to a list with one instant event per entry of ``span["events"]``
    (an empty list when the span has no events). Each instant event reuses
    the ``pid`` / ``tid`` of the span's ``perfetto_span`` and carries the
    event's flattened attributes as ``args``.

    Raises:
        KeyError: if an event has no ``timeUnixNano``.
    """
    for bootstrap_room_span in otel_bootstrap_room_spans:
        for span in bootstrap_room_span["spans"]:
            span["perfetto_events"] = []
            if "events" in span:
                for event in span["events"]:
                    # An event may have no "attributes" key at all; treat
                    # that like an empty list, the same way spans without
                    # attributes are handled.
                    attributes_dict = {
                        attr.get("key"): next(
                            iter(attr.get("value", {}).values()), None
                        )
                        for attr in event.get("attributes", [])
                    }
                    perfetto_event = {
                        "ph": "i",
                        "cat": "sglang",
                        # Nanoseconds -> microseconds, relative to baseline.
                        "ts": (int(event["timeUnixNano"]) - baseline) / 1000.0,
                        "pid": span["perfetto_span"]["pid"],
                        "tid": span["perfetto_span"]["tid"],
                        "name": event.get("name", "unknown"),
                        "args": attributes_dict,
                    }

                    span["perfetto_events"].append(perfetto_event)


def generate_perfetto_links(otel_bootstrap_room_spans):
    """Emit a Perfetto flow-event pair for every resolved span link.

    For each span, ``span["perfetto_links"]`` is set to a list holding, per
    linked span, a flow start ("s") event at the linked span and a flow
    finish ("f") event at this span. Both share one id, stored on the linked
    span as ``args["correlation"]``; a fresh id is taken from
    ``relation_id_gen`` when the linked span has none yet.
    """
    for root in otel_bootstrap_room_spans:
        for span in root["spans"]:
            flow_events = []
            span["perfetto_links"] = flow_events
            if "links" not in span:
                continue

            for link_span in span["links"]:
                source = link_span["perfetto_span"]
                source_args = source["args"]
                # Reuse the linked span's id if it has one already.
                if "correlation" not in source_args:
                    source_args["correlation"] = next(relation_id_gen)
                flow_id = source_args["correlation"]

                target = span["perfetto_span"]

                flow_events.append(
                    {
                        "ph": "s",
                        "id": flow_id,
                        "pid": source["pid"],
                        "tid": source["tid"],
                        "ts": source["ts"],
                        "cat": "ac2g",
                        "name": "ac2g",
                    }
                )
                flow_events.append(
                    {
                        "ph": "f",
                        "id": flow_id,
                        "pid": target["pid"],
                        "tid": target["tid"],
                        "ts": target["ts"],
                        "cat": "ac2g",
                        "name": "ac2g",
                        "bp": "e",
                    }
                )


def gather_all_perfetto_elems(
    otel_bootstrap_room_spans, thread_meta_data, slot_meta_data
):
    """Flatten everything that was generated into one list of trace events.

    Order: the values of ``thread_meta_data``, then ``slot_meta_data``, then
    for every span (root by root) its ``perfetto_span`` followed by its
    ``perfetto_events`` and its ``perfetto_links``.
    """
    collected = [*thread_meta_data.values(), *slot_meta_data]
    for root in otel_bootstrap_room_spans:
        for leaf in root["spans"]:
            collected += [
                leaf["perfetto_span"],
                *leaf["perfetto_events"],
                *leaf["perfetto_links"],
            ]
    return collected


def write_json(perfetto_elems):
    """Write the final trace to ``args.output_file`` as indented JSON.

    Without ``--torch-file`` the output is just ``perfetto_elems``. With it,
    the elements are appended to the loaded profile's ``"traceEvents"`` and
    every event whose ``cat`` is ``"gpu_user_annotation"`` is removed before
    the whole profile document is written. The module-level
    ``perfetto_data`` is rebound or mutated accordingly.
    """
    global perfetto_data

    if not args.torch_file:
        perfetto_data = perfetto_elems
    else:
        merged_events = perfetto_data["traceEvents"]
        merged_events.extend(perfetto_elems)
        perfetto_data["traceEvents"] = [
            event
            for event in merged_events
            if event.get("cat") != "gpu_user_annotation"
        ]

    with open(args.output_file, "w", encoding="utf-8") as file:
        json.dump(perfetto_data, file, ensure_ascii=False, indent=4)


def main():
    """Run the whole conversion and print where the result went and how long it took."""
    started = time.time()

    # Load -> flatten -> build the span trees.
    otel_bootstrap_room_spans = build_otel_span_tree(
        extract_all_otel_spans(load_otel_data(args.input_file))
    )

    # Generate Perfetto elements; the layout pass must run before events and
    # links because both copy the tid it assigns.
    thread_meta_data = {}
    slot_meta_data = []
    generate_perfetto_span(otel_bootstrap_room_spans, thread_meta_data)
    generate_perfetto_span_layout(otel_bootstrap_room_spans, slot_meta_data)
    generate_perfetto_events(otel_bootstrap_room_spans)
    generate_perfetto_links(otel_bootstrap_room_spans)

    write_json(
        gather_all_perfetto_elems(
            otel_bootstrap_room_spans, thread_meta_data, slot_meta_data
        )
    )

    execution_time = time.time() - started
    print("\nConversion finished successfully!")
    print(f"Output written to: {args.output_file}")
    print(f"Execution time: {execution_time * 1000:.4f} ms")


# Run the conversion only when executed as a script. Note that the
# command-line arguments are parsed at module level, above, regardless.
if __name__ == "__main__":
    main()