# api.py
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

from dynamo._core import (
    run_mocker_synthetic_trace_replay as _run_mocker_synthetic_trace_replay,
)
from dynamo._core import run_mocker_trace_replay as _run_mocker_trace_replay


def run_trace_replay(
    trace_file,
    *,
    extra_engine_args=None,
    prefill_engine_args=None,
    decode_engine_args=None,
    router_config=None,
    aic_perf_config=None,
    num_workers=1,
    num_prefill_workers=1,
    num_decode_workers=1,
    replay_concurrency=None,
    replay_mode="offline",
    router_mode="round_robin",
    arrival_speedup_ratio=1.0,
    trace_block_size=512,
):
    """Run a mocker trace replay for ``trace_file`` via ``dynamo._core``.

    This is a thin pass-through wrapper: every argument is forwarded
    unchanged, under the same name, to
    ``dynamo._core.run_mocker_trace_replay`` and that function's result is
    returned as-is. No validation or conversion happens in this layer, so
    the accepted types and exact semantics of each option are defined by the
    underlying implementation.

    Args:
        trace_file: The trace to replay; forwarded as the only positional
            argument.
        extra_engine_args: Optional engine arguments. Defaults to ``None``.
        prefill_engine_args: Optional engine arguments, presumably for
            prefill workers (confirm against ``dynamo._core``). Defaults to
            ``None``.
        decode_engine_args: Optional engine arguments, presumably for decode
            workers (confirm against ``dynamo._core``). Defaults to ``None``.
        router_config: Optional router configuration. Defaults to ``None``.
        aic_perf_config: Optional AIC perf configuration. Defaults to
            ``None``.
        num_workers: Worker count. Defaults to ``1``.
        num_prefill_workers: Prefill worker count. Defaults to ``1``.
        num_decode_workers: Decode worker count. Defaults to ``1``.
        replay_concurrency: Optional replay concurrency setting. Defaults to
            ``None``.
        replay_mode: Replay mode name. Defaults to ``"offline"``.
        router_mode: Router mode name. Defaults to ``"round_robin"``.
        arrival_speedup_ratio: Arrival speed-up ratio. Defaults to ``1.0``.
        trace_block_size: Trace block size. Defaults to ``512``.

    Returns:
        Whatever ``dynamo._core.run_mocker_trace_replay`` returns.
    """
    # All options are keyword-only here and are passed by keyword so the
    # mapping to the underlying function stays explicit.
    return _run_mocker_trace_replay(
        trace_file,
        extra_engine_args=extra_engine_args,
        prefill_engine_args=prefill_engine_args,
        decode_engine_args=decode_engine_args,
        router_config=router_config,
        aic_perf_config=aic_perf_config,
        num_workers=num_workers,
        num_prefill_workers=num_prefill_workers,
        num_decode_workers=num_decode_workers,
        replay_concurrency=replay_concurrency,
        replay_mode=replay_mode,
        router_mode=router_mode,
        arrival_speedup_ratio=arrival_speedup_ratio,
        trace_block_size=trace_block_size,
    )


def run_synthetic_trace_replay(
    input_tokens,
    output_tokens,
    request_count,
    *,
    extra_engine_args=None,
    prefill_engine_args=None,
    decode_engine_args=None,
    router_config=None,
    aic_perf_config=None,
    num_workers=1,
    num_prefill_workers=1,
    num_decode_workers=1,
    replay_concurrency=None,
    replay_mode="offline",
    router_mode="round_robin",
    arrival_speedup_ratio=1.0,
    arrival_interval_ms=1.0,
    turns_per_session=1,
    shared_prefix_ratio=0.0,
    num_prefix_groups=0,
    inter_turn_delay_ms=0.0,
):
    """Run a mocker replay of a synthetic trace via ``dynamo._core``.

    This is a thin pass-through wrapper: every argument is forwarded
    unchanged, under the same name, to
    ``dynamo._core.run_mocker_synthetic_trace_replay`` and that function's
    result is returned as-is. No validation or conversion happens in this
    layer, so the accepted types and exact semantics of each option are
    defined by the underlying implementation.

    Args:
        input_tokens: First positional argument; presumably the input token
            count per request (confirm against ``dynamo._core``).
        output_tokens: Second positional argument; presumably the output
            token count per request (confirm against ``dynamo._core``).
        request_count: Third positional argument; presumably the number of
            synthetic requests (confirm against ``dynamo._core``).
        extra_engine_args: Optional engine arguments. Defaults to ``None``.
        prefill_engine_args: Optional engine arguments, presumably for
            prefill workers (confirm against ``dynamo._core``). Defaults to
            ``None``.
        decode_engine_args: Optional engine arguments, presumably for decode
            workers (confirm against ``dynamo._core``). Defaults to ``None``.
        router_config: Optional router configuration. Defaults to ``None``.
        aic_perf_config: Optional AIC perf configuration. Defaults to
            ``None``.
        num_workers: Worker count. Defaults to ``1``.
        num_prefill_workers: Prefill worker count. Defaults to ``1``.
        num_decode_workers: Decode worker count. Defaults to ``1``.
        replay_concurrency: Optional replay concurrency setting. Defaults to
            ``None``.
        replay_mode: Replay mode name. Defaults to ``"offline"``.
        router_mode: Router mode name. Defaults to ``"round_robin"``.
        arrival_speedup_ratio: Arrival speed-up ratio. Defaults to ``1.0``.
        arrival_interval_ms: Arrival interval; the name suggests
            milliseconds. Defaults to ``1.0``.
        turns_per_session: Turns per session. Defaults to ``1``.
        shared_prefix_ratio: Shared-prefix ratio. Defaults to ``0.0``.
        num_prefix_groups: Number of prefix groups. Defaults to ``0``.
        inter_turn_delay_ms: Delay between turns; the name suggests
            milliseconds. Defaults to ``0.0``.

    Returns:
        Whatever ``dynamo._core.run_mocker_synthetic_trace_replay`` returns.
    """
    # The three workload-shape arguments go positionally; every option is
    # keyword-only here and passed by keyword to keep the mapping explicit.
    return _run_mocker_synthetic_trace_replay(
        input_tokens,
        output_tokens,
        request_count,
        extra_engine_args=extra_engine_args,
        prefill_engine_args=prefill_engine_args,
        decode_engine_args=decode_engine_args,
        router_config=router_config,
        aic_perf_config=aic_perf_config,
        num_workers=num_workers,
        num_prefill_workers=num_prefill_workers,
        num_decode_workers=num_decode_workers,
        replay_concurrency=replay_concurrency,
        replay_mode=replay_mode,
        router_mode=router_mode,
        arrival_speedup_ratio=arrival_speedup_ratio,
        arrival_interval_ms=arrival_interval_ms,
        turns_per_session=turns_per_session,
        shared_prefix_ratio=shared_prefix_ratio,
        num_prefix_groups=num_prefix_groups,
        inter_turn_delay_ms=inter_turn_delay_ms,
    )