"examples/backends/vllm/launch/disagg.sh" did not exist on "93208162753986f9449d3671d6a263dfc4f4381e"
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

[workspace]
# Every crate that belongs to this workspace.
members = [
    "launch/dynamo-run",
    "lib/llm",
    "lib/runtime",
    "lib/config",
    "lib/tokens",
    "lib/kv-router",
    "lib/memory",
    "lib/async-openai",
    "lib/parsers",
    "lib/bindings/c",
    "lib/bindings/python/codegen",
    "lib/engines/*",
]

# Exclude certain packages that are slow to build and we don't ship as flagship
# features from default build, but keep them in workspace for convenience.
# - launch/dynamo-run
# - lib/engines/*
# NOTE(review): lib/kv-router and lib/bindings/python/codegen are also absent
# from default-members but are not listed above -- confirm that is intentional.
default-members = [
    "lib/llm",
    "lib/runtime",
    "lib/config",
    "lib/tokens",
    "lib/memory",
    "lib/async-openai",
    "lib/parsers",
    "lib/bindings/c",
]
resolver = "3"

# Package metadata declared once for the workspace; member crates can inherit
# a key with `<key>.workspace = true`.
[workspace.package]
version = "0.9.0"
edition = "2024"
description = "Dynamo Inference Framework"
authors = ["NVIDIA Inc. <sw-dl-dynamo@nvidia.com>"]
license = "Apache-2.0"
homepage = "https://github.com/ai-dynamo/dynamo"
repository = "https://github.com/ai-dynamo/dynamo.git"
keywords = ["llm", "genai", "inference", "nvidia", "distributed"]

# Dependency versions and features declared once for the workspace; member
# crates can inherit an entry with `<dep>.workspace = true`.
[workspace.dependencies]
# Local crates
# The version on each local crate matches [workspace.package].version; keep
# them in sync when bumping the release.
dynamo-runtime = { path = "lib/runtime", version = "0.9.0" }
dynamo-llm = { path = "lib/llm", version = "0.9.0" }
dynamo-config = { path = "lib/config", version = "0.9.0" }
dynamo-tokens = { path = "lib/tokens", version = "0.9.0" }
dynamo-memory = { path = "lib/memory", version = "0.9.0" }
dynamo-kv-router = { path = "lib/kv-router", version = "0.9.0", features = ["metrics"] }
dynamo-async-openai = { path = "lib/async-openai", version = "0.9.0", features = ["byot"] }
dynamo-parsers = { path = "lib/parsers", version = "0.9.0" }

# External dependencies
anyhow = { version = "1" }
async-nats = { version = "0.45.0", features = ["service"] }
async-stream = { version = "0.3" }
async-trait = { version = "0.1" }
async_zmq = { version = "0.4.0" }
blake3 = { version = "1" }
bytes = { version = "1" }
chrono = { version = "0.4", default-features = false, features = [
    "alloc",
    "std",
    "clock",
    "now",
    "serde",
] }
cudarc = { version = "0.17.8", features = ["cuda-12020"] }
dashmap = { version = "6.1" }
derive_builder = { version = "0.20" }
derive-getters = { version = "0.5" }
either = { version = "1.13", features = ["serde"] }
etcd-client = { version = "0.17.0", features = ["tls"] }
futures = { version = "0.3" }
hf-hub = { version = "0.4.2", default-features = false, features = [
    "tokio",
    "rustls-tls",
    "ureq",
] }

# ModelExpress for model downloading
modelexpress-client = { version = "0.2.0" }
modelexpress-common = { version = "0.2.0" }

humantime = { version = "2.2.0" }
libc = { version = "0.2" }
oneshot = { version = "0.1.13", features = ["std", "async"] }
parking_lot = "0.12.5"
prometheus = { version = "0.14" }
rand = { version = "0.9.2" }
reqwest = { version = "0.12.24", default-features = false, features = [
    "json",
    "stream",
    "rustls-tls",
] }
rmp-serde = { version = "1" }
# "rc" is for async-openai. Allows serializing Rc and Arc. Generally avoid doing that.
serde = { version = "1", features = ["derive", "rc"] }
serde_json = { version = "1" }
strum = { version = "0.27", features = ["derive"] }
tempfile = "3"
thiserror = { version = "2.0.17" }
tmq = { version = "0.5.0" }
zmq = { version = "0.10" }
tokio = { version = "1", features = ["full"] }
tokio-stream = { version = "0.1" }
tokio-util = { version = "0.7", features = ["codec", "net", "rt", "io-util"] }
tower-http = { version = "0.6", features = ["trace"] }
# NOTE(review): axum, hyper and hyper-util are pinned to exact versions ("=");
# the reason is not recorded here -- confirm before loosening.
axum = { version = "=0.8.4", features = ["macros"] }
axum-core = { version = "0.5.2" }
hyper = { version = "=1.7.0" }
hyper-util = { version = "=0.1.17" }
tracing = { version = "0.1" }
tracing-subscriber = { version = "0.3", features = [
    "env-filter",
    "local-time",
    "json",
] }
tracing-opentelemetry = { version = "0.32.0" }
opentelemetry = { version = "0.31.0", features = ["trace"] }
opentelemetry_sdk = { version = "0.31.0", features = ["trace", "rt-tokio"] }
opentelemetry-otlp = { version = "0.31.0", features = ["trace", "grpc-tonic"] }
validator = { version = "0.20.0", features = ["derive"] }
uuid = { version = "1.18.1", features = ["v4", "serde"] }
url = { version = "2.5", features = ["serde"] }
xxhash-rust = { version = "0.8", features = ["xxh3", "const_xxh3"] }

# Per-package override for dev builds: compile the `insta` crate at full
# optimization even though the rest of the dev profile is unoptimized.
[profile.dev.package]
insta.opt-level = 3

[profile.dev]
# Uncomment for release-level optimizations in dev builds; otherwise
# everything feels slow. Currently disabled, so this table sets no keys.
# opt-level = 3

[profile.release]
# These make the build much slower but shrink the binary, and could help performance
codegen-units = 1
lto = "thin"