# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

[package]
# Every key except `name` and `description` is inherited from the workspace manifest.
name = "dynamo-llm"
version.workspace = true
authors.workspace = true
edition.workspace = true
homepage.workspace = true
license.workspace = true
readme.workspace = true
repository.workspace = true
description = "Dynamo LLM Library"
14

15
[features]
default = ["block-manager"]
# todo(ops): get this working in CI as a default.
# default = ["block-manager", "testing-full"]

# Test feature sets. `testing-full` turns on both the CUDA and the NIXL variants.
testing-full = ["testing-cuda", "testing-nixl"]
testing-cuda = ["dep:cudarc", "dynamo-memory/testing-cuda"]
testing-nixl = ["dep:nixl-sys", "dynamo-memory/testing-nixl"]
# Marker feature: enables no optional dependencies and no other features.
testing-etcd = []
# Forward the NVTX feature to dynamo-runtime (build with --features nvtx or dynamo-llm/nvtx)
nvtx = ["dynamo-runtime/nvtx"]
# Pulls in the optional dependencies listed under `# block_manager` in [dependencies].
block-manager = ["dep:nixl-sys", "dep:cudarc", "dep:nix", "dep:aligned-vec"]
nccl = ["dep:cudarc", "cudarc/nccl"]  # Enable NCCL collective operations
block-manager-bench = ["block-manager", "testing-full", "dep:clap", "dep:indicatif"]
cuda = ["dep:cudarc"]
integration = ["dynamo-runtime/integration"]
media-ffmpeg = ["dep:video-rs", "dep:ffmpeg-next", "dep:memfile"]
bench = ["dynamo-kv-router/bench"]
kv-router-stress = ["dep:clap", "dep:indicatif", "bench"]
34

35
# `harness = false` disables the built-in libtest bench harness, so each
# benchmark target must provide its own `main`.
[[bench]]
name = "tokenizer_simple"
harness = false

[[bench]]
name = "tokenizer_dataset"
harness = false

43
44
45
46
# Feature-gated benchmarks: Cargo skips a target unless every feature in its
# `required-features` list is enabled.
[[bench]]
name = "transfer_context_v2"
harness = false
required-features = ["block-manager", "testing-cuda"]

[[bench]]
name = "kv_router_bench"
harness = false
required-features = ["kv-router-stress"]

53
54
[dependencies]
# repo
dynamo-config = { workspace = true }
dynamo-kv-router = { workspace = true, features = ["metrics", "runtime-protocols"] }
dynamo-memory = { workspace = true }
dynamo-mocker = { workspace = true }
dynamo-runtime = { workspace = true }
dynamo-tokens = { workspace = true }
61
62

# workspace
# NOTE(review): aho-corasick and sha2 declare their own version requirement
# here instead of inheriting one from the workspace.
aho-corasick = "1.1"
anyhow = { workspace = true }
dynamo-protocols = { workspace = true }
dynamo-parsers = { workspace = true }
async-stream = { workspace = true }
async-trait = { workspace = true }
async-nats = { workspace = true }
bytes = { workspace = true }
chrono = { workspace = true }
dashmap = { workspace = true }
derive_builder = { workspace = true }
either = { workspace = true }
futures = { workspace = true }
futures-util = { workspace = true }
hf-hub = { workspace = true }
rand = { workspace = true }
oneshot = { workspace = true }
parking_lot = { workspace = true }
prometheus = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
sha2 = "0.10"
strum = { workspace = true }
tempfile = { workspace = true }
thiserror = { workspace = true }
tmq = { workspace = true }
tokio = { workspace = true }
tokio-stream = { workspace = true }
tokio-util = { workspace = true }
tracing = { workspace = true }
validator = { workspace = true }
url = { workspace = true }
uuid = { workspace = true }
xxhash-rust = { workspace = true }
modelexpress-client = { workspace = true }
modelexpress-common = { workspace = true }
Ryan Olson's avatar
Ryan Olson committed
99
100


101
# crate-local version requirements (none of these inherit from the workspace)
bitflags = { version = "2.4", features = ["serde"] }
blake3 = { version = "1.8", features = ["mmap", "rayon"] }
bytemuck = "1.22"
derive-getters = "0.5"
offset-allocator = "0.2"
rayon = "1"
bincode = { version = "2.0.1", features = ["serde", "derive"] }
108

109
110
111
# lora
object_store = { version = "0.12.4", features = ["aws", "gcp", "azure", "http"] }

# input/text
dialoguer = { version = "0.11", default-features = false, features = ["editor", "history"] }
117

Ryan Olson's avatar
Ryan Olson committed
118
# block_manager
# All optional: only compiled when a feature names them via `dep:<crate>`.
aligned-vec = { version = "0.6.4", optional = true }
# Exact-version requirement (`=`): only 0.10.1 satisfies it.
nixl-sys = { version = "=0.10.1", optional = true }
cudarc = { workspace = true, optional = true }
nix = { version = "0.26", optional = true }
123

124
125
# media (zlib compression for NIXL metadata)
flate2 = { version = "1" }

# block_manager_bench
# Optional: only compiled when a feature names them via `dep:<crate>`.
clap = { version = "4.5.49", features = ["derive"], optional = true }
indicatif = { version = "0.18.0", optional = true }

131
# http-service
axum = { workspace = true }
axum-server = { version = "0.7", features = ["tls-rustls"] }
tower-http = { workspace = true }
rustls = { version = "0.23" }
utoipa = { version = "5.3", features = ["axum_extras"] }
utoipa-swagger-ui = { version = "9.0", features = ["axum"] }
138

139

GuanLuo's avatar
GuanLuo committed
140
141
142
143
144
145
146
# grpc-service
# Pin tonic to 0.13.1 so that it depends on prost 0.13.5, the version used
# across the other libraries.
# NOTE(review): "0.13.1" is a caret requirement (>=0.13.1, <0.14.0), not an
# exact pin; use "=0.13.1" if an exact pin is intended.
tonic = "0.13.1"
# Request prost explicitly so that tonic-build compiles the protobuf messages properly.
prost = "0.13.5"

Biswa Panda's avatar
Biswa Panda committed
147
# tokenizers
tokenizers = { version = "0.21.4", default-features = false, features = [
  "onig",
  "esaxx_fast",
  "rustls-tls",
] }
tiktoken-rs = { version = "0.9", default-features = false }
rustc-hash = "1.1"
fastokens = { workspace = true }
Biswa Panda's avatar
Biswa Panda committed
156
157
158
159
160
161

# backend
galil-seiferas = { version = "0.1" }

# preprocessor
bs62 = { version = "0.1" }
# minijinja and minijinja-contrib use the same version requirement.
minijinja = { version = "2.15.1", features = ["loader", "loop_controls"] }
minijinja-contrib = { version = "2.15.1", features = ["pycompat"] }
json-five = { version = "0.3" }
165

166
167
168
# media loading in the preprocessor
reqwest = { workspace = true }
base64 = { version = "0.22" }
image = { version = "0.25", features = ["serde"] }
# Optional: enabled together by the `media-ffmpeg` feature.
video-rs = { version = "0.11.0", optional = true }
ffmpeg-next = { version = "8.1.0", optional = true }
memfile = { version = "0.3.2", optional = true }
tokio-rayon = { version = "2" }
ndarray = { version = "0.16" }

# Publishers
rmp-serde = "1.3"

179
[dev-dependencies]
# NOTE(review): hf-hub, modelexpress-client, modelexpress-common, reqwest and
# tempfile are also declared in [dependencies]; tempfile uses an explicit
# version here but `workspace = true` there. Confirm the duplication is intended.
approx = "0.5"
assert_matches = "1.5"
criterion = { version = "0.3", features = ["html_reports"] }
hf-hub = { workspace = true }
modelexpress-client = { workspace = true }
modelexpress-common = { workspace = true }
reqwest = { workspace = true }
rstest = "0.18.2"
serial_test = "3"
temp-env = { version = "0.3.6", features = ["async_closure"] }
tempfile = "3.17.1"
insta = { version = "1.41", features = ["glob", "json", "redactions", "filters"] }
lazy_static = "1.4"
mockito = "1.7.0"
dynamo-bench = { path = "../bench" }
GuanLuo's avatar
GuanLuo committed
201

202
203
204
205
[[bin]]
name = "generate-frontend-openapi"
path = "src/bin/generate_frontend_openapi.rs"

# Only built when the `block-manager-bench` feature is enabled.
[[bin]]
name = "bench_local_transfer_v2"
path = "bin/bench_local_transfer_v2.rs"
required-features = ["block-manager-bench"]

[build-dependencies]
# Same version requirement as `tonic` in [dependencies].
tonic-build = { version = "0.13.1" }