Cargo.toml 4.54 KB
Newer Older
1
2
3
4
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

[package]
Neelay Shah's avatar
Neelay Shah committed
5
name = "dynamo-llm"
6
7
8
9
10
# The keys below are not set here; `<key>.workspace = true` inherits each value
# from the workspace root manifest.
version.workspace = true
edition.workspace = true
authors.workspace = true
license.workspace = true
homepage.workspace = true
11
12
13
repository.workspace = true
readme.workspace = true
description = "Dynamo LLM Library"
14

15
[features]
16
default = []
Ryan Olson's avatar
Ryan Olson committed
17
# todo(ops): get this working in CI as a default.
18
19
# default = ["block-manager", "testing-full"]

20
21
22
# `testing-full` is an umbrella feature that turns on both `testing-cuda` and
# `testing-nixl`; each of those enables one optional dependency (`cudarc` and
# `nixl-sys` respectively).
testing-full = ["testing-cuda", "testing-nixl"]
testing-cuda = ["dep:cudarc"]
testing-nixl = ["dep:nixl-sys"]
Ryan Olson's avatar
Ryan Olson committed
23
testing-etcd = []
24
# Enables the four optional crates declared under `# block_manager` in [dependencies].
block-manager = ["dep:nixl-sys", "dep:cudarc", "dep:ndarray", "dep:nix"]
25
26
# `cuda` enables the optional `cudarc` dependency on its own.
cuda = ["dep:cudarc"]
# `integration` forwards to the `integration` feature of `dynamo-runtime`.
integration = ["dynamo-runtime/integration"]
27

28
29
30
31
# Benchmark target `tokenizer`. `harness = false` disables the built-in libtest
# harness, so the benchmark source must supply its own `main`.
[[bench]]
name = "tokenizer"
harness = false

32
33
34
35
# Benchmark target `transfer_context_v2`. It is skipped unless both the
# `block-manager` and `testing-cuda` features are enabled; `harness = false`
# disables the built-in libtest harness.
[[bench]]
name = "transfer_context_v2"
harness = false
required-features = ["block-manager", "testing-cuda"]
36
37
[dependencies]
# repo
Neelay Shah's avatar
Neelay Shah committed
38
dynamo-runtime = { workspace = true }
39
40

# workspace
Ryan Olson's avatar
Ryan Olson committed
41
aho-corasick = "1.1"
42
anyhow = { workspace = true }
43
dynamo-async-openai = { workspace = true }
44
dynamo-parsers = { workspace = true }
45
46
async-stream = { workspace = true }
async-trait = { workspace = true }
47
async-nats = { workspace = true }
48
async_zmq = { workspace = true }
49
bytes = { workspace = true }
50
chrono = { workspace = true }
51
derive_builder = { workspace = true }
52
either = { workspace = true }
53
etcd-client = { workspace = true }
54
futures = { workspace = true }
Ryan Olson's avatar
Ryan Olson committed
55
futures-util = "0.3.31"
56
hf-hub = { workspace = true }
57
humantime = { workspace = true }                           # input/batch
58
rand = { workspace = true }
Ryan Olson's avatar
Ryan Olson committed
59
oneshot = { workspace = true }
60
parking_lot = "0.12.4"
61
prometheus = { workspace = true }
62
serde = { workspace = true }
63
serde_json = { workspace = true }
64
strum = { workspace = true }
65
tempfile = { workspace = true }
66
thiserror = { workspace = true }
Ryan Olson's avatar
Ryan Olson committed
67
tmq = "0.5.0"
68
69
70
71
72
tokio = { workspace = true }
tokio-stream = { workspace = true }
tokio-util = { workspace = true }
tracing = { workspace = true }
validator = { workspace = true }
73
url = { workspace = true }
74
uuid = { workspace = true }
75
xxhash-rust = { workspace = true }
76
77
modelexpress-client = { workspace = true }
modelexpress-common = { workspace = true }
78
akin = "0.4.0"
79
bitflags = { version = "2.4", features = ["serde"] }
80
blake3 = { version = "1.8", features = ["mmap", "rayon"] }
81
bytemuck = "1.22"
82
candle-core = { version = "0.9.1" }
83
derive-getters = "0.5"
84
offset-allocator = "0.2"
85
regex = "1"
86
rayon = "1"
87
dashmap = { version = "5.5.3" }
88

89
# input/text
90
91
92
93
# Default features are turned off; only `editor` and `history` are opted into.
dialoguer = { version = "0.11", default-features = false, features = [
  "editor",
  "history",
] }
94

Ryan Olson's avatar
Ryan Olson committed
95
# block_manager
96
nixl-sys = { version = "=0.6.0", optional = true }
97
cudarc = { version = "0.17.1", features = ["cuda-12020"], optional = true }
98
ndarray = { version = "0.16", optional = true }
99
nix = { version = "0.26", optional = true }
100

101
102
103
104
# protocols
unicode-segmentation = "1.12"

# http-service
105
axum = { workspace = true }
Graham King's avatar
Graham King committed
106
axum-server = { version = "0.7", features = ["tls-rustls"] }
107
tower-http = { workspace = true }
Graham King's avatar
Graham King committed
108
rustls = { version = "0.23" }
109
110
utoipa = { version = "5.3", features = ["axum_extras"] }
utoipa-swagger-ui = { version = "9.0", features = ["axum"] }
111

112

GuanLuo's avatar
GuanLuo committed
113
114
115
116
117
118
119
# grpc-service
# pin version to 0.13.1 so it depends on prost 0.13.5
# which is used across other libraries
tonic = { version = "0.13.1" }
# Request prost specifically so tonic-build properly compiles protobuf messages
prost = { version = "0.13.5" }

Biswa Panda's avatar
Biswa Panda committed
120
# tokenizers
121
tokenizers = { version = "0.21.4", default-features = false, features = [
Biswa Panda's avatar
Biswa Panda committed
122
123
  "onig",
  "esaxx_fast",
124
  "rustls-tls",
Biswa Panda's avatar
Biswa Panda committed
125
126
127
128
] }

# backend
galil-seiferas = { version = "0.1" }
129
toktrie = { version = "1.1" }
130
toktrie_hf_tokenizers = { version = "1.1" }
Biswa Panda's avatar
Biswa Panda committed
131
132
133
134
135

# preprocessor
bs62 = { version = "0.1" }
erased-serde = { version = "0.4" }
itertools = { version = "0.14.0" }
136
137
minijinja = { version = "2.10.2", features = ["loader"] }
minijinja-contrib = { version = "2.10.2", features = ["pycompat"] }
138
json-five = { version = "0.3" }
139

140
141
142
# Publishers
zeromq = "0.4.1"
rmp-serde = "1.3"
143
ahash = "0.8.12"
144

145
[dev-dependencies]
146
approx = "0.5"
147
assert_matches = "1.5"
148
criterion = { version = "0.3", features = ["html_reports"] }
149
hf-hub = { workspace = true }
150
151
modelexpress-client = { workspace = true }
modelexpress-common = { workspace = true }
152
proptest = "1.5.0"
153
reqwest = { workspace = true }
154
rstest = "0.18.2"
155
rstest_reuse = "0.7.0"
156
serial_test = "3"
157
temp-env = { version = "0.3.6", features = ["async_closure"] }
158
tempfile = "3.17.1"
Biswa Panda's avatar
Biswa Panda committed
159
160
161
162
163
164
# insta with the `glob`, `json`, `redactions`, and `filters` features enabled
# (test-only dependency).
insta = { version = "1.41", features = [
  "glob",
  "json",
  "redactions",
  "filters",
] }
165
166
aligned-vec = "0.6.4"
lazy_static = "1.4"
GuanLuo's avatar
GuanLuo committed
167
168

[build-dependencies]
169
tonic-build = { version = "0.13.1" }