"llm/rust/triton-llm/src/common.rs" did not exist on "5ed8c1c0ffb607136ff52777ad65c878a33a393d"
Cargo.toml 4.23 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

[package]
name = "dynamo-llm"
version.workspace = true
edition.workspace = true
authors.workspace = true
license.workspace = true
homepage.workspace = true

[features]
default = []
mistralrs = ["dep:mistralrs"]
llamacpp = ["dep:llama-cpp-2"]
sglang = ["dep:async_zmq"]
sentencepiece = ["dep:sentencepiece"]
vllm = ["dep:async_zmq"]
python = ["dep:pyo3-async-runtimes", "dep:pythonize"]
trtllm = []
cuda_kv = ["dep:cudarc", "dep:ndarray"]

cuda = ["mistralrs/cuda", "llama-cpp-2/cuda"]
metal = ["mistralrs/metal", "llama-cpp-2/metal"]
vulkan = ["llama-cpp-2/vulkan"]

[dependencies]
# repo
dynamo-runtime = { workspace = true }

# workspace
anyhow = { workspace = true }
async-stream = { workspace = true }
async-trait = { workspace = true }
bytes = { workspace = true }
derive_builder = { workspace = true }
futures = { workspace = true }
serde = { workspace = true }
thiserror = { workspace = true }
tokio = { workspace = true }
tokio-stream = { workspace = true }
tokio-util = { workspace = true }
tracing = { workspace = true }
validator = { workspace = true }
uuid = { workspace = true }
xxhash-rust = { workspace = true }
strum = { workspace = true }

async-openai = "0.27.2"
blake3 = "1"
bytemuck = "1.22"
derive-getters = "0.5"
rand = "0.9"
regex = "1"
rayon = "1"

# kv_cuda
cudarc = { git = "https://github.com/coreylowman/cudarc.git", rev = "8c52e735b55bf8e979e1a16bd85e3dfe4f87c9fe", features = ["cuda-12040"], optional = true }
ndarray = { version = "0.16", optional = true }
# candle-core = { version = "0.8.3", features = ["cuda"], optional = true }
# half = "2.4.1"

pyo3 = { version = "0.23.3", default-features = false, features = [
  "macros",
  "experimental-async",
  "experimental-inspect",
  "py-clone",
] }

# protocols
chrono = { version = "0.4", default-features = false, features = [
  "alloc",
  "std",
  "clock",
  "now",
  "serde",
] }
serde_json = { version = "1" }
unicode-segmentation = "1.12"

# http-service
axum = "0.8"
prometheus = { version = "0.13" }

# mistralrs
either = { version = "1.13" }
indexmap = { version = "2.6" }
mistralrs = { git = "https://github.com/EricLBuehler/mistral.rs.git", rev = "5e689c9", optional = true }

# sglang
async_zmq = { version = "0.4.0", optional = true }
libc = "0.2"
serde-pickle = "1.2.0"

# llamacpp
llama-cpp-2 = { version = "0.1.86", optional = true }

# tokenizers
tokenizers = { version = "0.21.0", default-features = false, features = [
  "onig",
  "esaxx_fast",
  # Waiting for release: https://github.com/huggingface/tokenizers/issues/1736
  # "rustls-tls",
] }
sentencepiece = { version = "0.11.2", optional = true }

# backend
galil-seiferas = { version = "0.1" }
toktrie = { version = "0.6.28" }
toktrie_hf_tokenizers = { version = "0.6.28" }

# preprocessor
bs62 = { version = "0.1" }
erased-serde = { version = "0.4" }
itertools = { version = "0.14.0" }
minijinja = { version = "2.3.1", features = ["loader"] }
minijinja-contrib = { version = "2.3.1", features = ["pycompat"] }
semver = { version = "1", features = ["serde"] }

# trtllm
serde_repr = "0.1"

# python
pyo3-async-runtimes = { version = "0.23.0", optional = true, default-features = false, features = [
  "attributes",
  "testing",
  "tokio-runtime",
  "unstable-streams",
] }
pythonize = { version = "0.23", optional = true }

[dev-dependencies]
proptest = "1.5.0"
reqwest = { version = "0.12", default-features = false, features = ["json", "stream", "rustls-tls"] }
rstest = "0.18.2"
rstest_reuse = "0.7.0"
tempfile = "3.17.1"
hf-hub = "0.4.1"
insta = { version = "1.41", features = [
  "glob",
  "json",
  "redactions",
  "filters",
] }

[build-dependencies]
bindgen = "0.70"
cmake = "0.1"