# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Package metadata. Every field except `name` and `description` is inherited
# from the workspace manifest.
[package]
name = "dynamo-llm"
version.workspace = true
edition.workspace = true
authors.workspace = true
license.workspace = true
homepage.workspace = true
repository.workspace = true
readme.workspace = true
description = "Dynamo LLM Library"

# No feature is enabled by default; each feature below only switches on the
# optional dependencies it names.
[features]
default = []
cuda_kv = ["dep:cudarc", "dep:ndarray"]
sentencepiece = ["dep:sentencepiece"]

[dependencies]
# repo
dynamo-runtime = { workspace = true }

# workspace
anyhow = { workspace = true }
async-stream = { workspace = true }
async-trait = { workspace = true }
async-nats = { workspace = true }
async_zmq = { workspace = true }
bytes = { workspace = true }
chrono = { workspace = true }
derive_builder = { workspace = true }
either = { workspace = true }
futures = { workspace = true }
rand = { workspace = true }
prometheus = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
thiserror = { workspace = true }
tokio = { workspace = true }
tokio-stream = { workspace = true }
tokio-util = { workspace = true }
tracing = { workspace = true }
validator = { workspace = true }
url = { workspace = true }
uuid = { workspace = true }
xxhash-rust = { workspace = true }
strum = { workspace = true }

# versioned in this manifest rather than inherited from the workspace
akin = "0.4.0"
async-openai = "0.27.2"
blake3 = "1"
bytemuck = "1.22"
candle-core = { version = "0.8.0" }
derive-getters = "0.5"
regex = "1"
rayon = "1"

# cuda_kv (optional; pulled in only by the `cuda_kv` feature)
# cudarc is pinned to an exact git revision instead of a published release.
cudarc = { git = "https://github.com/coreylowman/cudarc.git", rev = "8c52e735b55bf8e979e1a16bd85e3dfe4f87c9fe", features = ["cuda-12040"], optional = true }
ndarray = { version = "0.16", optional = true }

# protocols
unicode-segmentation = "1.12"

# http-service
axum = "0.8"

# tokenizers
# `tokenizers` is built without its default features; only the ones listed
# here are enabled.
tokenizers = { version = "0.21.1", default-features = false, features = [
    "onig",
    "esaxx_fast",
    "rustls-tls",
] }
# optional; pulled in only by the `sentencepiece` feature
sentencepiece = { version = "0.11.2", optional = true }

# backend
galil-seiferas = { version = "0.1" }
toktrie = { version = "0.6.28" }
toktrie_hf_tokenizers = { version = "0.6.28" }

# preprocessor
bs62 = { version = "0.1" }
erased-serde = { version = "0.4" }
itertools = { version = "0.14.0" }
minijinja = { version = "2.3.1", features = ["loader"] }
minijinja-contrib = { version = "2.3.1", features = ["pycompat"] }

# GGUF
ggus = "0.4.0"
memmap2 = "0.9.5"

[dev-dependencies]
hf-hub = { workspace = true }
proptest = "1.5.0"
reqwest = { version = "0.12", default-features = false, features = ["json", "stream", "rustls-tls"] }
rstest = "0.18.2"
rstest_reuse = "0.7.0"
tempfile = "3.17.1"
insta = { version = "1.41", features = [
    "glob",
    "json",
    "redactions",
    "filters",
] }