Commit 1fb31d6a authored by Graham King's avatar Graham King Committed by GitHub
Browse files

feat(dynamo-run): Various UX improvements (#168)

Engines mistralrs, sglang and vllm included by default. Can be disabled like this: `cargo build --no-default-features --features <add-back-what-you-want>`.

Added `--features vulkan` option, for llamacpp.

Build time message if CUDA or Metal would help and are missing. That's the best we can do:
> warning: dynamo-run@0.1.0: CUDA not enabled, re-run with `--features cuda`

Runtime message if CUDA, Metal or Vulkan are enabled:
> 2025-03-14T21:59:26.501937Z  INFO dynamo_run: CUDA on

Runtime message if they are missing:
> 2025-03-14T22:02:37.439404Z  INFO dynamo_run: CPU mode. Rebuild with `--features cuda|metal|vulkan` for better performance

Default engine message includes available engines:
> 2025-03-14T21:59:26.503612Z  INFO dynamo_run: Using default engine: mistralrs. Use out=<engine> to specify one of echo_core, echo_full, mistralrs, llamacpp, sglang, vllm, pystr, pytok

The really important outcome is that this should now "just work":
```
cargo install dynamo-run
dynamo-run Qwen/Qwen2.5-3B-Instruct
```

Sadly you still need `--features cuda|metal` for performance, I couldn't automate that.
parent f465aca3
...@@ -84,7 +84,7 @@ jobs: ...@@ -84,7 +84,7 @@ jobs:
working-directory: ${{ matrix.dir }} working-directory: ${{ matrix.dir }}
run: | run: |
cargo-deny --version || cargo install cargo-deny@0.16.4 cargo-deny --version || cargo install cargo-deny@0.16.4
cargo-deny check --hide-inclusion-graph licenses --config ${{ github.workspace }}/deny.toml cargo-deny --no-default-features check --hide-inclusion-graph licenses --config ${{ github.workspace }}/deny.toml
- name: Run Unit Tests - name: Run Unit Tests
working-directory: ${{ matrix.dir }} working-directory: ${{ matrix.dir }}
run: cargo test --locked --all-targets run: cargo test --locked --all-targets
...@@ -28,10 +28,20 @@ allow = [ ...@@ -28,10 +28,20 @@ allow = [
"OpenSSL", "OpenSSL",
"Unicode-3.0", "Unicode-3.0",
"BSL-1.0", "BSL-1.0",
"MPL-2.0" "MPL-2.0",
"MIT-0"
] ]
# TODO exceptions
# MIT: https://github.com/guidance-ai/llguidance
# "llguidance",
# MIT: https://github.com/guidance-ai/llguidance/toktrie
# "toktrie",
# MIT: https://github.com/guidance-ai/llguidance/toktrie_hf_tokenizers
# "toktrie_hf_tokenizers",
[[licenses.clarify]] [[licenses.clarify]]
name = "ring" name = "ring"
expression = "MIT AND ISC AND OpenSSL" expression = "MIT AND ISC AND OpenSSL"
license-files = [ license-files = [
......
...@@ -23,14 +23,18 @@ license.workspace = true ...@@ -23,14 +23,18 @@ license.workspace = true
repository.workspace = true repository.workspace = true
[features] [features]
# Build with `--no-default-features` to disable these defaults
default = ["mistralrs", "vllm", "sglang"]
mistralrs = ["dynamo-llm/mistralrs"] mistralrs = ["dynamo-llm/mistralrs"]
sglang = ["dynamo-llm/sglang", "dep:netlink-packet-route", "dep:rtnetlink"] sglang = ["dynamo-llm/sglang", "dep:netlink-packet-route", "dep:rtnetlink"]
vllm = ["dynamo-llm/vllm", "dep:netlink-packet-route", "dep:rtnetlink"] vllm = ["dynamo-llm/vllm", "dep:netlink-packet-route", "dep:rtnetlink"]
# We don't include llamacpp by default until we figure out when it needs external libraries
llamacpp = ["dynamo-llm/llamacpp"] llamacpp = ["dynamo-llm/llamacpp"]
trtllm = ["dynamo-llm/trtllm"] trtllm = ["dynamo-llm/trtllm"]
python = ["dynamo-llm/python"] python = ["dynamo-llm/python"]
cuda = ["dynamo-llm/cuda"] cuda = ["dynamo-llm/cuda"]
metal = ["dynamo-llm/metal"] metal = ["dynamo-llm/metal"]
vulkan = ["dynamo-llm/vulkan"]
[dependencies] [dependencies]
dynamo-llm = { workspace = true } dynamo-llm = { workspace = true }
...@@ -55,4 +59,4 @@ tracing-subscriber = { version = "0.3", features = ["env-filter", "local-time", ...@@ -55,4 +59,4 @@ tracing-subscriber = { version = "0.3", features = ["env-filter", "local-time",
[target.x86_64-unknown-linux-gnu.dependencies] [target.x86_64-unknown-linux-gnu.dependencies]
netlink-packet-route = { version = "0.19", optional = true } netlink-packet-route = { version = "0.19", optional = true }
rtnetlink = { version = "0.14", optional = true } rtnetlink = { version = "0.14", optional = true }
\ No newline at end of file
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::env;
use std::process::Command;
/// Build-script entry point: emits Cargo warnings when a GPU backend looks usable but
/// the matching Cargo feature was not requested.
///
/// * CUDA: warns when the `cuda` feature is off, `is_cuda_engine()` is true and
///   `has_cuda_toolkit()` succeeds.
/// * Metal: warns when `is_mac()` is true and the `metal` feature is off.
fn main() {
    // Do the cheap environment lookups first so the `nvcc` probe (a process spawn) only
    // runs when its answer can actually change the outcome.
    if !has_feature("cuda") && is_cuda_engine() && has_cuda_toolkit() {
        println!("cargo:warning=CUDA not enabled, re-run with `--features cuda`");
    }
    // NOTE(review): unlike the CUDA hint, this one is not gated on an engine feature;
    // confirm that is intended.
    if is_mac() && !has_feature("metal") {
        println!("cargo:warning=Metal not enabled, re-run with `--features metal`");
    }
}
/// Reports whether Cargo feature `s` is enabled for the package being built.
///
/// Cargo exposes each active feature to build scripts as `CARGO_FEATURE_<NAME>`, where
/// `<NAME>` is the feature name upper-cased with `-` replaced by `_`, so the same
/// mapping is applied here before the lookup.
fn has_feature(s: &str) -> bool {
    let key = format!("CARGO_FEATURE_{}", s.to_uppercase().replace('-', "_"));
    // Only presence matters; `var_os` skips the UTF-8 validation `var` would perform.
    env::var_os(key).is_some()
}
/// Probes for the CUDA toolkit by running `nvcc --version`.
///
/// Returns `true` only when the command could be started and exited successfully;
/// failing to launch it at all counts as "not available".
fn has_cuda_toolkit() -> bool {
    Command::new("nvcc")
        .arg("--version")
        .output()
        .map(|probe| probe.status.success())
        .unwrap_or(false)
}
fn is_cuda_engine() -> bool {
has_feature("mistralrs") || has_feature("llamacpp")
}
/// Reports whether the crate is being compiled *for* macOS.
///
/// A build script is compiled for, and runs on, the host machine, so
/// `#[cfg(target_os = ...)]` inside it describes the host rather than the build target.
/// Cargo passes the real target OS in `CARGO_CFG_TARGET_OS`, which stays correct when
/// cross-compiling. Returns `false` if the variable is absent.
fn is_mac() -> bool {
    env::var("CARGO_CFG_TARGET_OS").map_or(false, |os| os == "macos")
}
...@@ -108,6 +108,25 @@ fn main() -> anyhow::Result<()> { ...@@ -108,6 +108,25 @@ fn main() -> anyhow::Result<()> {
} }
} }
} }
#[cfg(any(feature = "mistralrs", feature = "llamacpp"))]
{
#[cfg(feature = "cuda")]
{
tracing::info!("CUDA on");
}
#[cfg(feature = "metal")]
{
tracing::info!("Metal on");
}
#[cfg(feature = "vulkan")]
{
tracing::info!("Vulkan on");
}
#[cfg(not(any(feature = "cuda", feature = "metal", feature = "vulkan")))]
tracing::info!(
"CPU mode. Rebuild with `--features cuda|metal|vulkan` for better performance"
);
}
// max_worker_threads and max_blocking_threads from env vars or config file. // max_worker_threads and max_blocking_threads from env vars or config file.
let rt_config = dynamo_runtime::RuntimeConfig::from_settings()?; let rt_config = dynamo_runtime::RuntimeConfig::from_settings()?;
...@@ -165,7 +184,8 @@ async fn wrapper(runtime: dynamo_runtime::Runtime) -> anyhow::Result<()> { ...@@ -165,7 +184,8 @@ async fn wrapper(runtime: dynamo_runtime::Runtime) -> anyhow::Result<()> {
None => { None => {
let default_engine = Output::default(); // smart default based on feature flags let default_engine = Output::default(); // smart default based on feature flags
tracing::info!( tracing::info!(
"Using default engine: {default_engine}. Use out=<engine> to specify an engine." "Using default engine: {default_engine}. Use out=<engine> to specify one of {}",
Output::available_engines().join(", ")
); );
default_engine default_engine
} }
......
...@@ -204,20 +204,15 @@ impl fmt::Display for Output { ...@@ -204,20 +204,15 @@ impl fmt::Display for Output {
} }
} }
/// Returns the engine to use if user did not say on cmd line /// Returns the engine to use if user did not say on cmd line.
/// Uses whatever was compiled in, with a priority ordering. /// Nearly always defaults to mistralrs which has no dependencies and we include by default.
/// If built with --no-default-features and a specific engine, default to that.
#[allow(unused_assignments, unused_mut)] #[allow(unused_assignments, unused_mut)]
impl Default for Output { impl Default for Output {
fn default() -> Self { fn default() -> Self {
// Default if no engines // Default if no engines
let mut out = Output::EchoFull; let mut out = Output::EchoFull;
// Runs everywhere but needs local CUDA to build
#[cfg(feature = "mistralrs")]
{
out = Output::MistralRs;
}
#[cfg(feature = "llamacpp")] #[cfg(feature = "llamacpp")]
{ {
out = Output::LlamaCpp; out = Output::LlamaCpp;
...@@ -233,6 +228,11 @@ impl Default for Output { ...@@ -233,6 +228,11 @@ impl Default for Output {
out = Output::Vllm; out = Output::Vllm;
} }
#[cfg(feature = "mistralrs")]
{
out = Output::MistralRs;
}
out out
} }
} }
......
...@@ -51,11 +51,14 @@ impl Default for MultiNodeConfig { ...@@ -51,11 +51,14 @@ impl Default for MultiNodeConfig {
} }
} }
#[cfg(feature = "python")] #[cfg(any(feature = "sglang", feature = "vllm", feature = "python"))]
use pyo3::prelude::*; use pyo3::prelude::*;
/// On Mac embedded Python interpreters do not pick up the virtual env. /// On Mac embedded Python interpreters do not pick up the virtual env.
#[cfg(all(target_os = "macos", feature = "python"))] #[cfg(all(
target_os = "macos",
any(feature = "sglang", feature = "vllm", feature = "python")
))]
fn fix_venv(venv: String, py: pyo3::Python<'_>) -> anyhow::Result<()> { fn fix_venv(venv: String, py: pyo3::Python<'_>) -> anyhow::Result<()> {
let version_info = py.version_info(); let version_info = py.version_info();
let sys: PyObject = py.import("sys")?.into(); let sys: PyObject = py.import("sys")?.into();
...@@ -69,5 +72,8 @@ fn fix_venv(venv: String, py: pyo3::Python<'_>) -> anyhow::Result<()> { ...@@ -69,5 +72,8 @@ fn fix_venv(venv: String, py: pyo3::Python<'_>) -> anyhow::Result<()> {
Ok(()) Ok(())
} }
#[cfg(all(target_os = "linux", feature = "python"))] #[cfg(all(
target_os = "linux",
any(feature = "sglang", feature = "vllm", feature = "python")
))]
fn fix_venv(_venv: String, _py: Python<'_>) {} fn fix_venv(_venv: String, _py: Python<'_>) {}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment