feat(dynamo-run): Various UX improvements (#168)

Engines mistralrs, sglang and vllm are included by default. They can be disabled like this: `cargo build --no-default-features --features <add-back-what-you-want>`.

Added a `--features vulkan` option, for llamacpp.

Build time message if CUDA or Metal would help and are missing. That's the best we can do:
> warning: dynamo-run@0.1.0: CUDA not enabled, re-run with `--features cuda`

Runtime message if CUDA, Metal or Vulkan are enabled:
> 2025-03-14T21:59:26.501937Z INFO dynamo_run: CUDA on

Runtime message if they are missing:
> 2025-03-14T22:02:37.439404Z INFO dynamo_run: CPU mode. Rebuild with `--features cuda|metal|vulkan` for better performance

Default engine message includes available engines:
> 2025-03-14T21:59:26.503612Z INFO dynamo_run: Using default engine: mistralrs. Use out=<engine> to specify one of echo_core, echo_full, mistralrs, llamacpp, sglang, vllm, pystr, pytok

The really important outcome is that this should now "just work":
```
cargo install dynamo-run
dynamo-run Qwen/Qwen2.5-3B-Instruct
```

Sadly you still need `--features cuda|metal` for performance, I couldn't automate that.

feat(dynamo-run): Various UX improvements (#168)
Engines mistralrs, sglang and vllm are included by default. They can be disabled like this: `cargo build --no-default-features --features <add-back-what-you-want>`.

Added a `--features vulkan` option, for llamacpp.

Build time message if CUDA or Metal would help and are missing. That's the best we can do:
> warning: dynamo-run@0.1.0: CUDA not enabled, re-run with `--features cuda`

Runtime message if CUDA, Metal or Vulkan are enabled:
> 2025-03-14T21:59:26.501937Z INFO dynamo_run: CUDA on

Runtime message if they are missing:
> 2025-03-14T22:02:37.439404Z INFO dynamo_run: CPU mode. Rebuild with `--features cuda|metal|vulkan` for better performance

Default engine message includes available engines:
> 2025-03-14T21:59:26.503612Z INFO dynamo_run: Using default engine: mistralrs. Use out=<engine> to specify one of echo_core, echo_full, mistralrs, llamacpp, sglang, vllm, pystr, pytok

The really important outcome is that this should now "just work":
```
cargo install dynamo-run
dynamo-run Qwen/Qwen2.5-3B-Instruct
```

Sadly you still need `--features cuda|metal` for performance, I couldn't automate that.
1fb31d6a · Graham King · GitHub · f465aca3 · 1fb31d6a · 1fb31d6a
Commit 1fb31d6a authored Mar 14, 2025 by Graham King Committed by GitHub Mar 14, 2025
7 changed files
--- a/.github/workflows/pre-merge-rust.yml
+++ b/.github/workflows/pre-merge-rust.yml
@@ -84,7 +84,7 @@ jobs:
      working-directory: ${{ matrix.dir }}
      run: |
        cargo-deny --version || cargo install cargo-deny@0.16.4
-        cargo-deny check --hide-inclusion-graph licenses --config ${{ github.workspace }}/deny.toml
+        cargo-deny --no-default-features check --hide-inclusion-graph licenses --config ${{ github.workspace }}/deny.toml
    - name: Run Unit Tests
      working-directory: ${{ matrix.dir }}
      run: cargo test --locked --all-targets
--- a/deny.toml
+++ b/deny.toml
@@ -28,10 +28,20 @@ allow = [
    "OpenSSL",
    "Unicode-3.0",
    "BSL-1.0",
-    "MPL-2.0"
+    "MPL-2.0",
+    "MIT-0"
 ]
+# TODO exceptions
+# MIT: https://github.com/guidance-ai/llguidance
+#  "llguidance",
+# MIT: https://github.com/guidance-ai/llguidance/toktrie
+#  "toktrie",
+# MIT: https://github.com/guidance-ai/llguidance/toktrie_hf_tokenizers
+#  "toktrie_hf_tokenizers",
 [[licenses.clarify]]
 name = "ring"
 expression = "MIT AND ISC AND OpenSSL"
 license-files = [

--- a/launch/dynamo-run/Cargo.toml
+++ b/launch/dynamo-run/Cargo.toml
@@ -23,14 +23,18 @@ license.workspace = true
 repository.workspace = true
 [features]
+# Build with `--no-default-features` to disable these defaults
+default = ["mistralrs", "vllm", "sglang"]
 mistralrs = ["dynamo-llm/mistralrs"]
 sglang = ["dynamo-llm/sglang", "dep:netlink-packet-route", "dep:rtnetlink"]
 vllm = ["dynamo-llm/vllm", "dep:netlink-packet-route", "dep:rtnetlink"]
+# We don't include llamacpp by default until we figure out when it needs external libraries
 llamacpp = ["dynamo-llm/llamacpp"]
 trtllm = ["dynamo-llm/trtllm"]
 python = ["dynamo-llm/python"]
 cuda = ["dynamo-llm/cuda"]
 metal = ["dynamo-llm/metal"]
+vulkan = ["dynamo-llm/vulkan"]
 [dependencies]
 dynamo-llm = { workspace = true }
@@ -55,4 +59,4 @@ tracing-subscriber = { version = "0.3", features = ["env-filter", "local-time",
 [target.x86_64-unknown-linux-gnu.dependencies]
 netlink-packet-route = { version = "0.19", optional = true }
 rtnetlink = { version = "0.14", optional = true }
\ No newline at end of file
--- a/launch/dynamo-run/build.rs
+++ b/launch/dynamo-run/build.rs
+// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+use std::env;
+use std::process::Command;
+fn main() { // Build-script entry point: prints `cargo:warning=` lines when a GPU feature looks useful but is not enabled.
+    if has_cuda_toolkit() && !has_feature("cuda") && is_cuda_engine() { // `nvcc` runs, `cuda` is off, and mistralrs or llamacpp is enabled
+        println!("cargo:warning=CUDA not enabled, re-run with `--features cuda`");
+    }
+    if is_mac() && !has_feature("metal") { // unlike the CUDA check, this one is not gated on an engine feature
+        println!("cargo:warning=Metal not enabled, re-run with `--features metal`");
+    }
+}
+fn has_feature(s: &str) -> bool { // Returns true when the env var `CARGO_FEATURE_<S>` (S = `s` upper-cased) can be read.
+    env::var(format!("CARGO_FEATURE_{}", s.to_uppercase())).is_ok() // NOTE(review): Cargo also maps `-` to `_` in these names; no name passed in this file contains a dash
+}
+fn has_cuda_toolkit() -> bool { // Probes for a CUDA toolkit by running `nvcc --version`.
+    if let Ok(output) = Command::new("nvcc").arg("--version").output() {
+        output.status.success() // nvcc was launched: report whether it exited successfully
+    } else {
+        false // nvcc could not be launched at all
+    }
+}
+fn is_cuda_engine() -> bool { // Returns true when the `mistralrs` or `llamacpp` feature is enabled; used by `main` to gate the CUDA warning.
+    has_feature("mistralrs") || has_feature("llamacpp")
+}
+#[cfg(target_os = "macos")] // NOTE(review): in a build script this cfg describes the platform the script is compiled for (the host); `CARGO_CFG_TARGET_OS` reports the target - confirm whether cross-compilation matters here
+fn is_mac() -> bool { // macOS variant: always true
+    true
+}
+#[cfg(not(target_os = "macos"))]
+fn is_mac() -> bool { // variant for every other OS: always false
+    false
+}
--- a/launch/dynamo-run/src/main.rs
+++ b/launch/dynamo-run/src/main.rs
@@ -108,6 +108,25 @@ fn main() -> anyhow::Result<()> {
            }
        }
    }
+    #[cfg(any(feature = "mistralrs", feature = "llamacpp"))]
+    {
+        #[cfg(feature = "cuda")]
+        {
+            tracing::info!("CUDA on");
+        }
+        #[cfg(feature = "metal")]
+        {
+            tracing::info!("Metal on");
+        }
+        #[cfg(feature = "vulkan")]
+        {
+            tracing::info!("Vulkan on");
+        }
+        #[cfg(not(any(feature = "cuda", feature = "metal", feature = "vulkan")))]
+        tracing::info!(
+            "CPU mode. Rebuild with `--features cuda|metal|vulkan` for better performance"
+        );
+    }
    // max_worker_threads and max_blocking_threads from env vars or config file.
    let rt_config = dynamo_runtime::RuntimeConfig::from_settings()?;
@@ -165,7 +184,8 @@ async fn wrapper(runtime: dynamo_runtime::Runtime) -> anyhow::Result<()> {
        None => {
            let default_engine = Output::default(); // smart default based on feature flags
            tracing::info!(
-                "Using default engine: {default_engine}. Use out=<engine> to specify an engine."
+                "Using default engine: {default_engine}. Use out=<engine> to specify one of {}",
+                Output::available_engines().join(", ")
            );
            default_engine
        }

--- a/launch/dynamo-run/src/opt.rs
+++ b/launch/dynamo-run/src/opt.rs
@@ -204,20 +204,15 @@ impl fmt::Display for Output {
    }
 }
-/// Returns the engine to use if user did not say on cmd line
+/// Returns the engine to use if user did not say on cmd line.
-/// Uses whatever was compiled in, with a priority ordering.
+/// Nearly always defaults to mistralrs which has no dependencies and we include by default.
+/// If built with --no-default-features and a specific engine, default to that.
 #[allow(unused_assignments, unused_mut)]
 impl Default for Output {
    fn default() -> Self {
        // Default if no engines
        let mut out = Output::EchoFull;
-        // Runs everywhere but needs local CUDA to build
-        #[cfg(feature = "mistralrs")]
-        {
-            out = Output::MistralRs;
-        }
        #[cfg(feature = "llamacpp")]
        {
            out = Output::LlamaCpp;
@@ -233,6 +228,11 @@ impl Default for Output {
            out = Output::Vllm;
        }
+        #[cfg(feature = "mistralrs")]
+        {
+            out = Output::MistralRs;
+        }
        out
    }
 }

--- a/lib/llm/src/engines.rs
+++ b/lib/llm/src/engines.rs
@@ -51,11 +51,14 @@ impl Default for MultiNodeConfig {
    }
 }
-#[cfg(feature = "python")]
+#[cfg(any(feature = "sglang", feature = "vllm", feature = "python"))]
 use pyo3::prelude::*;
 /// On Mac embedded Python interpreters do not pick up the virtual env.
-#[cfg(all(target_os = "macos", feature = "python"))]
+#[cfg(all(
+    target_os = "macos",
+    any(feature = "sglang", feature = "vllm", feature = "python")
+))]
 fn fix_venv(venv: String, py: pyo3::Python<'_>) -> anyhow::Result<()> {
    let version_info = py.version_info();
    let sys: PyObject = py.import("sys")?.into();
@@ -69,5 +72,8 @@ fn fix_venv(venv: String, py: pyo3::Python<'_>) -> anyhow::Result<()> {
    Ok(())
 }
-#[cfg(all(target_os = "linux", feature = "python"))]
+#[cfg(all(
+    target_os = "linux",
+    any(feature = "sglang", feature = "vllm", feature = "python")
+))]
 fn fix_venv(_venv: String, _py: Python<'_>) {}