Unverified Commit 0e77d344 authored by Keiven C's avatar Keiven C Committed by GitHub
Browse files

refactor: centralize environment variable constants (#4083)


Signed-off-by: Keiven Chang <keivenchang@users.noreply.github.com>
Co-authored-by: Keiven Chang <keivenchang@users.noreply.github.com>
parent cf97c0dc
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
use std::env; use std::env;
use clap::{CommandFactory as _, Parser}; use clap::{CommandFactory as _, Parser};
use dynamo_runtime::config::environment_names::logging as env_logging;
use dynamo_llm::entrypoint::input::Input; use dynamo_llm::entrypoint::input::Input;
use dynamo_run::Output; use dynamo_run::Output;
...@@ -44,7 +45,7 @@ fn main() -> anyhow::Result<()> { ...@@ -44,7 +45,7 @@ fn main() -> anyhow::Result<()> {
}; };
if log_level != "info" { if log_level != "info" {
unsafe { std::env::set_var("DYN_LOG", log_level) }; unsafe { std::env::set_var(env_logging::DYN_LOG, log_level) };
} }
logging::init(); logging::init();
......
...@@ -10,6 +10,7 @@ use dynamo_llm::block_manager::kv_consolidator::KvEventConsolidatorConfig; ...@@ -10,6 +10,7 @@ use dynamo_llm::block_manager::kv_consolidator::KvEventConsolidatorConfig;
use dynamo_llm::block_manager::offload::filter::FrequencyFilter; use dynamo_llm::block_manager::offload::filter::FrequencyFilter;
use dynamo_llm::block_manager::{BasicMetadata, BlockParallelismStrategy}; use dynamo_llm::block_manager::{BasicMetadata, BlockParallelismStrategy};
use dynamo_runtime::DistributedRuntime; use dynamo_runtime::DistributedRuntime;
use dynamo_runtime::config::environment_names::kvbm as env_kvbm;
use pyo3::PyResult; use pyo3::PyResult;
use std::time::Duration; use std::time::Duration;
use tokio_util::sync::CancellationToken; use tokio_util::sync::CancellationToken;
...@@ -53,7 +54,7 @@ fn create_disk_offload_filter( ...@@ -53,7 +54,7 @@ fn create_disk_offload_filter(
runtime: &tokio::runtime::Handle, runtime: &tokio::runtime::Handle,
) -> Result<Option<Arc<FrequencyFilter>>> { ) -> Result<Option<Arc<FrequencyFilter>>> {
// Check if disk offload filter is disabled via environment variable // Check if disk offload filter is disabled via environment variable
let disable_filter = std::env::var("DYN_KVBM_DISABLE_DISK_OFFLOAD_FILTER") let disable_filter = std::env::var(env_kvbm::DYN_KVBM_DISABLE_DISK_OFFLOAD_FILTER)
.map(|v| v == "true" || v == "1") .map(|v| v == "true" || v == "1")
.unwrap_or(false); .unwrap_or(false);
......
...@@ -9,13 +9,10 @@ use llm_rs::block_manager::distributed::{ ...@@ -9,13 +9,10 @@ use llm_rs::block_manager::distributed::{
}; };
use utils::{get_leader_zmq_ack_url, get_leader_zmq_pub_url}; use utils::{get_leader_zmq_ack_url, get_leader_zmq_pub_url};
const CPU_CACHE: &str = "DYN_KVBM_CPU_CACHE_GB"; use dynamo_runtime::config::environment_names::kvbm::cpu_cache as env_cpu_cache;
const CPU_CACHE_OVERRIDE: &str = "DYN_KVBM_CPU_CACHE_OVERRIDE_NUM_BLOCKS"; use dynamo_runtime::config::environment_names::kvbm::disk_cache as env_disk_cache;
use dynamo_runtime::config::environment_names::kvbm::leader as env_kvbm_leader;
const DISK_CACHE: &str = "DYN_KVBM_DISK_CACHE_GB";
const DISK_CACHE_OVERRIDE: &str = "DYN_KVBM_DISK_CACHE_OVERRIDE_NUM_BLOCKS";
const LEADER_WORKER_INIT_TIMEOUT_SECS: &str = "DYN_KVBM_LEADER_WORKER_INIT_TIMEOUT_SECS";
const DEFAULT_INIT_TIMEOUT_SECS: u64 = 1800; const DEFAULT_INIT_TIMEOUT_SECS: u64 = 1800;
fn read_env_usize(key: &str) -> Option<usize> { fn read_env_usize(key: &str) -> Option<usize> {
...@@ -81,13 +78,19 @@ impl KvbmLeader { ...@@ -81,13 +78,19 @@ impl KvbmLeader {
})?; })?;
let leader_init_timeout_sec: u64 = let leader_init_timeout_sec: u64 =
get_leader_init_timeout_secs(LEADER_WORKER_INIT_TIMEOUT_SECS); get_leader_init_timeout_secs(env_kvbm_leader::DYN_KVBM_LEADER_WORKER_INIT_TIMEOUT_SECS);
let config = KvbmLeaderConfig::builder() let config = KvbmLeaderConfig::builder()
.world_size(world_size) .world_size(world_size)
.leader_init_timeout_secs(leader_init_timeout_sec) .leader_init_timeout_secs(leader_init_timeout_sec)
.host_blocks_config(get_blocks_config(CPU_CACHE, CPU_CACHE_OVERRIDE)) .host_blocks_config(get_blocks_config(
.disk_blocks_config(get_blocks_config(DISK_CACHE, DISK_CACHE_OVERRIDE)) env_cpu_cache::DYN_KVBM_CPU_CACHE_GB,
env_cpu_cache::DYN_KVBM_CPU_CACHE_OVERRIDE_NUM_BLOCKS,
))
.disk_blocks_config(get_blocks_config(
env_disk_cache::DYN_KVBM_DISK_CACHE_GB,
env_disk_cache::DYN_KVBM_DISK_CACHE_OVERRIDE_NUM_BLOCKS,
))
.leader_pub_url(get_leader_zmq_pub_url()) .leader_pub_url(get_leader_zmq_pub_url())
.leader_ack_url(get_leader_zmq_ack_url()) .leader_ack_url(get_leader_zmq_ack_url())
.build() .build()
......
...@@ -2,6 +2,8 @@ ...@@ -2,6 +2,8 @@
// SPDX-License-Identifier: Apache-2.0 // SPDX-License-Identifier: Apache-2.0
use std::env; use std::env;
use dynamo_runtime::config::environment_names::kvbm::leader as env_kvbm_leader;
const DEFAULT_LEADER_ZMQ_HOST: &str = "127.0.0.1"; const DEFAULT_LEADER_ZMQ_HOST: &str = "127.0.0.1";
const DEFAULT_LEADER_ZMQ_PUB_PORT: u16 = 56001; const DEFAULT_LEADER_ZMQ_PUB_PORT: u16 = 56001;
const DEFAULT_LEADER_ZMQ_ACK_PORT: u16 = 56002; const DEFAULT_LEADER_ZMQ_ACK_PORT: u16 = 56002;
...@@ -35,16 +37,24 @@ fn validated_port_from_env(key: &str, default_port: u16) -> u16 { ...@@ -35,16 +37,24 @@ fn validated_port_from_env(key: &str, default_port: u16) -> u16 {
} }
fn get_leader_zmq_host() -> String { fn get_leader_zmq_host() -> String {
read_env_trimmed("DYN_KVBM_LEADER_ZMQ_HOST") read_env_trimmed(env_kvbm_leader::DYN_KVBM_LEADER_ZMQ_HOST)
.unwrap_or_else(|| DEFAULT_LEADER_ZMQ_HOST.to_string()) .unwrap_or_else(|| DEFAULT_LEADER_ZMQ_HOST.to_string())
} }
fn get_leader_zmq_pub_port() -> String { fn get_leader_zmq_pub_port() -> String {
validated_port_from_env("DYN_KVBM_LEADER_ZMQ_PUB_PORT", DEFAULT_LEADER_ZMQ_PUB_PORT).to_string() validated_port_from_env(
env_kvbm_leader::DYN_KVBM_LEADER_ZMQ_PUB_PORT,
DEFAULT_LEADER_ZMQ_PUB_PORT,
)
.to_string()
} }
fn get_leader_zmq_ack_port() -> String { fn get_leader_zmq_ack_port() -> String {
validated_port_from_env("DYN_KVBM_LEADER_ZMQ_ACK_PORT", DEFAULT_LEADER_ZMQ_ACK_PORT).to_string() validated_port_from_env(
env_kvbm_leader::DYN_KVBM_LEADER_ZMQ_ACK_PORT,
DEFAULT_LEADER_ZMQ_ACK_PORT,
)
.to_string()
} }
pub fn get_leader_zmq_pub_url() -> String { pub fn get_leader_zmq_pub_url() -> String {
......
...@@ -24,6 +24,7 @@ use dynamo_llm::block_manager::{ ...@@ -24,6 +24,7 @@ use dynamo_llm::block_manager::{
connector::*, connector::*,
}; };
use dynamo_llm::tokens::{SaltHash, TokenBlockSequence, Tokens}; use dynamo_llm::tokens::{SaltHash, TokenBlockSequence, Tokens};
use dynamo_runtime::config::environment_names::kvbm as env_kvbm;
use std::sync::{Arc, OnceLock}; use std::sync::{Arc, OnceLock};
use std::{collections::HashSet, sync::Mutex}; use std::{collections::HashSet, sync::Mutex};
use tokio; use tokio;
...@@ -576,7 +577,7 @@ impl PyKvConnectorLeader { ...@@ -576,7 +577,7 @@ impl PyKvConnectorLeader {
// Initialize logging for the vLLM connector // Initialize logging for the vLLM connector
dynamo_runtime::logging::init(); dynamo_runtime::logging::init();
let enable_kvbm_record = std::env::var("ENABLE_KVBM_RECORD") let enable_kvbm_record = std::env::var(env_kvbm::ENABLE_KVBM_RECORD)
.map(|v| v == "1" || v.eq_ignore_ascii_case("true")) .map(|v| v == "1" || v.eq_ignore_ascii_case("true"))
.unwrap_or(false); .unwrap_or(false);
...@@ -646,13 +647,13 @@ impl PyKvConnectorLeader { ...@@ -646,13 +647,13 @@ impl PyKvConnectorLeader {
} }
pub fn kvbm_metrics_endpoint_enabled() -> bool { pub fn kvbm_metrics_endpoint_enabled() -> bool {
std::env::var("DYN_KVBM_METRICS") std::env::var(env_kvbm::DYN_KVBM_METRICS)
.map(|v| v == "1" || v.eq_ignore_ascii_case("true")) .map(|v| v == "1" || v.eq_ignore_ascii_case("true"))
.unwrap_or(false) .unwrap_or(false)
} }
pub fn parse_kvbm_metrics_port() -> u16 { pub fn parse_kvbm_metrics_port() -> u16 {
match std::env::var("DYN_KVBM_METRICS_PORT") { match std::env::var(env_kvbm::DYN_KVBM_METRICS_PORT) {
Ok(val) => match val.trim().parse::<u16>() { Ok(val) => match val.trim().parse::<u16>() {
Ok(port) => port, Ok(port) => port,
Err(_) => { Err(_) => {
......
...@@ -22,6 +22,7 @@ use std::{ ...@@ -22,6 +22,7 @@ use std::{
use tokio::sync::Mutex; use tokio::sync::Mutex;
use tracing::Instrument; use tracing::Instrument;
use dynamo_runtime::config::environment_names::logging::otlp as env_otlp;
use dynamo_runtime::{ use dynamo_runtime::{
self as rs, logging, self as rs, logging,
pipeline::{ pipeline::{
...@@ -125,7 +126,10 @@ fn create_request_context( ...@@ -125,7 +126,10 @@ fn create_request_context(
#[pymodule] #[pymodule]
fn _core(m: &Bound<'_, PyModule>) -> PyResult<()> { fn _core(m: &Bound<'_, PyModule>) -> PyResult<()> {
// Initialize logging early unless OTEL export is enabled (which requires tokio runtime) // Initialize logging early unless OTEL export is enabled (which requires tokio runtime)
if rs::config::env_is_truthy("OTEL_EXPORT_ENABLED") { if std::env::var(env_otlp::OTEL_EXPORT_ENABLED)
.map(|v| v == "1")
.unwrap_or(false)
{
eprintln!( eprintln!(
"Warning: OTEL_EXPORT_ENABLED detected. Logging initialization deferred until runtime is available. Early logs may be dropped." "Warning: OTEL_EXPORT_ENABLED detected. Logging initialization deferred until runtime is available. Early logs may be dropped."
); );
...@@ -455,7 +459,10 @@ impl DistributedRuntime { ...@@ -455,7 +459,10 @@ impl DistributedRuntime {
// Initialize logging in context where tokio runtime is available // Initialize logging in context where tokio runtime is available
// otel exporter requires it // otel exporter requires it
if rs::config::env_is_truthy("OTEL_EXPORT_ENABLED") { if std::env::var(env_otlp::OTEL_EXPORT_ENABLED)
.map(|v| v == "1")
.unwrap_or(false)
{
runtime.secondary().block_on(async { runtime.secondary().block_on(async {
rs::logging::init(); rs::logging::init();
}); });
......
...@@ -4,6 +4,10 @@ ...@@ -4,6 +4,10 @@
use std::env; use std::env;
use std::path::PathBuf; use std::path::PathBuf;
// Environment variable names (build.rs can't import from runtime crate)
const DYNAMO_FATBIN_PATH: &str = "DYNAMO_FATBIN_PATH";
const OUT_DIR: &str = "OUT_DIR";
fn main() -> Result<(), Box<dyn std::error::Error>> { fn main() -> Result<(), Box<dyn std::error::Error>> {
// Declare our custom cfg flag to avoid unexpected_cfgs warnings // Declare our custom cfg flag to avoid unexpected_cfgs warnings
println!("cargo:rustc-check-cfg=cfg(have_vec_copy_fatbin)"); println!("cargo:rustc-check-cfg=cfg(have_vec_copy_fatbin)");
...@@ -14,7 +18,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> { ...@@ -14,7 +18,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
// Get FATBIN path and copy it to OUT_DIR for embedding // Get FATBIN path and copy it to OUT_DIR for embedding
if let Some(fatbin_path) = find_fatbin_file() { if let Some(fatbin_path) = find_fatbin_file() {
// Copy FATBIN to OUT_DIR so we can include it with a predictable path // Copy FATBIN to OUT_DIR so we can include it with a predictable path
let out_dir = env::var("OUT_DIR").unwrap(); let out_dir = env::var(OUT_DIR).unwrap();
let dest_path = PathBuf::from(out_dir).join("vectorized_copy.fatbin"); let dest_path = PathBuf::from(out_dir).join("vectorized_copy.fatbin");
if let Err(e) = std::fs::copy(&fatbin_path, &dest_path) { if let Err(e) = std::fs::copy(&fatbin_path, &dest_path) {
...@@ -52,7 +56,7 @@ fn build_protos() -> Result<(), Box<dyn std::error::Error>> { ...@@ -52,7 +56,7 @@ fn build_protos() -> Result<(), Box<dyn std::error::Error>> {
fn find_fatbin_file() -> Option<PathBuf> { fn find_fatbin_file() -> Option<PathBuf> {
// 1. Check if user specified custom path via environment variable // 1. Check if user specified custom path via environment variable
if let Ok(custom_path) = env::var("DYNAMO_FATBIN_PATH") { if let Ok(custom_path) = env::var(DYNAMO_FATBIN_PATH) {
let fatbin_file = PathBuf::from(custom_path); let fatbin_file = PathBuf::from(custom_path);
if fatbin_file.exists() { if fatbin_file.exists() {
println!( println!(
......
...@@ -9,6 +9,7 @@ use crate::block_manager::block::{BlockDataProvider, BlockDataProviderMut}; ...@@ -9,6 +9,7 @@ use crate::block_manager::block::{BlockDataProvider, BlockDataProviderMut};
use anyhow::Result; use anyhow::Result;
use cudarc::driver::CudaStream; use cudarc::driver::CudaStream;
use cudarc::driver::result as cuda_result; use cudarc::driver::result as cuda_result;
use dynamo_runtime::config::environment_names::cuda as env_cuda;
use std::ops::Range; use std::ops::Range;
use std::sync::Mutex; use std::sync::Mutex;
use std::sync::OnceLock; use std::sync::OnceLock;
...@@ -561,7 +562,7 @@ fn load_embedded_fatbin() -> Result<cudarc::driver::sys::CUmodule, cudarc::drive ...@@ -561,7 +562,7 @@ fn load_embedded_fatbin() -> Result<cudarc::driver::sys::CUmodule, cudarc::drive
// Try to load FATBIN from filesystem (runtime) // Try to load FATBIN from filesystem (runtime)
fn load_runtime_fatbin() -> Result<cudarc::driver::sys::CUmodule, cudarc::driver::DriverError> { fn load_runtime_fatbin() -> Result<cudarc::driver::sys::CUmodule, cudarc::driver::DriverError> {
// 1. Check runtime environment variable first // 1. Check runtime environment variable first
if let Ok(runtime_path) = std::env::var("DYNAMO_FATBIN_PATH") if let Ok(runtime_path) = std::env::var(env_cuda::DYNAMO_FATBIN_PATH)
&& let Ok(fatbin_data) = std::fs::read(&runtime_path) && let Ok(fatbin_data) = std::fs::read(&runtime_path)
{ {
tracing::debug!("Loading FATBIN from runtime env var: {}", runtime_path); tracing::debug!("Loading FATBIN from runtime env var: {}", runtime_path);
......
...@@ -3,6 +3,8 @@ ...@@ -3,6 +3,8 @@
use super::events::EventManager; use super::events::EventManager;
use super::*; use super::*;
use dynamo_runtime::config::environment_names::kvbm::cpu_cache as env_cpu_cache;
use dynamo_runtime::config::environment_names::kvbm::disk_cache as env_disk_cache;
use prometheus::Registry; use prometheus::Registry;
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
...@@ -229,22 +231,24 @@ impl KvBlockManagerConfig { ...@@ -229,22 +231,24 @@ impl KvBlockManagerConfig {
/// - AND CPU cache env vars are NOT set (`DYN_KVBM_CPU_CACHE_GB` or `DYN_KVBM_CPU_CACHE_OVERRIDE_NUM_BLOCKS`) /// - AND CPU cache env vars are NOT set (`DYN_KVBM_CPU_CACHE_GB` or `DYN_KVBM_CPU_CACHE_OVERRIDE_NUM_BLOCKS`)
/// OR their values are zero (treated as not set) /// OR their values are zero (treated as not set)
pub fn should_bypass_cpu_cache() -> bool { pub fn should_bypass_cpu_cache() -> bool {
let cpu_cache_gb_set = std::env::var("DYN_KVBM_CPU_CACHE_GB") let cpu_cache_gb_set = std::env::var(env_cpu_cache::DYN_KVBM_CPU_CACHE_GB)
.ok() .ok()
.and_then(|v| v.parse::<u64>().ok()) .and_then(|v| v.parse::<u64>().ok())
.map(|v| v > 0) .map(|v| v > 0)
.unwrap_or(false); .unwrap_or(false);
let cpu_cache_override_set = std::env::var("DYN_KVBM_CPU_CACHE_OVERRIDE_NUM_BLOCKS") let cpu_cache_override_set =
std::env::var(env_cpu_cache::DYN_KVBM_CPU_CACHE_OVERRIDE_NUM_BLOCKS)
.ok() .ok()
.and_then(|v| v.parse::<usize>().ok()) .and_then(|v| v.parse::<usize>().ok())
.map(|v| v > 0) .map(|v| v > 0)
.unwrap_or(false); .unwrap_or(false);
let disk_cache_gb_set = std::env::var("DYN_KVBM_DISK_CACHE_GB") let disk_cache_gb_set = std::env::var(env_disk_cache::DYN_KVBM_DISK_CACHE_GB)
.ok() .ok()
.and_then(|v| v.parse::<u64>().ok()) .and_then(|v| v.parse::<u64>().ok())
.map(|v| v > 0) .map(|v| v > 0)
.unwrap_or(false); .unwrap_or(false);
let disk_cache_override_set = std::env::var("DYN_KVBM_DISK_CACHE_OVERRIDE_NUM_BLOCKS") let disk_cache_override_set =
std::env::var(env_disk_cache::DYN_KVBM_DISK_CACHE_OVERRIDE_NUM_BLOCKS)
.ok() .ok()
.and_then(|v| v.parse::<usize>().ok()) .and_then(|v| v.parse::<usize>().ok())
.map(|v| v > 0) .map(|v| v > 0)
......
...@@ -8,8 +8,11 @@ use axum::{ ...@@ -8,8 +8,11 @@ use axum::{
response::{IntoResponse, sse::Event}, response::{IntoResponse, sse::Event},
routing::get, routing::get,
}; };
use dynamo_runtime::metrics::prometheus_names::{ use dynamo_runtime::{
config::environment_names::llm::metrics as env_metrics,
metrics::prometheus_names::{
frontend_service, name_prefix, sanitize_frontend_prometheus_prefix, frontend_service, name_prefix, sanitize_frontend_prometheus_prefix,
},
}; };
use prometheus::{Encoder, HistogramOpts, HistogramVec, IntCounterVec, IntGaugeVec, Opts}; use prometheus::{Encoder, HistogramOpts, HistogramVec, IntCounterVec, IntGaugeVec, Opts};
use serde::Serialize; use serde::Serialize;
...@@ -120,6 +123,7 @@ fn parse_bucket_config( ...@@ -120,6 +123,7 @@ fn parse_bucket_config(
); );
return (1.0, 10.0, 10); return (1.0, 10.0, 10);
} }
let env_prefix = format!("{}{}", env_metrics::HISTOGRAM_PREFIX, env_prefix);
let mut min = std::env::var(format!("{env_prefix}_MIN")) let mut min = std::env::var(format!("{env_prefix}_MIN"))
.ok() .ok()
.and_then(|s| s.parse::<f64>().ok()) .and_then(|s| s.parse::<f64>().ok())
...@@ -302,7 +306,7 @@ impl Metrics { ...@@ -302,7 +306,7 @@ impl Metrics {
/// Metrics are never removed to preserve historical data. Runtime config and MDC /// Metrics are never removed to preserve historical data. Runtime config and MDC
/// metrics are updated when models are discovered and their configurations are available. /// metrics are updated when models are discovered and their configurations are available.
pub fn new() -> Self { pub fn new() -> Self {
let raw_prefix = std::env::var(frontend_service::METRICS_PREFIX_ENV) let raw_prefix = std::env::var(env_metrics::DYN_METRICS_PREFIX)
.unwrap_or_else(|_| name_prefix::FRONTEND.to_string()); .unwrap_or_else(|_| name_prefix::FRONTEND.to_string());
let prefix = sanitize_frontend_prometheus_prefix(&raw_prefix); let prefix = sanitize_frontend_prometheus_prefix(&raw_prefix);
if prefix != raw_prefix { if prefix != raw_prefix {
......
...@@ -20,6 +20,7 @@ use axum::{ ...@@ -20,6 +20,7 @@ use axum::{
}, },
routing::{get, post}, routing::{get, post},
}; };
use dynamo_runtime::config::environment_names::llm as env_llm;
use dynamo_runtime::{ use dynamo_runtime::{
pipeline::{AsyncEngineContextProvider, Context}, pipeline::{AsyncEngineContextProvider, Context},
protocols::annotated::AnnotationsProvider, protocols::annotated::AnnotationsProvider,
...@@ -59,7 +60,7 @@ pub const ANNOTATION_REQUEST_ID: &str = "request_id"; ...@@ -59,7 +60,7 @@ pub const ANNOTATION_REQUEST_ID: &str = "request_id";
/// Default body limit in bytes (45MB) to support 500k+ token payloads. /// Default body limit in bytes (45MB) to support 500k+ token payloads.
/// Can be configured at runtime using the DYN_HTTP_BODY_LIMIT_MB environment variable /// Can be configured at runtime using the DYN_HTTP_BODY_LIMIT_MB environment variable
fn get_body_limit() -> usize { fn get_body_limit() -> usize {
std::env::var("DYN_HTTP_BODY_LIMIT_MB") std::env::var(env_llm::DYN_HTTP_BODY_LIMIT_MB)
.ok() .ok()
.and_then(|s| s.parse::<usize>().ok()) .and_then(|s| s.parse::<usize>().ok())
.map(|mb| mb * 1024 * 1024) .map(|mb| mb * 1024 * 1024)
......
...@@ -9,8 +9,7 @@ use modelexpress_client::{ ...@@ -9,8 +9,7 @@ use modelexpress_client::{
}; };
use modelexpress_common::download as mx; use modelexpress_common::download as mx;
/// Example: export MODEL_EXPRESS_URL=http://localhost:8001 use dynamo_runtime::config::environment_names::model as env_model;
const MODEL_EXPRESS_ENDPOINT_ENV_VAR: &str = "MODEL_EXPRESS_URL";
/// Download a model using the ModelExpress client. The client first requests the model /// Download a model using the ModelExpress client. The client first requests the model
/// from the server and falls back to direct download in case of server failure. /// from the server and falls back to direct download in case of server failure.
...@@ -21,7 +20,7 @@ pub async fn from_hf(name: impl AsRef<Path>, ignore_weights: bool) -> anyhow::Re ...@@ -21,7 +20,7 @@ pub async fn from_hf(name: impl AsRef<Path>, ignore_weights: bool) -> anyhow::Re
let model_name = name.display().to_string(); let model_name = name.display().to_string();
let mut config: MxClientConfig = MxClientConfig::default(); let mut config: MxClientConfig = MxClientConfig::default();
if let Ok(endpoint) = env::var(MODEL_EXPRESS_ENDPOINT_ENV_VAR) { if let Ok(endpoint) = env::var(env_model::model_express::MODEL_EXPRESS_URL) {
config = config.with_endpoint(endpoint); config = config.with_endpoint(endpoint);
} }
...@@ -92,17 +91,17 @@ async fn mx_download_direct(model_name: &str, ignore_weights: bool) -> anyhow::R ...@@ -92,17 +91,17 @@ async fn mx_download_direct(model_name: &str, ignore_weights: bool) -> anyhow::R
fn get_model_express_cache_dir() -> PathBuf { fn get_model_express_cache_dir() -> PathBuf {
// Check HF_HUB_CACHE environment variable // Check HF_HUB_CACHE environment variable
// reference: https://huggingface.co/docs/huggingface_hub/en/package_reference/environment_variables#hfhubcache // reference: https://huggingface.co/docs/huggingface_hub/en/package_reference/environment_variables#hfhubcache
if let Ok(cache_path) = env::var("HF_HUB_CACHE") { if let Ok(cache_path) = env::var(env_model::huggingface::HF_HUB_CACHE) {
return PathBuf::from(cache_path); return PathBuf::from(cache_path);
} }
// Check HF_HOME environment variable (standard Hugging Face cache directory) // Check HF_HOME environment variable (standard Hugging Face cache directory)
// reference: https://huggingface.co/docs/huggingface_hub/en/package_reference/environment_variables#hfhome // reference: https://huggingface.co/docs/huggingface_hub/en/package_reference/environment_variables#hfhome
if let Ok(hf_home) = env::var("HF_HOME") { if let Ok(hf_home) = env::var(env_model::huggingface::HF_HOME) {
return PathBuf::from(hf_home).join("hub"); return PathBuf::from(hf_home).join("hub");
} }
if let Ok(cache_path) = env::var("MODEL_EXPRESS_CACHE_PATH") { if let Ok(cache_path) = env::var(env_model::model_express::MODEL_EXPRESS_CACHE_PATH) {
return PathBuf::from(cache_path); return PathBuf::from(cache_path);
} }
...@@ -136,14 +135,14 @@ mod tests { ...@@ -136,14 +135,14 @@ mod tests {
// Test that HF_HOME is respected when set // Test that HF_HOME is respected when set
unsafe { unsafe {
// Clear other cache env vars to ensure HF_HOME is tested // Clear other cache env vars to ensure HF_HOME is tested
env::remove_var("HF_HUB_CACHE"); env::remove_var(env_model::huggingface::HF_HUB_CACHE);
env::remove_var("MODEL_EXPRESS_CACHE_PATH"); env::remove_var(env_model::model_express::MODEL_EXPRESS_CACHE_PATH);
env::set_var("HF_HOME", "/custom/cache/path"); env::set_var(env_model::huggingface::HF_HOME, "/custom/cache/path");
let cache_dir = get_model_express_cache_dir(); let cache_dir = get_model_express_cache_dir();
assert_eq!(cache_dir, PathBuf::from("/custom/cache/path/hub")); assert_eq!(cache_dir, PathBuf::from("/custom/cache/path/hub"));
// Clean up // Clean up
env::remove_var("HF_HOME"); env::remove_var(env_model::huggingface::HF_HOME);
} }
} }
} }
...@@ -28,6 +28,7 @@ use crate::kv_router::{ ...@@ -28,6 +28,7 @@ use crate::kv_router::{
protocols::*, protocols::*,
scoring::LoadEvent, scoring::LoadEvent,
}; };
use dynamo_runtime::config::environment_names::nats as env_nats;
// ------------------------------------------------------------------------- // -------------------------------------------------------------------------
// KV Event Publishers ----------------------------------------------------- // KV Event Publishers -----------------------------------------------------
...@@ -124,8 +125,8 @@ impl KvEventPublisher { ...@@ -124,8 +125,8 @@ impl KvEventPublisher {
let stream_name = Slug::slugify(&format!("{}.{}", component.subject(), KV_EVENT_SUBJECT)) let stream_name = Slug::slugify(&format!("{}.{}", component.subject(), KV_EVENT_SUBJECT))
.to_string() .to_string()
.replace("_", "-"); .replace("_", "-");
let nats_server = let nats_server = std::env::var(env_nats::NATS_SERVER)
std::env::var("NATS_SERVER").unwrap_or_else(|_| "nats://localhost:4222".to_string()); .unwrap_or_else(|_| "nats://localhost:4222".to_string());
// Create NatsQueue without consumer since we're only publishing // Create NatsQueue without consumer since we're only publishing
let mut nats_queue = NatsQueue::new_without_consumer( let mut nats_queue = NatsQueue::new_without_consumer(
stream_name, stream_name,
......
...@@ -8,6 +8,7 @@ use std::{collections::HashSet, sync::Arc, time::Duration}; ...@@ -8,6 +8,7 @@ use std::{collections::HashSet, sync::Arc, time::Duration};
use anyhow::Result; use anyhow::Result;
use dynamo_runtime::{ use dynamo_runtime::{
component::Component, component::Component,
config::environment_names::nats as env_nats,
discovery::DiscoveryQuery, discovery::DiscoveryQuery,
prelude::*, prelude::*,
storage::key_value_store::WatchEvent, storage::key_value_store::WatchEvent,
...@@ -229,8 +230,8 @@ pub async fn start_kv_router_background( ...@@ -229,8 +230,8 @@ pub async fn start_kv_router_background(
let stream_name = Slug::slugify(&format!("{}.{}", component.subject(), KV_EVENT_SUBJECT)) let stream_name = Slug::slugify(&format!("{}.{}", component.subject(), KV_EVENT_SUBJECT))
.to_string() .to_string()
.replace("_", "-"); .replace("_", "-");
let nats_server = let nats_server = std::env::var(env_nats::NATS_SERVER)
std::env::var("NATS_SERVER").unwrap_or_else(|_| "nats://localhost:4222".to_string()); .unwrap_or_else(|_| "nats://localhost:4222".to_string());
// Create NatsQueue for event consumption // Create NatsQueue for event consumption
let mut nats_queue = NatsQueue::new_with_consumer( let mut nats_queue = NatsQueue::new_with_consumer(
......
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
// SPDX-License-Identifier: Apache-2.0 // SPDX-License-Identifier: Apache-2.0
use anyhow::{Ok, Result}; use anyhow::{Ok, Result};
use dynamo_runtime::config::environment_names::model::huggingface as env_hf;
use dynamo_llm::model_card::{ModelDeploymentCard, PromptContextMixin}; use dynamo_llm::model_card::{ModelDeploymentCard, PromptContextMixin};
use dynamo_llm::preprocessor::prompt::PromptFormatter; use dynamo_llm::preprocessor::prompt::PromptFormatter;
...@@ -39,7 +40,7 @@ use std::path::PathBuf; ...@@ -39,7 +40,7 @@ use std::path::PathBuf;
/// - Returns an error if `HF_TOKEN` environment variable is not set /// - Returns an error if `HF_TOKEN` environment variable is not set
/// - Returns an error if `HF_TOKEN` environment variable is empty or whitespace-only /// - Returns an error if `HF_TOKEN` environment variable is empty or whitespace-only
fn get_hf_token() -> Result<String> { fn get_hf_token() -> Result<String> {
let token = std::env::var("HF_TOKEN") let token = std::env::var(env_hf::HF_TOKEN)
.map_err(|_| anyhow::anyhow!("HF_TOKEN environment variable is not set"))?; .map_err(|_| anyhow::anyhow!("HF_TOKEN environment variable is not set"))?;
if token.trim().is_empty() { if token.trim().is_empty() {
......
...@@ -4,7 +4,8 @@ ...@@ -4,7 +4,8 @@
#![cfg(feature = "integration")] #![cfg(feature = "integration")]
use dynamo_runtime::{ use dynamo_runtime::{
DistributedRuntime, Result, Runtime, pipeline::PushRouter, protocols::annotated::Annotated, DistributedRuntime, Result, Runtime, config::environment_names::runtime::system as env_system,
pipeline::PushRouter, protocols::annotated::Annotated,
}; };
use futures::StreamExt; use futures::StreamExt;
use rand::Rng; use rand::Rng;
...@@ -16,7 +17,7 @@ use tokio::time::{Duration, sleep}; ...@@ -16,7 +17,7 @@ use tokio::time::{Duration, sleep};
#[tokio::test] #[tokio::test]
async fn test_backend_with_metrics() -> Result<()> { async fn test_backend_with_metrics() -> Result<()> {
// Set environment variable for dynamic port allocation (0 = auto-assign) // Set environment variable for dynamic port allocation (0 = auto-assign)
env::set_var("DYN_SYSTEM_PORT", "0"); env::set_var(env_system::DYN_SYSTEM_PORT, "0");
// Generate a random endpoint name to avoid collisions // Generate a random endpoint name to avoid collisions
let random_suffix = rand::rng().random_range(1000..9999); let random_suffix = rand::rng().random_range(1000..9999);
......
...@@ -12,6 +12,8 @@ use std::fmt; ...@@ -12,6 +12,8 @@ use std::fmt;
use std::sync::OnceLock; use std::sync::OnceLock;
use validator::Validate; use validator::Validate;
pub mod environment_names;
/// Default system host for health and metrics endpoints /// Default system host for health and metrics endpoints
const DEFAULT_SYSTEM_HOST: &str = "0.0.0.0"; const DEFAULT_SYSTEM_HOST: &str = "0.0.0.0";
...@@ -309,8 +311,9 @@ impl RuntimeConfig { ...@@ -309,8 +311,9 @@ impl RuntimeConfig {
/// ///
/// Environment variables are prefixed with `DYN_RUNTIME_` or `DYN_SYSTEM_` /// Environment variables are prefixed with `DYN_RUNTIME_` or `DYN_SYSTEM_`
pub fn from_settings() -> Result<RuntimeConfig> { pub fn from_settings() -> Result<RuntimeConfig> {
use environment_names::runtime::system as env_system;
// Check for deprecated environment variables // Check for deprecated environment variables
if std::env::var("DYN_SYSTEM_USE_ENDPOINT_HEALTH_STATUS").is_ok() { if std::env::var(env_system::DYN_SYSTEM_USE_ENDPOINT_HEALTH_STATUS).is_ok() {
tracing::warn!( tracing::warn!(
"DYN_SYSTEM_USE_ENDPOINT_HEALTH_STATUS is deprecated and no longer used. \ "DYN_SYSTEM_USE_ENDPOINT_HEALTH_STATUS is deprecated and no longer used. \
System health is now determined by endpoints that register with health check payloads. \ System health is now determined by endpoints that register with health check payloads. \
...@@ -318,7 +321,7 @@ impl RuntimeConfig { ...@@ -318,7 +321,7 @@ impl RuntimeConfig {
); );
} }
if std::env::var("DYN_SYSTEM_ENABLED").is_ok() { if std::env::var(env_system::DYN_SYSTEM_ENABLED).is_ok() {
tracing::warn!( tracing::warn!(
"DYN_SYSTEM_ENABLED is deprecated. \ "DYN_SYSTEM_ENABLED is deprecated. \
System metrics server is now controlled solely by DYN_SYSTEM_PORT. \ System metrics server is now controlled solely by DYN_SYSTEM_PORT. \
...@@ -454,19 +457,19 @@ pub fn env_is_falsey(env: &str) -> bool { ...@@ -454,19 +457,19 @@ pub fn env_is_falsey(env: &str) -> bool {
/// Check whether JSONL logging is enabled /// Check whether JSONL logging is enabled
/// Set the `DYN_LOGGING_JSONL` environment variable to a [`is_truthy`] value /// Set the `DYN_LOGGING_JSONL` environment variable to a [`is_truthy`] value
pub fn jsonl_logging_enabled() -> bool { pub fn jsonl_logging_enabled() -> bool {
env_is_truthy("DYN_LOGGING_JSONL") env_is_truthy(environment_names::logging::DYN_LOGGING_JSONL)
} }
/// Check whether logging with ANSI terminal escape codes and colors is disabled. /// Check whether logging with ANSI terminal escape codes and colors is disabled.
/// Set the `DYN_SDK_DISABLE_ANSI_LOGGING` environment variable to a [`is_truthy`] value /// Set the `DYN_SDK_DISABLE_ANSI_LOGGING` environment variable to a [`is_truthy`] value
pub fn disable_ansi_logging() -> bool { pub fn disable_ansi_logging() -> bool {
env_is_truthy("DYN_SDK_DISABLE_ANSI_LOGGING") env_is_truthy(environment_names::logging::DYN_SDK_DISABLE_ANSI_LOGGING)
} }
/// Check whether to use local timezone for logging timestamps (default is UTC) /// Check whether to use local timezone for logging timestamps (default is UTC)
/// Set the `DYN_LOG_USE_LOCAL_TZ` environment variable to a [`is_truthy`] value /// Set the `DYN_LOG_USE_LOCAL_TZ` environment variable to a [`is_truthy`] value
pub fn use_local_timezone() -> bool { pub fn use_local_timezone() -> bool {
env_is_truthy("DYN_LOG_USE_LOCAL_TZ") env_is_truthy(environment_names::logging::DYN_LOG_USE_LOCAL_TZ)
} }
#[cfg(test)] #[cfg(test)]
...@@ -475,10 +478,11 @@ mod tests { ...@@ -475,10 +478,11 @@ mod tests {
#[test] #[test]
fn test_runtime_config_with_env_vars() -> Result<()> { fn test_runtime_config_with_env_vars() -> Result<()> {
use environment_names::runtime;
temp_env::with_vars( temp_env::with_vars(
vec![ vec![
("DYN_RUNTIME_NUM_WORKER_THREADS", Some("24")), (runtime::DYN_RUNTIME_NUM_WORKER_THREADS, Some("24")),
("DYN_RUNTIME_MAX_BLOCKING_THREADS", Some("32")), (runtime::DYN_RUNTIME_MAX_BLOCKING_THREADS, Some("32")),
], ],
|| { || {
let config = RuntimeConfig::from_settings()?; let config = RuntimeConfig::from_settings()?;
...@@ -491,10 +495,11 @@ mod tests { ...@@ -491,10 +495,11 @@ mod tests {
#[test] #[test]
fn test_runtime_config_defaults() -> Result<()> { fn test_runtime_config_defaults() -> Result<()> {
use environment_names::runtime;
temp_env::with_vars( temp_env::with_vars(
vec![ vec![
("DYN_RUNTIME_NUM_WORKER_THREADS", None::<&str>), (runtime::DYN_RUNTIME_NUM_WORKER_THREADS, None::<&str>),
("DYN_RUNTIME_MAX_BLOCKING_THREADS", Some("")), (runtime::DYN_RUNTIME_MAX_BLOCKING_THREADS, Some("")),
], ],
|| { || {
let config = RuntimeConfig::from_settings()?; let config = RuntimeConfig::from_settings()?;
...@@ -512,10 +517,11 @@ mod tests { ...@@ -512,10 +517,11 @@ mod tests {
#[test] #[test]
fn test_runtime_config_rejects_invalid_thread_count() -> Result<()> { fn test_runtime_config_rejects_invalid_thread_count() -> Result<()> {
use environment_names::runtime;
temp_env::with_vars( temp_env::with_vars(
vec![ vec![
("DYN_RUNTIME_NUM_WORKER_THREADS", Some("0")), (runtime::DYN_RUNTIME_NUM_WORKER_THREADS, Some("0")),
("DYN_RUNTIME_MAX_BLOCKING_THREADS", Some("0")), (runtime::DYN_RUNTIME_MAX_BLOCKING_THREADS, Some("0")),
], ],
|| { || {
let result = RuntimeConfig::from_settings(); let result = RuntimeConfig::from_settings();
...@@ -537,10 +543,11 @@ mod tests { ...@@ -537,10 +543,11 @@ mod tests {
#[test] #[test]
fn test_runtime_config_system_server_env_vars() -> Result<()> { fn test_runtime_config_system_server_env_vars() -> Result<()> {
use environment_names::runtime::system;
temp_env::with_vars( temp_env::with_vars(
vec![ vec![
("DYN_SYSTEM_HOST", Some("127.0.0.1")), (system::DYN_SYSTEM_HOST, Some("127.0.0.1")),
("DYN_SYSTEM_PORT", Some("9090")), (system::DYN_SYSTEM_PORT, Some("9090")),
], ],
|| { || {
let config = RuntimeConfig::from_settings()?; let config = RuntimeConfig::from_settings()?;
...@@ -553,7 +560,8 @@ mod tests { ...@@ -553,7 +560,8 @@ mod tests {
#[test] #[test]
fn test_system_server_disabled_by_default() { fn test_system_server_disabled_by_default() {
temp_env::with_vars(vec![("DYN_SYSTEM_PORT", None::<&str>)], || { use environment_names::runtime::system;
temp_env::with_vars(vec![(system::DYN_SYSTEM_PORT, None::<&str>)], || {
let config = RuntimeConfig::from_settings().unwrap(); let config = RuntimeConfig::from_settings().unwrap();
assert!(!config.system_server_enabled()); assert!(!config.system_server_enabled());
assert_eq!(config.system_port, -1); assert_eq!(config.system_port, -1);
...@@ -562,7 +570,8 @@ mod tests { ...@@ -562,7 +570,8 @@ mod tests {
#[test] #[test]
fn test_system_server_disabled_with_negative_port() { fn test_system_server_disabled_with_negative_port() {
temp_env::with_vars(vec![("DYN_SYSTEM_PORT", Some("-1"))], || { use environment_names::runtime::system;
temp_env::with_vars(vec![(system::DYN_SYSTEM_PORT, Some("-1"))], || {
let config = RuntimeConfig::from_settings().unwrap(); let config = RuntimeConfig::from_settings().unwrap();
assert!(!config.system_server_enabled()); assert!(!config.system_server_enabled());
assert_eq!(config.system_port, -1); assert_eq!(config.system_port, -1);
...@@ -571,7 +580,8 @@ mod tests { ...@@ -571,7 +580,8 @@ mod tests {
#[test] #[test]
fn test_system_server_enabled_with_port() { fn test_system_server_enabled_with_port() {
temp_env::with_vars(vec![("DYN_SYSTEM_PORT", Some("9527"))], || { use environment_names::runtime::system;
temp_env::with_vars(vec![(system::DYN_SYSTEM_PORT, Some("9527"))], || {
let config = RuntimeConfig::from_settings().unwrap(); let config = RuntimeConfig::from_settings().unwrap();
assert!(config.system_server_enabled()); assert!(config.system_server_enabled());
assert_eq!(config.system_port, 9527); assert_eq!(config.system_port, 9527);
...@@ -580,8 +590,9 @@ mod tests { ...@@ -580,8 +590,9 @@ mod tests {
#[test] #[test]
fn test_system_server_starting_health_status_ready() { fn test_system_server_starting_health_status_ready() {
use environment_names::runtime::system;
temp_env::with_vars( temp_env::with_vars(
vec![("DYN_SYSTEM_STARTING_HEALTH_STATUS", Some("ready"))], vec![(system::DYN_SYSTEM_STARTING_HEALTH_STATUS, Some("ready"))],
|| { || {
let config = RuntimeConfig::from_settings().unwrap(); let config = RuntimeConfig::from_settings().unwrap();
assert!(config.starting_health_status == HealthStatus::Ready); assert!(config.starting_health_status == HealthStatus::Ready);
...@@ -591,8 +602,12 @@ mod tests { ...@@ -591,8 +602,12 @@ mod tests {
#[test] #[test]
fn test_system_use_endpoint_health_status() { fn test_system_use_endpoint_health_status() {
use environment_names::runtime::system;
temp_env::with_vars( temp_env::with_vars(
vec![("DYN_SYSTEM_USE_ENDPOINT_HEALTH_STATUS", Some("[\"ready\"]"))], vec![(
system::DYN_SYSTEM_USE_ENDPOINT_HEALTH_STATUS,
Some("[\"ready\"]"),
)],
|| { || {
let config = RuntimeConfig::from_settings().unwrap(); let config = RuntimeConfig::from_settings().unwrap();
assert!(config.use_endpoint_health_status == vec!["ready"]); assert!(config.use_endpoint_health_status == vec!["ready"]);
...@@ -602,7 +617,8 @@ mod tests { ...@@ -602,7 +617,8 @@ mod tests {
#[test] #[test]
fn test_system_health_endpoint_path_default() { fn test_system_health_endpoint_path_default() {
temp_env::with_vars(vec![("DYN_SYSTEM_HEALTH_PATH", None::<&str>)], || { use environment_names::runtime::system;
temp_env::with_vars(vec![(system::DYN_SYSTEM_HEALTH_PATH, None::<&str>)], || {
let config = RuntimeConfig::from_settings().unwrap(); let config = RuntimeConfig::from_settings().unwrap();
assert_eq!( assert_eq!(
config.system_health_path, config.system_health_path,
...@@ -610,7 +626,7 @@ mod tests { ...@@ -610,7 +626,7 @@ mod tests {
); );
}); });
temp_env::with_vars(vec![("DYN_SYSTEM_LIVE_PATH", None::<&str>)], || { temp_env::with_vars(vec![(system::DYN_SYSTEM_LIVE_PATH, None::<&str>)], || {
let config = RuntimeConfig::from_settings().unwrap(); let config = RuntimeConfig::from_settings().unwrap();
assert_eq!( assert_eq!(
config.system_live_path, config.system_live_path,
...@@ -621,18 +637,22 @@ mod tests { ...@@ -621,18 +637,22 @@ mod tests {
#[test] #[test]
fn test_system_health_endpoint_path_custom() { fn test_system_health_endpoint_path_custom() {
use environment_names::runtime::system;
temp_env::with_vars( temp_env::with_vars(
vec![("DYN_SYSTEM_HEALTH_PATH", Some("/custom/health"))], vec![(system::DYN_SYSTEM_HEALTH_PATH, Some("/custom/health"))],
|| { || {
let config = RuntimeConfig::from_settings().unwrap(); let config = RuntimeConfig::from_settings().unwrap();
assert_eq!(config.system_health_path, "/custom/health"); assert_eq!(config.system_health_path, "/custom/health");
}, },
); );
temp_env::with_vars(vec![("DYN_SYSTEM_LIVE_PATH", Some("/custom/live"))], || { temp_env::with_vars(
vec![(system::DYN_SYSTEM_LIVE_PATH, Some("/custom/live"))],
|| {
let config = RuntimeConfig::from_settings().unwrap(); let config = RuntimeConfig::from_settings().unwrap();
assert_eq!(config.system_live_path, "/custom/live"); assert_eq!(config.system_live_path, "/custom/live");
}); },
);
} }
#[test] #[test]
......
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//! Environment variable name constants for centralized management across the codebase
//!
//! This module provides centralized environment variable name constants to ensure
//! consistency and avoid duplication across the codebase, similar to how
//! `prometheus_names.rs` manages metric names.
//!
//! ## Organization
//!
//! Environment variables are organized by functional area:
//! - **Logging**: Log level, configuration, and OTLP tracing
//! - **Runtime**: Tokio runtime configuration and system server settings
//! - **NATS**: NATS client connection and authentication
//! - **ETCD**: ETCD client connection and authentication
//! - **KVBM**: Key-Value Block Manager configuration
//! - **LLM**: Language model inference configuration
//! - **Model**: Model loading and caching
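//! - **Worker**: Worker lifecycle and shutdown
//! - **CUDA**: Custom CUDA fatbin path
//! - **Build**: Build-time variables (Cargo output directory)
//! - **Testing**: Test-specific configuration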
/// Environment variables that configure logging and tracing
pub mod logging {
    /// Log level, for example `"debug"`, `"info"`, `"warn"` or `"error"`
    pub const DYN_LOG: &str = "DYN_LOG";

    /// Switch log timestamps from UTC (the default) to the local timezone
    pub const DYN_LOG_USE_LOCAL_TZ: &str = "DYN_LOG_USE_LOCAL_TZ";

    /// Location of the logging configuration file
    pub const DYN_LOGGING_CONFIG_PATH: &str = "DYN_LOGGING_CONFIG_PATH";

    /// Turn on the JSONL logging format
    pub const DYN_LOGGING_JSONL: &str = "DYN_LOGGING_JSONL";

    /// Turn off ANSI terminal colors in log output
    pub const DYN_SDK_DISABLE_ANSI_LOGGING: &str = "DYN_SDK_DISABLE_ANSI_LOGGING";

    /// Variables for OTLP (OpenTelemetry Protocol) trace export
    pub mod otlp {
        /// Service name attached to OTLP traces
        pub const OTEL_SERVICE_NAME: &str = "OTEL_SERVICE_NAME";

        /// Turns OTLP trace exporting on (set to "1" to enable)
        pub const OTEL_EXPORT_ENABLED: &str = "OTEL_EXPORT_ENABLED";

        /// URL of the OTLP exporter endpoint
        ///
        /// Spec: <https://opentelemetry.io/docs/specs/otel/protocol/exporter/>
        pub const OTEL_EXPORTER_OTLP_TRACES_ENDPOINT: &str = "OTEL_EXPORTER_OTLP_TRACES_ENDPOINT";
    }
}
/// Environment variables for runtime configuration
///
/// Covers the Tokio runtime, the system health/metrics server, and worker behavior
pub mod runtime {
    /// Upper bound on the number of blocking threads in the Tokio runtime
    pub const DYN_RUNTIME_MAX_BLOCKING_THREADS: &str = "DYN_RUNTIME_MAX_BLOCKING_THREADS";

    /// How many async worker threads the Tokio runtime uses
    pub const DYN_RUNTIME_NUM_WORKER_THREADS: &str = "DYN_RUNTIME_NUM_WORKER_THREADS";

    /// Variables for canary deployments
    pub mod canary {
        /// Canary deployment wait time, in seconds
        pub const DYN_CANARY_WAIT_TIME: &str = "DYN_CANARY_WAIT_TIME";
    }

    /// Variables for compute configuration
    pub mod compute {
        /// Common prefix shared by compute-related environment variables
        pub const PREFIX: &str = "DYN_COMPUTE_";
    }

    /// Variables for the system status server
    pub mod system {
        /// Host the system status server uses
        pub const DYN_SYSTEM_HOST: &str = "DYN_SYSTEM_HOST";

        /// Port the system status server uses
        pub const DYN_SYSTEM_PORT: &str = "DYN_SYSTEM_PORT";

        /// Path of the health check endpoint
        pub const DYN_SYSTEM_HEALTH_PATH: &str = "DYN_SYSTEM_HEALTH_PATH";

        /// Path of the liveness check endpoint
        pub const DYN_SYSTEM_LIVE_PATH: &str = "DYN_SYSTEM_LIVE_PATH";

        /// Health status the system starts with
        pub const DYN_SYSTEM_STARTING_HEALTH_STATUS: &str = "DYN_SYSTEM_STARTING_HEALTH_STATUS";

        /// Turns on the system status server for health and metrics endpoints
        /// ⚠️ DEPRECATED: will be removed soon
        pub const DYN_SYSTEM_ENABLED: &str = "DYN_SYSTEM_ENABLED";

        /// Derive system health from endpoint health status
        /// ⚠️ DEPRECATED: No longer used
        pub const DYN_SYSTEM_USE_ENDPOINT_HEALTH_STATUS: &str =
            "DYN_SYSTEM_USE_ENDPOINT_HEALTH_STATUS";
    }
}
/// Environment variables governing the worker lifecycle
pub mod worker {
    /// Timeout for graceful shutdown, in seconds
    pub const DYN_WORKER_GRACEFUL_SHUTDOWN_TIMEOUT: &str = "DYN_WORKER_GRACEFUL_SHUTDOWN_TIMEOUT";
}
/// Environment variables for the NATS transport
pub mod nats {
    /// Address of the NATS server (e.g., "nats://localhost:4222")
    pub const NATS_SERVER: &str = "NATS_SERVER";

    /// Variables for NATS stream configuration
    pub mod stream {
        /// Maximum age, in seconds, of messages kept in the NATS stream
        pub const DYN_NATS_STREAM_MAX_AGE: &str = "DYN_NATS_STREAM_MAX_AGE";
    }

    /// Variables for NATS authentication (checked in priority order)
    pub mod auth {
        /// NATS authentication username (pair with NATS_AUTH_PASSWORD)
        pub const NATS_AUTH_USERNAME: &str = "NATS_AUTH_USERNAME";

        /// NATS authentication password (pair with NATS_AUTH_USERNAME)
        pub const NATS_AUTH_PASSWORD: &str = "NATS_AUTH_PASSWORD";

        /// NATS authentication token
        pub const NATS_AUTH_TOKEN: &str = "NATS_AUTH_TOKEN";

        /// NATS authentication NKey
        pub const NATS_AUTH_NKEY: &str = "NATS_AUTH_NKEY";

        /// Location of the NATS credentials file
        pub const NATS_AUTH_CREDENTIALS_FILE: &str = "NATS_AUTH_CREDENTIALS_FILE";
    }
}
/// Environment variables for the ETCD transport
pub mod etcd {
    /// Comma-separated list of ETCD endpoint URLs
    pub const ETCD_ENDPOINTS: &str = "ETCD_ENDPOINTS";

    /// Variables for ETCD authentication
    pub mod auth {
        /// ETCD authentication username
        pub const ETCD_AUTH_USERNAME: &str = "ETCD_AUTH_USERNAME";

        /// ETCD authentication password
        pub const ETCD_AUTH_PASSWORD: &str = "ETCD_AUTH_PASSWORD";

        /// Location of the CA certificate used for ETCD TLS
        pub const ETCD_AUTH_CA: &str = "ETCD_AUTH_CA";

        /// Location of the client key used for ETCD TLS
        pub const ETCD_AUTH_CLIENT_KEY: &str = "ETCD_AUTH_CLIENT_KEY";

        /// Location of the client certificate used for ETCD TLS
        pub const ETCD_AUTH_CLIENT_CERT: &str = "ETCD_AUTH_CLIENT_CERT";
    }
}
/// Environment variables for the Key-Value Block Manager (KVBM)
pub mod kvbm {
    /// Turns on KVBM recording for debugging
    pub const ENABLE_KVBM_RECORD: &str = "ENABLE_KVBM_RECORD";

    /// Turns on the KVBM metrics endpoint
    pub const DYN_KVBM_METRICS: &str = "DYN_KVBM_METRICS";

    /// Port of the KVBM metrics endpoint
    pub const DYN_KVBM_METRICS_PORT: &str = "DYN_KVBM_METRICS_PORT";

    /// Turns off the disk offload filter
    pub const DYN_KVBM_DISABLE_DISK_OFFLOAD_FILTER: &str = "DYN_KVBM_DISABLE_DISK_OFFLOAD_FILTER";

    /// Variables for the NIXL backend
    pub mod nixl {
        /// Common prefix of the NIXL backend environment variables
        ///
        /// Pattern: `DYN_KVBM_NIXL_BACKEND_<backend>=true/false`
        /// Example: `DYN_KVBM_NIXL_BACKEND_UCX=true`
        pub const PREFIX: &str = "DYN_KVBM_NIXL_BACKEND_";
    }

    /// Variables for the KVBM leader (distributed mode)
    pub mod leader {
        /// ZMQ host of the KVBM leader
        pub const DYN_KVBM_LEADER_ZMQ_HOST: &str = "DYN_KVBM_LEADER_ZMQ_HOST";

        /// ZMQ publish port of the KVBM leader
        pub const DYN_KVBM_LEADER_ZMQ_PUB_PORT: &str = "DYN_KVBM_LEADER_ZMQ_PUB_PORT";

        /// ZMQ acknowledgment port of the KVBM leader
        pub const DYN_KVBM_LEADER_ZMQ_ACK_PORT: &str = "DYN_KVBM_LEADER_ZMQ_ACK_PORT";

        /// Initialization timeout, in seconds, for the KVBM leader and worker
        pub const DYN_KVBM_LEADER_WORKER_INIT_TIMEOUT_SECS: &str =
            "DYN_KVBM_LEADER_WORKER_INIT_TIMEOUT_SECS";
    }

    /// Variables for the disk cache
    pub mod disk_cache {
        /// Size of the disk cache in GB
        pub const DYN_KVBM_DISK_CACHE_GB: &str = "DYN_KVBM_DISK_CACHE_GB";

        /// Size of the disk cache as a number of blocks (override)
        pub const DYN_KVBM_DISK_CACHE_OVERRIDE_NUM_BLOCKS: &str =
            "DYN_KVBM_DISK_CACHE_OVERRIDE_NUM_BLOCKS";
    }

    /// Variables for the CPU cache
    pub mod cpu_cache {
        /// Size of the CPU cache in GB
        pub const DYN_KVBM_CPU_CACHE_GB: &str = "DYN_KVBM_CPU_CACHE_GB";

        /// Size of the CPU cache as a number of blocks (override)
        pub const DYN_KVBM_CPU_CACHE_OVERRIDE_NUM_BLOCKS: &str =
            "DYN_KVBM_CPU_CACHE_OVERRIDE_NUM_BLOCKS";
    }
}
/// Environment variables for LLM (Language Model) inference
pub mod llm {
    /// Limit on the HTTP body size, in MB
    pub const DYN_HTTP_BODY_LIMIT_MB: &str = "DYN_HTTP_BODY_LIMIT_MB";

    /// Variables for metrics configuration
    pub mod metrics {
        /// Prefix for histogram bucket configuration
        /// (pattern: `<PREFIX>_MIN`, `<PREFIX>_MAX`, `<PREFIX>_COUNT`)
        ///
        /// Example: `DYN_HISTOGRAM_TTFT_MIN`, `DYN_HISTOGRAM_TTFT_MAX`, `DYN_HISTOGRAM_TTFT_COUNT`
        pub const HISTOGRAM_PREFIX: &str = "DYN_HISTOGRAM_";

        /// Custom metrics prefix, replacing the default "dynamo_frontend"
        pub const DYN_METRICS_PREFIX: &str = "DYN_METRICS_PREFIX";
    }
}
/// Environment variables for model loading and caching
pub mod model {
    /// Variables for Hugging Face
    pub mod huggingface {
        /// Home directory used by Hugging Face
        pub const HF_HOME: &str = "HF_HOME";

        /// Cache directory of the Hugging Face Hub
        pub const HF_HUB_CACHE: &str = "HF_HUB_CACHE";

        /// Authentication token for Hugging Face
        pub const HF_TOKEN: &str = "HF_TOKEN";
    }

    /// Variables for Model Express
    pub mod model_express {
        /// Cache path used by Model Express
        pub const MODEL_EXPRESS_CACHE_PATH: &str = "MODEL_EXPRESS_CACHE_PATH";

        /// Endpoint URL of the Model Express server
        pub const MODEL_EXPRESS_URL: &str = "MODEL_EXPRESS_URL";
    }
}
/// Environment variables related to CUDA and GPUs
pub mod cuda {
    /// Location of a custom CUDA fatbin file
    ///
    /// Note: a build.rs file has no way to import this constant at build time,
    /// so each one has to declare its own local constant holding the same value.
    pub const DYNAMO_FATBIN_PATH: &str = "DYNAMO_FATBIN_PATH";
}
/// Environment variables consulted at build time
pub mod build {
    /// Directory Cargo uses for build artifact output
    ///
    /// Note: the `env!()` macro needs a string literal at compile time,
    /// so this constant cannot be passed to it.
    /// It also cannot be imported from build scripts (build.rs).
    pub const OUT_DIR: &str = "OUT_DIR";
}
/// Environment variables used by tests
pub mod testing {
    /// Batch load size for the soak test
    pub const DYN_SOAK_BATCH_LOAD: &str = "DYN_SOAK_BATCH_LOAD";

    /// How long the soak test runs (e.g., "3s", "5m")
    pub const DYN_SOAK_RUN_DURATION: &str = "DYN_SOAK_RUN_DURATION";

    /// Turns on queued-up request processing in tests
    pub const DYN_QUEUED_UP_PROCESSING: &str = "DYN_QUEUED_UP_PROCESSING";
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Every named environment variable constant must hold a distinct string.
    ///
    /// The `PREFIX`-style constants are not part of this list.
    #[test]
    fn test_no_duplicate_env_var_names() {
        use std::collections::HashSet;

        let all_names = [
            // Logging
            logging::DYN_LOG,
            logging::DYN_LOGGING_CONFIG_PATH,
            logging::DYN_LOGGING_JSONL,
            logging::DYN_SDK_DISABLE_ANSI_LOGGING,
            logging::DYN_LOG_USE_LOCAL_TZ,
            logging::otlp::OTEL_EXPORT_ENABLED,
            logging::otlp::OTEL_EXPORTER_OTLP_TRACES_ENDPOINT,
            logging::otlp::OTEL_SERVICE_NAME,
            // Runtime
            runtime::DYN_RUNTIME_NUM_WORKER_THREADS,
            runtime::DYN_RUNTIME_MAX_BLOCKING_THREADS,
            runtime::system::DYN_SYSTEM_ENABLED,
            runtime::system::DYN_SYSTEM_HOST,
            runtime::system::DYN_SYSTEM_PORT,
            runtime::system::DYN_SYSTEM_USE_ENDPOINT_HEALTH_STATUS,
            runtime::system::DYN_SYSTEM_STARTING_HEALTH_STATUS,
            runtime::system::DYN_SYSTEM_HEALTH_PATH,
            runtime::system::DYN_SYSTEM_LIVE_PATH,
            runtime::canary::DYN_CANARY_WAIT_TIME,
            // Worker
            worker::DYN_WORKER_GRACEFUL_SHUTDOWN_TIMEOUT,
            // NATS
            nats::NATS_SERVER,
            nats::auth::NATS_AUTH_USERNAME,
            nats::auth::NATS_AUTH_PASSWORD,
            nats::auth::NATS_AUTH_TOKEN,
            nats::auth::NATS_AUTH_NKEY,
            nats::auth::NATS_AUTH_CREDENTIALS_FILE,
            nats::stream::DYN_NATS_STREAM_MAX_AGE,
            // ETCD
            etcd::ETCD_ENDPOINTS,
            etcd::auth::ETCD_AUTH_USERNAME,
            etcd::auth::ETCD_AUTH_PASSWORD,
            etcd::auth::ETCD_AUTH_CA,
            etcd::auth::ETCD_AUTH_CLIENT_CERT,
            etcd::auth::ETCD_AUTH_CLIENT_KEY,
            // KVBM
            kvbm::DYN_KVBM_METRICS,
            kvbm::DYN_KVBM_METRICS_PORT,
            kvbm::ENABLE_KVBM_RECORD,
            kvbm::DYN_KVBM_DISABLE_DISK_OFFLOAD_FILTER,
            kvbm::cpu_cache::DYN_KVBM_CPU_CACHE_GB,
            kvbm::cpu_cache::DYN_KVBM_CPU_CACHE_OVERRIDE_NUM_BLOCKS,
            kvbm::disk_cache::DYN_KVBM_DISK_CACHE_GB,
            kvbm::disk_cache::DYN_KVBM_DISK_CACHE_OVERRIDE_NUM_BLOCKS,
            kvbm::leader::DYN_KVBM_LEADER_WORKER_INIT_TIMEOUT_SECS,
            kvbm::leader::DYN_KVBM_LEADER_ZMQ_HOST,
            kvbm::leader::DYN_KVBM_LEADER_ZMQ_PUB_PORT,
            kvbm::leader::DYN_KVBM_LEADER_ZMQ_ACK_PORT,
            // LLM
            llm::DYN_HTTP_BODY_LIMIT_MB,
            llm::metrics::DYN_METRICS_PREFIX,
            // Model
            model::model_express::MODEL_EXPRESS_URL,
            model::model_express::MODEL_EXPRESS_CACHE_PATH,
            model::huggingface::HF_TOKEN,
            model::huggingface::HF_HUB_CACHE,
            model::huggingface::HF_HOME,
            // CUDA
            cuda::DYNAMO_FATBIN_PATH,
            // Build
            build::OUT_DIR,
            // Testing
            testing::DYN_QUEUED_UP_PROCESSING,
            testing::DYN_SOAK_RUN_DURATION,
            testing::DYN_SOAK_BATCH_LOAD,
        ];

        // `insert` returns false when the value is already in the set,
        // which means the same string was declared twice.
        let mut unique = HashSet::new();
        for name in &all_names {
            assert!(
                unique.insert(name),
                "Duplicate environment variable name: {}",
                name
            );
        }
    }

    /// Spot-checks that representative constants carry the prefix of their family.
    #[test]
    fn test_naming_conventions() {
        let expectations = [
            // Dynamo-specific vars should start with DYN_
            (runtime::DYN_RUNTIME_NUM_WORKER_THREADS, "DYN_"),
            (runtime::system::DYN_SYSTEM_ENABLED, "DYN_"),
            (kvbm::DYN_KVBM_METRICS, "DYN_"),
            (worker::DYN_WORKER_GRACEFUL_SHUTDOWN_TIMEOUT, "DYN_"),
            // NATS vars should start with NATS_
            (nats::NATS_SERVER, "NATS_"),
            (nats::auth::NATS_AUTH_USERNAME, "NATS_AUTH_"),
            // ETCD vars should start with ETCD_
            (etcd::ETCD_ENDPOINTS, "ETCD_"),
            (etcd::auth::ETCD_AUTH_USERNAME, "ETCD_AUTH_"),
            // OpenTelemetry vars should start with OTEL_
            (logging::otlp::OTEL_EXPORT_ENABLED, "OTEL_"),
            (logging::otlp::OTEL_SERVICE_NAME, "OTEL_"),
        ];

        for (name, prefix) in expectations {
            assert!(
                name.starts_with(prefix),
                "{} should start with {}",
                name,
                prefix
            );
        }
    }
}
...@@ -567,8 +567,9 @@ mod tests { ...@@ -567,8 +567,9 @@ mod tests {
#[tokio::test] #[tokio::test]
async fn test_drt_uptime_after_delay_system_disabled() { async fn test_drt_uptime_after_delay_system_disabled() {
use crate::config::environment_names::runtime::system as env_system;
// Test uptime with system status server disabled // Test uptime with system status server disabled
temp_env::async_with_vars([("DYN_SYSTEM_PORT", None::<&str>)], async { temp_env::async_with_vars([(env_system::DYN_SYSTEM_PORT, None::<&str>)], async {
// Start a DRT // Start a DRT
let drt = create_test_drt_async().await; let drt = create_test_drt_async().await;
...@@ -593,8 +594,9 @@ mod tests { ...@@ -593,8 +594,9 @@ mod tests {
#[tokio::test] #[tokio::test]
async fn test_drt_uptime_after_delay_system_enabled() { async fn test_drt_uptime_after_delay_system_enabled() {
use crate::config::environment_names::runtime::system as env_system;
// Test uptime with system status server enabled // Test uptime with system status server enabled
temp_env::async_with_vars([("DYN_SYSTEM_PORT", Some("8081"))], async { temp_env::async_with_vars([(env_system::DYN_SYSTEM_PORT, Some("8081"))], async {
// Start a DRT // Start a DRT
let drt = create_test_drt_async().await; let drt = create_test_drt_async().await;
......
...@@ -87,28 +87,14 @@ use tracing::{info, instrument}; ...@@ -87,28 +87,14 @@ use tracing::{info, instrument};
use tracing_opentelemetry::OpenTelemetrySpanExt; use tracing_opentelemetry::OpenTelemetrySpanExt;
use tracing_subscriber::util::SubscriberInitExt; use tracing_subscriber::util::SubscriberInitExt;
/// ENV used to set the log level use crate::config::environment_names::logging as env_logging;
const FILTER_ENV: &str = "DYN_LOG";
/// Default log level /// Default log level
const DEFAULT_FILTER_LEVEL: &str = "info"; const DEFAULT_FILTER_LEVEL: &str = "info";
/// ENV used to set the path to the logging configuration file
const CONFIG_PATH_ENV: &str = "DYN_LOGGING_CONFIG_PATH";
/// Enable OTLP trace exporting
const OTEL_EXPORT_ENABLED_ENV: &str = "OTEL_EXPORT_ENABLED";
/// (OLTP exporter env var spec defined here - https://opentelemetry.io/docs/specs/otel/protocol/exporter/)
/// OTEL exporter endpoint
const OTEL_EXPORT_ENDPOINT_ENV: &str = "OTEL_EXPORTER_OTLP_TRACES_ENDPOINT";
/// Default OTLP endpoint /// Default OTLP endpoint
const DEFAULT_OTLP_ENDPOINT: &str = "http://localhost:4317"; const DEFAULT_OTLP_ENDPOINT: &str = "http://localhost:4317";
/// Service name environment variable
const OTEL_SERVICE_NAME_ENV: &str = "OTEL_SERVICE_NAME";
/// Default service name /// Default service name
const DEFAULT_OTEL_SERVICE_NAME: &str = "dynamo"; const DEFAULT_OTEL_SERVICE_NAME: &str = "dynamo";
...@@ -144,14 +130,17 @@ impl Default for LoggingConfig { ...@@ -144,14 +130,17 @@ impl Default for LoggingConfig {
} }
} }
/// Check if OTLP trace exporting is enabled (set OTEL_EXPORT_ENABLED to a truthy value: 1, true, on, yes) /// Check if OTLP trace exporting is enabled (set OTEL_EXPORT_ENABLED to "1" to enable)
fn otlp_exporter_enabled() -> bool { fn otlp_exporter_enabled() -> bool {
crate::config::env_is_truthy(OTEL_EXPORT_ENABLED_ENV) std::env::var(env_logging::otlp::OTEL_EXPORT_ENABLED)
.map(|v| v == "1")
.unwrap_or(false)
} }
/// Get the service name from environment or use default /// Get the service name from environment or use default
fn get_service_name() -> String { fn get_service_name() -> String {
std::env::var(OTEL_SERVICE_NAME_ENV).unwrap_or_else(|_| DEFAULT_OTEL_SERVICE_NAME.to_string()) std::env::var(env_logging::otlp::OTEL_SERVICE_NAME)
.unwrap_or_else(|_| DEFAULT_OTEL_SERVICE_NAME.to_string())
} }
/// Validate a given trace ID according to W3C Trace Context specifications. /// Validate a given trace ID according to W3C Trace Context specifications.
...@@ -777,7 +766,7 @@ fn setup_logging() -> Result<(), Box<dyn std::error::Error>> { ...@@ -777,7 +766,7 @@ fn setup_logging() -> Result<(), Box<dyn std::error::Error>> {
// Build tracer provider - with or without OTLP export // Build tracer provider - with or without OTLP export
let (tracer_provider, endpoint_opt) = if otlp_exporter_enabled() { let (tracer_provider, endpoint_opt) = if otlp_exporter_enabled() {
// Export enabled: create OTLP exporter with batch processor // Export enabled: create OTLP exporter with batch processor
let endpoint = std::env::var(OTEL_EXPORT_ENDPOINT_ENV) let endpoint = std::env::var(env_logging::otlp::OTEL_EXPORTER_OTLP_TRACES_ENDPOINT)
.unwrap_or_else(|_| DEFAULT_OTLP_ENDPOINT.to_string()); .unwrap_or_else(|_| DEFAULT_OTLP_ENDPOINT.to_string());
// Initialize OTLP exporter using gRPC (Tonic) // Initialize OTLP exporter using gRPC (Tonic)
...@@ -852,7 +841,7 @@ fn setup_logging() -> Result<(), Box<dyn std::error::Error>> { ...@@ -852,7 +841,7 @@ fn setup_logging() -> Result<(), Box<dyn std::error::Error>> {
fn filters(config: LoggingConfig) -> EnvFilter { fn filters(config: LoggingConfig) -> EnvFilter {
let mut filter_layer = EnvFilter::builder() let mut filter_layer = EnvFilter::builder()
.with_default_directive(config.log_level.parse().unwrap()) .with_default_directive(config.log_level.parse().unwrap())
.with_env_var(FILTER_ENV) .with_env_var(env_logging::DYN_LOG)
.from_env_lossy(); .from_env_lossy();
for (module, level) in config.log_filters { for (module, level) in config.log_filters {
...@@ -891,7 +880,8 @@ pub fn log_message(level: &str, message: &str, module: &str, file: &str, line: u ...@@ -891,7 +880,8 @@ pub fn log_message(level: &str, message: &str, module: &str, file: &str, line: u
} }
fn load_config() -> LoggingConfig { fn load_config() -> LoggingConfig {
let config_path = std::env::var(CONFIG_PATH_ENV).unwrap_or_else(|_| "".to_string()); let config_path =
std::env::var(env_logging::DYN_LOGGING_CONFIG_PATH).unwrap_or_else(|_| "".to_string());
let figment = Figment::new() let figment = Figment::new()
.merge(Serialized::defaults(LoggingConfig::default())) .merge(Serialized::defaults(LoggingConfig::default()))
.merge(Toml::file("/opt/dynamo/etc/logging.toml")) .merge(Toml::file("/opt/dynamo/etc/logging.toml"))
...@@ -1288,7 +1278,7 @@ pub mod tests { ...@@ -1288,7 +1278,7 @@ pub mod tests {
async fn test_json_log_capture() -> Result<()> { async fn test_json_log_capture() -> Result<()> {
#[allow(clippy::redundant_closure_call)] #[allow(clippy::redundant_closure_call)]
let _ = temp_env::async_with_vars( let _ = temp_env::async_with_vars(
[("DYN_LOGGING_JSONL", Some("1"))], [(env_logging::DYN_LOGGING_JSONL, Some("1"))],
(async || { (async || {
let tmp_file = NamedTempFile::new().unwrap(); let tmp_file = NamedTempFile::new().unwrap();
let file_name = tmp_file.path().to_str().unwrap(); let file_name = tmp_file.path().to_str().unwrap();
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment