Unverified Commit cb7ebdd7 authored by Keiven C's avatar Keiven C Committed by GitHub
Browse files

chore: remove unused NIM specific code (part 2) (#5893)


Signed-off-by: Keiven Chang <keivenchang@users.noreply.github.com>
Co-authored-by: Keiven Chang <keivenchang@users.noreply.github.com>
parent 8b651fe9
......@@ -43,10 +43,6 @@ from dynamo.runtime.logging import configure_dynamo_logging
from . import __version__
DYN_NAMESPACE_ENV_VAR = "DYN_NAMESPACE"
CUSTOM_BACKEND_METRICS_POLLING_INTERVAL_ENV_VAR = (
"CUSTOM_BACKEND_METRICS_POLLING_INTERVAL"
)
CUSTOM_BACKEND_ENDPOINT_ENV_VAR = "CUSTOM_BACKEND_ENDPOINT"
configure_dynamo_logging()
logger = logging.getLogger(__name__)
......@@ -276,22 +272,6 @@ def parse_args():
help="HTTP metrics port for gRPC service (u16). Only used with --kserve-grpc-server. Defaults to 8788.",
)
add_config_dump_args(parser)
parser.add_argument(
"--custom-backend-metrics-endpoint",
type=str,
default=os.environ.get(
CUSTOM_BACKEND_ENDPOINT_ENV_VAR, "nim.backend.runtime_stats"
),
help=f"Custom backend endpoint to poll for metrics in format 'namespace.component.endpoint' (default: 'nim.backend.runtime_stats'). Required if --custom-backend-metrics-polling-interval is specified. All metrics will be prefixed with 'dynamo_component_' in Prometheus. Can be set via {CUSTOM_BACKEND_ENDPOINT_ENV_VAR} env var.",
)
parser.add_argument(
"--custom-backend-metrics-polling-interval",
type=float,
default=float(
os.environ.get(CUSTOM_BACKEND_METRICS_POLLING_INTERVAL_ENV_VAR, "0")
),
help=f"Interval in seconds for polling custom backend metrics. Set to > 0 to enable polling (default: 0=disabled, suggested: 9.2s which is less than typical Prometheus scrape interval). Can be set via {CUSTOM_BACKEND_METRICS_POLLING_INTERVAL_ENV_VAR} env var.",
)
parser.add_argument(
"--store-kv",
type=str,
......@@ -324,10 +304,6 @@ def parse_args():
if bool(flags.tls_cert_path) ^ bool(flags.tls_key_path): # ^ is XOR
parser.error("--tls-cert-path and --tls-key-path must be provided together")
if flags.custom_backend_metrics_polling_interval < 0:
parser.error(
"--custom-backend-metrics-polling-interval must be >= 0 (0=disabled)"
)
return flags
......@@ -431,14 +407,6 @@ async def async_main():
kwargs["namespace"] = flags.namespace
if flags.kserve_grpc_server and flags.grpc_metrics_port:
kwargs["http_metrics_port"] = flags.grpc_metrics_port
if flags.custom_backend_metrics_endpoint:
kwargs[
"custom_backend_metrics_endpoint"
] = flags.custom_backend_metrics_endpoint
if flags.custom_backend_metrics_polling_interval:
kwargs[
"custom_backend_metrics_polling_interval"
] = flags.custom_backend_metrics_polling_interval
if flags.exp_python_factory:
kwargs["engine_factory"] = engine_factory
......
......@@ -806,7 +806,6 @@ class BaseWorkerHandler(ABC):
Decode base64-encoded prompt embeddings in PyTorch format.
Format: PyTorch tensor serialized with torch.save() and base64-encoded.
This matches NIM-LLM's implementation for compatibility.
Args:
prompt_embeds_base64: Base64-encoded PyTorch tensor
......
......@@ -171,8 +171,6 @@ pub(crate) struct EntrypointArgs {
tls_key_path: Option<PathBuf>,
extra_engine_args: Option<PathBuf>,
namespace: Option<String>,
custom_backend_metrics_endpoint: Option<String>,
custom_backend_metrics_polling_interval: Option<f64>,
is_prefill: bool,
engine_factory: Option<PyEngineFactory>,
}
......@@ -181,7 +179,7 @@ pub(crate) struct EntrypointArgs {
impl EntrypointArgs {
#[allow(clippy::too_many_arguments)]
#[new]
#[pyo3(signature = (engine_type, model_path=None, model_name=None, endpoint_id=None, context_length=None, template_file=None, router_config=None, kv_cache_block_size=None, http_host=None, http_port=None, http_metrics_port=None, tls_cert_path=None, tls_key_path=None, extra_engine_args=None, namespace=None, custom_backend_metrics_endpoint=None, custom_backend_metrics_polling_interval=None, is_prefill=false, engine_factory=None))]
#[pyo3(signature = (engine_type, model_path=None, model_name=None, endpoint_id=None, context_length=None, template_file=None, router_config=None, kv_cache_block_size=None, http_host=None, http_port=None, http_metrics_port=None, tls_cert_path=None, tls_key_path=None, extra_engine_args=None, namespace=None, is_prefill=false, engine_factory=None))]
pub fn new(
py: Python<'_>,
engine_type: EngineType,
......@@ -199,8 +197,6 @@ impl EntrypointArgs {
tls_key_path: Option<PathBuf>,
extra_engine_args: Option<PathBuf>,
namespace: Option<String>,
custom_backend_metrics_endpoint: Option<String>,
custom_backend_metrics_polling_interval: Option<f64>,
is_prefill: bool,
engine_factory: Option<PyObject>,
) -> PyResult<Self> {
......@@ -245,8 +241,6 @@ impl EntrypointArgs {
tls_key_path,
extra_engine_args,
namespace,
custom_backend_metrics_endpoint,
custom_backend_metrics_polling_interval,
is_prefill,
engine_factory,
})
......@@ -287,9 +281,7 @@ pub fn make_engine<'p>(
.tls_key_path(args.tls_key_path.clone())
.is_mocker(matches!(args.engine_type, EngineType::Mocker))
.extra_engine_args(args.extra_engine_args.clone())
.namespace(args.namespace.clone())
.custom_backend_metrics_endpoint(args.custom_backend_metrics_endpoint.clone())
.custom_backend_metrics_polling_interval(args.custom_backend_metrics_polling_interval);
.namespace(args.namespace.clone());
pyo3_async_runtimes::tokio::future_into_py(py, async move {
if let Some(model_path) = args.model_path.clone() {
let local_path = if model_path.exists() {
......
......@@ -3,7 +3,7 @@
//! Helper binary to generate the Dynamo HTTP frontend OpenAPI specification.
//!
//! This allows CI, documentation tooling, and NIM to obtain the exact same
//! This allows CI and documentation tooling to obtain the exact same
//! OpenAPI document that is served at `/openapi.json` by the frontend
//! without having to start the HTTP service and scrape the endpoint.
//!
......
......@@ -51,15 +51,6 @@ pub async fn run(
http_service_builder =
http_service_builder.with_request_template(engine_config.local_model().request_template());
// DEPRECATED: To be removed after custom backends migrate to Dynamo backend.
// Pass the custom backend metrics endpoint as-is (already in namespace.component.endpoint format)
http_service_builder = http_service_builder.with_custom_backend_config(
local_model
.custom_backend_metrics_endpoint()
.map(|s| s.to_string()),
local_model.custom_backend_metrics_polling_interval(),
);
let http_service = match engine_config {
EngineConfig::Dynamic {
ref model,
......@@ -145,46 +136,10 @@ pub async fn run(
.collect::<Vec<String>>()
);
// DEPRECATED: To be removed after custom backends migrate to Dynamo backend.
// Start custom backend metrics polling if configured
let polling_task =
if let (Some(namespace_component_endpoint), Some(polling_interval), Some(registry)) = (
http_service
.custom_backend_namespace_component_endpoint
.as_ref(),
http_service.custom_backend_metrics_polling_interval,
http_service.custom_backend_registry.as_ref(),
) {
tracing::info!(
namespace_component_endpoint=%namespace_component_endpoint,
polling_interval_secs=polling_interval,
"Starting custom backend metrics polling task"
);
// Spawn the polling task and keep the JoinHandle alive so it can be aborted during
// shutdown. While graceful shutdown is not strictly necessary for this non-critical
// metrics polling, explicitly aborting it prevents the task from running during the
// shutdown phase.
Some(
crate::http::service::custom_backend_metrics::spawn_custom_backend_polling_task(
distributed_runtime.clone(),
namespace_component_endpoint.clone(),
polling_interval,
registry.clone(),
),
)
} else {
None
};
http_service
.run(distributed_runtime.primary_token())
.await?;
// Abort the polling task if it was started
if let Some(task) = polling_task {
task.abort();
}
distributed_runtime.shutdown(); // Cancel primary token
Ok(())
}
......
......@@ -21,7 +21,6 @@
mod openai;
pub mod busy_threshold;
pub mod custom_backend_metrics;
pub mod disconnect;
pub mod error;
pub mod health;
......
// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
// DEPRECATED: To be removed after custom backends migrate to Dynamo backend.
//
// Custom backend metrics polling and collection.
//
// This module provides a bridge to poll metrics from custom backends (like NIM) that expose
// their own metrics endpoints, and makes them available through Prometheus.
use std::{
collections::HashMap,
sync::{Arc, Mutex},
time::Duration,
};
use serde::Deserialize;
/// Maximum number of custom backend gauges that can be registered to prevent unbounded growth.
pub const MAX_CUSTOM_BACKEND_GAUGES: usize = 100;

/// Registry for custom backend metrics discovered at runtime.
///
/// Metrics from custom backends are exposed as Prometheus gauges since we're setting
/// absolute values received from polling, not incrementing them locally.
///
/// All metrics are automatically prefixed when registered. For example, if the prefix is
/// `dynamo_component` and a backend reports a gauge named `kv_cache_usage_perc`, it will
/// be exposed as `dynamo_component_kv_cache_usage_perc` in Prometheus metrics.
pub struct CustomBackendMetricsRegistry {
    /// Gauges created so far, keyed by the unprefixed name reported by the backend.
    gauges: Mutex<HashMap<String, prometheus::Gauge>>,
    /// Prefix joined to every backend metric name with a single `_`.
    prefix: String,
    /// Prometheus registry that newly created gauges are registered with.
    prometheus_registry: prometheus::Registry,
}

impl CustomBackendMetricsRegistry {
    /// Create an empty registry that prefixes metric names with `prefix` and registers
    /// new gauges with `prometheus_registry`.
    pub fn new(prefix: String, prometheus_registry: prometheus::Registry) -> Self {
        Self {
            gauges: Mutex::new(HashMap::new()),
            prefix,
            prometheus_registry,
        }
    }

    /// Get or create a gauge for the given metric name, registering it with Prometheus if new.
    ///
    /// Returns `None` (after logging a warning) when either:
    /// - the maximum number of gauges ([`MAX_CUSTOM_BACKEND_GAUGES`]) has been reached, or
    /// - the prefixed name is rejected by Prometheus when constructing the gauge.
    ///
    /// Metric names come from the polled backend, so a rejected name is treated as bad
    /// input and dropped rather than as a bug worth panicking over.
    fn get_or_create_gauge(&self, name: &str) -> Option<prometheus::Gauge> {
        // A poisoned lock only records that some thread panicked while holding it; the
        // map is still usable, so keep serving metrics instead of panicking again.
        let mut gauges = self
            .gauges
            .lock()
            .unwrap_or_else(|poisoned| poisoned.into_inner());

        if let Some(gauge) = gauges.get(name) {
            return Some(gauge.clone());
        }

        // Cap the number of gauges to prevent unbounded growth
        if gauges.len() >= MAX_CUSTOM_BACKEND_GAUGES {
            tracing::warn!(
                "Maximum number of custom backend gauges ({}) reached, dropping metric: {}",
                MAX_CUSTOM_BACKEND_GAUGES,
                name
            );
            return None;
        }

        let full_name = format!("{}_{}", self.prefix, name);
        // The unprefixed name doubles as the help text.
        let gauge = match prometheus::Gauge::new(full_name.as_str(), name) {
            Ok(gauge) => gauge,
            Err(e) => {
                tracing::warn!(
                    "Failed to create custom backend gauge {}, dropping metric: {}",
                    full_name,
                    e
                );
                return None;
            }
        };

        // A failed registration is logged but the gauge is still cached, so the same
        // name does not retry (and re-log) registration on every poll.
        if let Err(e) = self.prometheus_registry.register(Box::new(gauge.clone())) {
            tracing::warn!(
                "Failed to register custom backend gauge {}: {}",
                full_name,
                e
            );
        }

        gauges.insert(name.to_string(), gauge.clone());
        Some(gauge)
    }

    /// Update a gauge metric with a new value.
    ///
    /// The gauge is created on first use; if it cannot be created the value is dropped.
    pub fn set_gauge(&self, name: &str, value: f64) {
        if let Some(gauge) = self.get_or_create_gauge(name) {
            gauge.set(value);
        }
    }
}
/// Response format from custom backend runtime_stats endpoint
///
/// Deserialized from the JSON string payload returned by the backend. Only the
/// `metrics` key is read here; no `deny_unknown_fields` attribute is set, so other
/// keys in the payload are not rejected.
#[derive(Debug, Deserialize)]
struct CustomBackendStatsResponse {
/// Container for the metric values reported by the backend.
metrics: CustomBackendMetrics,
}
/// The `metrics` object of a custom backend stats response.
#[derive(Debug, Deserialize)]
struct CustomBackendMetrics {
/// Gauge readings keyed by the metric name as reported by the backend (unprefixed).
gauges: HashMap<String, f64>,
}
/// Convert a polling interval given in seconds into a [`Duration`].
///
/// Returns `None` unless `secs` is finite, strictly positive, and representable as a
/// `Duration` (values that round down to a zero duration are rejected as well).
fn polling_interval_from_secs(secs: f64) -> Option<Duration> {
    Duration::try_from_secs_f64(secs)
        .ok()
        .filter(|interval| !interval.is_zero())
}

/// Spawn a background task that polls custom backend metrics periodically.
///
/// All metrics collected from the backend will be prefixed according to the registry's prefix
/// (typically `dynamo_component_`). For example, a backend gauge `kv_cache_usage_perc` will
/// appear as `dynamo_component_kv_cache_usage_perc` in Prometheus.
///
/// # Arguments
/// * `drt` - runtime used to resolve the namespace, component and endpoint.
/// * `namespace_component_endpoint` - target in `namespace.component.endpoint` format.
/// * `polling_interval_secs` - delay between polls in seconds; must be finite and `> 0`.
/// * `registry` - registry whose gauges are updated with every successful poll.
///
/// # Returns
/// The `JoinHandle` of the spawned task. The task takes no cancellation token, so the
/// handle is the only way to stop it early (e.g. `abort()`); otherwise it runs until the
/// tokio runtime shuts down.
///
/// The task logs an error and exits early, without polling, when the polling interval
/// or the endpoint format is invalid, or when the namespace, component or router cannot
/// be obtained. Failure to create the client is retried every 5 seconds indefinitely,
/// and individual poll failures are logged and do not stop the loop.
pub fn spawn_custom_backend_polling_task(
    drt: dynamo_runtime::DistributedRuntime,
    namespace_component_endpoint: String,
    polling_interval_secs: f64,
    registry: Arc<CustomBackendMetricsRegistry>,
) -> tokio::task::JoinHandle<()> {
    tokio::spawn(async move {
        tracing::info!(
            namespace_component_endpoint=%namespace_component_endpoint,
            interval_secs=polling_interval_secs,
            "Starting custom backend metrics polling"
        );

        // Validate the interval before doing any work: `Duration::from_secs_f64` panics on
        // negative/NaN/infinite input, and a zero interval would turn the loop below into
        // a busy loop hammering the backend.
        let Some(interval) = polling_interval_from_secs(polling_interval_secs) else {
            tracing::error!(
                interval_secs=polling_interval_secs,
                "Invalid custom backend metrics polling interval, expected a finite value > 0"
            );
            return;
        };

        // Parse namespace.component.endpoint format
        let parts: Vec<&str> = namespace_component_endpoint.split('.').collect();
        let &[namespace, component_name, endpoint_name] = parts.as_slice() else {
            tracing::error!(
                namespace_component_endpoint=%namespace_component_endpoint,
                "Invalid endpoint format, expected 'namespace.component.endpoint'"
            );
            return;
        };

        // Get namespace, component, and endpoint from DRT
        let Ok(ns) = drt.namespace(namespace.to_string()) else {
            tracing::error!("Namespace not available: {}", namespace);
            return;
        };
        let Ok(component) = ns.component(component_name) else {
            tracing::error!("Component not available: {}", component_name);
            return;
        };
        let endpoint = component.endpoint(endpoint_name);

        // Wait for client to be ready (backend might not be available yet)
        let client = loop {
            match endpoint.client().await {
                Ok(client) => break client,
                Err(e) => {
                    tracing::warn!(
                        error=%e,
                        namespace=%namespace,
                        component=%component_name,
                        endpoint=%endpoint_name,
                        "Failed to create client for custom backend endpoint, retrying in 5s"
                    );
                    tokio::time::sleep(Duration::from_secs(5)).await;
                }
            }
        };

        // Create router for sending requests to the backend
        use dynamo_runtime::pipeline::{PushRouter, RouterMode};
        use dynamo_runtime::protocols::annotated::Annotated;
        let Ok(router) =
            PushRouter::<String, Annotated<String>>::from_client(client, RouterMode::Random).await
        else {
            tracing::error!(
                namespace=%namespace,
                component=%component_name,
                endpoint=%endpoint_name,
                "Failed to create router for custom backend endpoint"
            );
            return;
        };

        tracing::info!(
            namespace=%namespace,
            component=%component_name,
            endpoint=%endpoint_name,
            "Custom backend metrics polling started"
        );

        // Poll backend at regular intervals. The sleep comes first, so the first poll
        // happens one full interval after the router is ready.
        loop {
            tokio::time::sleep(interval).await;

            match poll_backend_once(&router, &registry).await {
                Ok(num_metrics) => {
                    tracing::debug!(
                        num_metrics=%num_metrics,
                        "Successfully polled custom backend metrics"
                    );
                }
                Err(e) => {
                    tracing::warn!(
                        error=%e,
                        "Failed to poll custom backend metrics"
                    );
                }
            }
        }
    })
}
/// Poll the backend once and update the registry.
async fn poll_backend_once(
router: &dynamo_runtime::pipeline::PushRouter<
String,
dynamo_runtime::protocols::annotated::Annotated<String>,
>,
registry: &Arc<CustomBackendMetricsRegistry>,
) -> anyhow::Result<usize> {
use dynamo_runtime::pipeline::Context;
let response_stream = router.random(Context::new("".to_string())).await?;
// Collect responses from the stream
let mut responses = Vec::new();
{
use futures::StreamExt;
let mut stream = response_stream;
while let Some(response) = stream.next().await {
if let Some(data) = response.data {
responses.push(data);
}
}
}
if responses.is_empty() {
anyhow::bail!("No responses received from custom backend");
}
// Parse the first response as JSON
// Expected format from backend (as JSON string):
// {
// "schema_version": 1,
// "worker_id": "mock-worker-1",
// "backend": "vllm",
// "ts": 1759967807,
// "metrics": {
// "gauges": {
// "kv_cache_usage_perc": 0.3,
// "gpu_utilization_perc": 75.5,
// "active_requests": 5
// }
// }
// }
let stats: CustomBackendStatsResponse = serde_json::from_str(&responses[0])
.map_err(|e| anyhow::anyhow!("Failed to parse backend stats JSON: {}", e))?;
// Update gauges in the registry
for (name, value) in &stats.metrics.gauges {
registry.set_gauge(name, *value);
}
Ok(stats.metrics.gauges.len())
}
......@@ -1270,9 +1270,6 @@ pub fn router(registry: Registry, path: Option<String>) -> (Vec<RouteDoc>, Route
/// Unified metrics handler
async fn handler_metrics(State(state): State<Arc<MetricsHandlerState>>) -> impl IntoResponse {
// Gather and encode metrics
// Note: If nim_on_demand is enabled, the NimMetricsCollector registered with the registry
// will automatically call poll_nim_backend_stats when gather() is invoked
let encoder = prometheus::TextEncoder::new();
let metric_families = state.registry.gather();
let mut buffer = vec![];
......
......@@ -69,7 +69,7 @@ struct ApiDoc;
/// Generate OpenAPI specification from route documentation
///
/// This is the core helper used both by the embedded Swagger UI and by
/// external tools (for example CI or NIM) which need to materialize the
/// external tools (for example CI) which need to materialize the
/// same frontend OpenAPI specification without running the HTTP service.
pub fn generate_openapi_spec(route_docs: &[RouteDoc]) -> utoipa::openapi::OpenApi {
let mut openapi = ApiDoc::openapi();
......
......@@ -25,7 +25,6 @@ use derive_builder::Builder;
use dynamo_runtime::config::environment_names::llm as env_llm;
use dynamo_runtime::discovery::{Discovery, KVStoreDiscovery};
use dynamo_runtime::logging::make_request_span;
use dynamo_runtime::metrics::prometheus_names::name_prefix;
use dynamo_runtime::storage::kv;
use std::net::SocketAddr;
use tokio::task::JoinHandle;
......@@ -162,12 +161,6 @@ pub struct HttpService {
tls_cert_path: Option<PathBuf>,
tls_key_path: Option<PathBuf>,
route_docs: Vec<RouteDoc>,
// DEPRECATED: To be removed after custom backends migrate to Dynamo backend.
pub(crate) custom_backend_namespace_component_endpoint: Option<String>,
pub(crate) custom_backend_metrics_polling_interval: Option<f64>,
pub(crate) custom_backend_registry:
Option<Arc<super::custom_backend_metrics::CustomBackendMetricsRegistry>>,
}
#[derive(Clone, Builder)]
......@@ -207,13 +200,6 @@ pub struct HttpServiceConfig {
#[builder(default)]
store: kv::Manager,
// DEPRECATED: To be removed after custom backends migrate to Dynamo backend.
#[builder(default = "None")]
custom_backend_namespace_component_endpoint: Option<String>,
#[builder(default = "None")]
custom_backend_metrics_polling_interval: Option<f64>,
}
impl HttpService {
......@@ -405,22 +391,6 @@ impl HttpServiceConfigBuilder {
tracing::warn!("Failed to register worker timing metrics: {}", e);
}
// DEPRECATED: To be removed after custom backends migrate to Dynamo backend.
// Setup custom backend metrics if configured
let custom_backend_registry =
if config.custom_backend_namespace_component_endpoint.is_some()
&& config.custom_backend_metrics_polling_interval.is_some()
{
Some(Arc::new(
super::custom_backend_metrics::CustomBackendMetricsRegistry::new(
name_prefix::COMPONENT.to_string(),
registry.clone(),
),
))
} else {
None
};
let mut router = axum::Router::new();
let mut all_docs = Vec::new();
......@@ -490,10 +460,6 @@ impl HttpServiceConfigBuilder {
tls_cert_path: config.tls_cert_path,
tls_key_path: config.tls_key_path,
route_docs: all_docs,
custom_backend_namespace_component_endpoint: config
.custom_backend_namespace_component_endpoint,
custom_backend_metrics_polling_interval: config.custom_backend_metrics_polling_interval,
custom_backend_registry,
})
}
......@@ -502,17 +468,6 @@ impl HttpServiceConfigBuilder {
self
}
// DEPRECATED: To be removed after custom backends migrate to Dynamo backend.
/// Set both custom backend metrics options on the builder in one call.
///
/// `namespace_component_endpoint` and `polling_interval` are stored as given
/// (including `None`). Consumes and returns the builder so calls can be chained.
pub fn with_custom_backend_config(
mut self,
namespace_component_endpoint: Option<String>,
polling_interval: Option<f64>,
) -> Self {
// The builder fields take an extra `Some(..)` around the `Option` value being set.
// NOTE(review): presumably because the derived builder stores each field as `Option<T>` — confirm.
self.custom_backend_namespace_component_endpoint = Some(namespace_component_endpoint);
self.custom_backend_metrics_polling_interval = Some(polling_interval);
self
}
fn get_endpoints_router(
state: Arc<State>,
request_template: &Option<RequestTemplate>,
......
......@@ -56,8 +56,6 @@ pub struct LocalModelBuilder {
user_data: Option<serde_json::Value>,
custom_template_path: Option<PathBuf>,
namespace: Option<String>,
custom_backend_metrics_endpoint: Option<String>,
custom_backend_metrics_polling_interval: Option<f64>,
media_decoder: Option<MediaDecoder>,
media_fetcher: Option<MediaFetcher>,
}
......@@ -85,8 +83,6 @@ impl Default for LocalModelBuilder {
user_data: Default::default(),
custom_template_path: Default::default(),
namespace: Default::default(),
custom_backend_metrics_endpoint: Default::default(),
custom_backend_metrics_polling_interval: Default::default(),
media_decoder: Default::default(),
media_fetcher: Default::default(),
}
......@@ -199,16 +195,6 @@ impl LocalModelBuilder {
self
}
/// Set the custom backend metrics endpoint (`None` clears it).
///
/// Returns `&mut Self` so calls can be chained.
pub fn custom_backend_metrics_endpoint(&mut self, endpoint: Option<String>) -> &mut Self {
self.custom_backend_metrics_endpoint = endpoint;
self
}
/// Set the custom backend metrics polling interval (`None` clears it).
///
/// Returns `&mut Self` so calls can be chained.
pub fn custom_backend_metrics_polling_interval(&mut self, interval: Option<f64>) -> &mut Self {
self.custom_backend_metrics_polling_interval = interval;
self
}
pub fn media_decoder(&mut self, media_decoder: Option<MediaDecoder>) -> &mut Self {
self.media_decoder = media_decoder;
self
......@@ -304,9 +290,6 @@ impl LocalModelBuilder {
router_config: self.router_config.take().unwrap_or_default(),
runtime_config: self.runtime_config.clone(),
namespace: self.namespace.clone(),
custom_backend_metrics_endpoint: self.custom_backend_metrics_endpoint.clone(),
custom_backend_metrics_polling_interval: self
.custom_backend_metrics_polling_interval,
});
}
......@@ -358,8 +341,6 @@ impl LocalModelBuilder {
router_config: self.router_config.take().unwrap_or_default(),
runtime_config: self.runtime_config.clone(),
namespace: self.namespace.clone(),
custom_backend_metrics_endpoint: self.custom_backend_metrics_endpoint.clone(),
custom_backend_metrics_polling_interval: self.custom_backend_metrics_polling_interval,
})
}
}
......@@ -378,8 +359,6 @@ pub struct LocalModel {
router_config: RouterConfig,
runtime_config: ModelRuntimeConfig,
namespace: Option<String>,
custom_backend_metrics_endpoint: Option<String>,
custom_backend_metrics_polling_interval: Option<f64>,
}
impl LocalModel {
......@@ -447,14 +426,6 @@ impl LocalModel {
self.namespace.as_deref()
}
/// The custom backend metrics endpoint stored on this model, borrowed as `&str`,
/// or `None` when it is not set.
pub fn custom_backend_metrics_endpoint(&self) -> Option<&str> {
self.custom_backend_metrics_endpoint.as_deref()
}
/// The custom backend metrics polling interval stored on this model,
/// or `None` when it is not set.
pub fn custom_backend_metrics_polling_interval(&self) -> Option<f64> {
self.custom_backend_metrics_polling_interval
}
/// An endpoint to identify this model by.
pub fn endpoint_id(&self) -> &EndpointId {
&self.endpoint_id
......
......@@ -71,8 +71,6 @@ impl DeltaAggregator {
};
if aggregator.error.is_none() && delta.data.is_some() {
// note: we could extract annotations here and add them to the aggregator
// to be return as part of the NIM Response Extension
// TODO(#14) - Aggregate Annotation
// these are cheap to move so we do it every time since we are consuming the delta
......
......@@ -15,15 +15,12 @@ use rstest::rstest;
use std::path::PathBuf;
/// ----------------- NOTE ---------------
/// Currently ModelDeploymentCard does not support downloading models using nim-hub.
/// As a temporary workaround, we will download the models from Hugging Face to a local cache
/// directory in `tests/data/sample-models`. These tests require a Hugging Face token to be
/// set in the environment variable `HF_TOKEN`.
/// The model is downloaded and cached in `tests/data/sample-models` directory.
/// make sure the token has access to `meta-llama/Llama-3.1-70B-Instruct` model
/// Gets the HF_TOKEN environment variable if it exists and is not empty.
///
/// These tests require a Hugging Face token to be set in the environment variable `HF_TOKEN`.
/// The model is downloaded and cached in `tests/data/sample-models` directory.
/// Make sure the token has access to `meta-llama/Llama-3.1-70B-Instruct` model.
///
/// This function checks for the presence of the `HF_TOKEN` environment variable
/// and validates that it's not empty or whitespace-only. The token is used for
/// downloading models from Hugging Face to a local cache directory in
......@@ -57,7 +54,6 @@ async fn make_mdc_from_repo(
hf_revision: &str,
mixins: Option<Vec<PromptContextMixin>>,
) -> ModelDeploymentCard {
//TODO: remove this once we have nim-hub support. See the NOTE above.
let downloaded_path = maybe_download_model(local_path, hf_repo, hf_revision).await;
let display_name = format!("{}--{}", hf_repo, hf_revision);
let mut mdc = ModelDeploymentCard::load_from_disk(downloaded_path, None).unwrap();
......@@ -110,30 +106,6 @@ async fn make_mdcs() -> Vec<ModelDeploymentCard> {
]
}
// fn load_nim_mdcs() -> Vec<ModelDeploymentCard> {
// // get all .json files from test/data/model_deployment_cards/nim
// std::fs::read_dir("tests/data/model_deployment_cards/nim")
// .unwrap()
// .map(|res| res.map(|e| e.path()).unwrap().clone())
// .filter(|path| path.extension().unwrap() == "json")
// .map(|path| ModelDeploymentCard::load_from_json_file(path).unwrap())
// .collect::<Vec<_>>()
// }
// #[ignore]
// #[tokio::test]
// async fn create_mdc_from_repo() {
// for repo in NGC_MODEL_REPOS.iter() {
// println!("Creating MDC for {}", repo);
// let mdc = make_mdc_from_repo(repo).await;
// mdc.save_to_json_file(&format!(
// "tests/data/model_deployment_cards/nim/{}.json",
// Slug::slugify(repo)
// ))
// .unwrap();
// }
// }
const SINGLE_CHAT_MESSAGE: &str = r#"
[
{
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment