// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. // SPDX-License-Identifier: Apache-2.0 use crate::model_card::model::ModelDeploymentCard; use anyhow::{Context, Result}; use std::path::{Path, PathBuf}; use crate::model_card::model::{ModelInfoType, PromptFormatterArtifact, TokenizerKind}; use super::model::GenerationConfig; impl ModelDeploymentCard { /// Allow user to override the name we register this model under. /// Corresponds to vllm's `--served-model-name`. pub fn set_name(&mut self, name: &str) { self.display_name = name.to_string(); self.service_name = name.to_string(); } /// Build an in-memory ModelDeploymentCard from either: /// - a folder containing config.json, tokenizer.json and token_config.json /// - a GGUF file pub async fn load(config_path: impl AsRef) -> anyhow::Result { let config_path = config_path.as_ref(); if config_path.is_dir() { Self::from_local_path(config_path).await } else { Self::from_gguf(config_path).await } } /// Creates a ModelDeploymentCard from a local directory path. /// /// Currently HuggingFace format is supported and following files are expected: /// - config.json: Model configuration in HuggingFace format /// - tokenizer.json: Tokenizer configuration in HuggingFace format /// - tokenizer_config.json: Optional prompt formatter configuration /// /// # Arguments /// * `local_root_dir` - Path to the local model directory /// /// # Errors /// Returns an error if: /// - The path doesn't exist or isn't a directory /// - The path contains invalid Unicode characters /// - Required model files are missing or invalid async fn from_local_path(local_root_dir: impl AsRef) -> anyhow::Result { let local_root_dir = local_root_dir.as_ref(); check_valid_local_repo_path(local_root_dir)?; let repo_id = local_root_dir .canonicalize()? .to_str() .ok_or_else(|| anyhow::anyhow!("Path contains invalid Unicode"))? .to_string(); let model_name = local_root_dir .file_name() .and_then(|n| n.to_str()) .ok_or_else(|| anyhow::anyhow!("Invalid model directory name"))?; Self::from_repo(&repo_id, model_name).await } async fn from_gguf(gguf_file: &Path) -> anyhow::Result { let model_name = gguf_file .iter() .next_back() .map(|n| n.to_string_lossy().to_string()); let Some(model_name) = model_name else { // I think this would only happy on an empty path anyhow::bail!( "Could not extract model name from path '{}'", gguf_file.display() ); }; // TODO: we do this in HFConfig also, unify let content = super::model::load_gguf(gguf_file)?; let context_length = content.get_metadata()[&format!("{}.context_length", content.arch())] .to_u32() .unwrap_or(0); tracing::debug!(context_length, "Loaded context length from GGUF"); Ok(Self { display_name: model_name.to_string(), service_name: model_name.to_string(), model_info: Some(ModelInfoType::GGUF(gguf_file.to_path_buf())), tokenizer: Some(TokenizerKind::from_gguf(gguf_file)?), gen_config: None, // AFAICT there is no equivalent in a GGUF prompt_formatter: Some(PromptFormatterArtifact::GGUF(gguf_file.to_path_buf())), chat_template_file: None, prompt_context: None, // TODO - auto-detect prompt context revision: 0, last_published: None, context_length, kv_cache_block_size: 0, }) } #[allow(dead_code)] async fn from_ngc_repo(_: &str) -> anyhow::Result { Err(anyhow::anyhow!( "ModelDeploymentCard::from_ngc_repo is not implemented" )) } async fn from_repo(repo_id: &str, model_name: &str) -> anyhow::Result { // This is usually the right choice let context_length = crate::file_json_field( &PathBuf::from(repo_id).join("config.json"), "max_position_embeddings", ) // But sometimes this is .or_else(|_| { crate::file_json_field( &PathBuf::from(repo_id).join("tokenizer_config.json"), "model_max_length", ) }) // If neither of those are present let the engine default it .unwrap_or(0); Ok(Self { display_name: model_name.to_string(), service_name: model_name.to_string(), model_info: Some(ModelInfoType::from_repo(repo_id).await?), tokenizer: Some(TokenizerKind::from_repo(repo_id).await?), gen_config: GenerationConfig::from_repo(repo_id).await.ok(), // optional prompt_formatter: PromptFormatterArtifact::from_repo(repo_id).await?, chat_template_file: PromptFormatterArtifact::chat_template_from_repo(repo_id).await?, prompt_context: None, // TODO - auto-detect prompt context revision: 0, last_published: None, context_length, kv_cache_block_size: 0, // set later }) } } impl ModelInfoType { pub async fn from_repo(repo_id: &str) -> Result { Self::try_is_hf_repo(repo_id) .await .with_context(|| format!("unable to extract model info from repo {}", repo_id)) } async fn try_is_hf_repo(repo: &str) -> anyhow::Result { Ok(Self::HfConfigJson( check_for_file(repo, "config.json").await?, )) } } impl PromptFormatterArtifact { pub async fn from_repo(repo_id: &str) -> Result> { // we should only error if we expect a prompt formatter and it's not found // right now, we don't know when to expect it, so we just return Ok(Some/None) Ok(Self::try_is_hf_repo(repo_id) .await .with_context(|| format!("unable to extract prompt format from repo {}", repo_id)) .ok()) } pub async fn chat_template_from_repo(repo_id: &str) -> Result> { Ok(Self::chat_template_try_is_hf_repo(repo_id) .await .with_context(|| format!("unable to extract prompt format from repo {}", repo_id)) .ok()) } async fn chat_template_try_is_hf_repo(repo: &str) -> anyhow::Result { Ok(Self::HfChatTemplate( check_for_file(repo, "chat_template.jinja").await?, )) } async fn try_is_hf_repo(repo: &str) -> anyhow::Result { Ok(Self::HfTokenizerConfigJson( check_for_file(repo, "tokenizer_config.json").await?, )) } } impl TokenizerKind { pub async fn from_repo(repo_id: &str) -> Result { Self::try_is_hf_repo(repo_id) .await .with_context(|| format!("unable to extract tokenizer kind from repo {}", repo_id)) } async fn try_is_hf_repo(repo: &str) -> anyhow::Result { Ok(Self::HfTokenizerJson( check_for_file(repo, "tokenizer.json").await?, )) } } impl GenerationConfig { pub async fn from_repo(repo_id: &str) -> Result { Self::try_is_hf_repo(repo_id) .await .with_context(|| format!("unable to extract generation config from repo {repo_id}")) } async fn try_is_hf_repo(repo: &str) -> anyhow::Result { Ok(Self::HfGenerationConfigJson( check_for_file(repo, "generation_config.json").await?, )) } } /// Checks if the provided path contains the expected file. async fn check_for_file(repo_id: &str, file: &str) -> anyhow::Result { let p = PathBuf::from(repo_id).join(file); let name = p.display().to_string(); if !p.exists() { anyhow::bail!("File not found: {name}") } Ok(name) } /// Checks if the provided path is a valid local repository path. /// /// # Arguments /// * `path` - Path to validate /// /// # Errors /// Returns an error if the path doesn't exist or isn't a directory fn check_valid_local_repo_path(path: impl AsRef) -> Result<()> { let path = path.as_ref(); if !path.exists() { return Err(anyhow::anyhow!( "Model path does not exist: {}", path.display() )); } if !path.is_dir() { return Err(anyhow::anyhow!( "Model path is not a directory: {}", path.display() )); } Ok(()) }