// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. // SPDX-License-Identifier: Apache-2.0 //! # Model Deployment Card //! //! The ModelDeploymentCard (MDC) is the primary model configuration structure that will be available to any //! component that needs to interact with the model or its dependent artifacts. //! //! The ModelDeploymentCard contains LLM model deployment configuration information: //! - Display name and service name for the model //! - Model information (ModelInfoType) //! - Tokenizer configuration (TokenizerKind) //! - Prompt formatter settings (PromptFormatterArtifact) use std::fmt; use std::path::{Path, PathBuf}; use std::sync::{Arc, OnceLock}; use crate::common::checked_file::CheckedFile; use crate::local_model::runtime_config::ModelRuntimeConfig; use crate::model_type::{ModelInput, ModelType}; use anyhow::{Context, Result}; use derive_builder::Builder; use dynamo_runtime::{slug::Slug, storage::kv}; use serde::{Deserialize, Serialize}; use tokenizers::Tokenizer as HfTokenizer; use crate::preprocessor::media::{MediaDecoder, MediaFetcher}; use crate::protocols::TokenIdType; /// Identify model deployment cards in the key-value store pub const ROOT_PATH: &str = "v1/mdc"; #[derive(Serialize, Deserialize, Clone, Debug)] #[serde(rename_all = "snake_case")] pub enum ModelInfoType { HfConfigJson(CheckedFile), } impl ModelInfoType { pub fn checksum(&self) -> String { match self { ModelInfoType::HfConfigJson(c) => c.checksum().to_string(), } } pub fn is_local(&self) -> bool { match self { ModelInfoType::HfConfigJson(c) => c.is_local(), } } pub fn update_dir(&mut self, dir: &Path) { match self { ModelInfoType::HfConfigJson(c) => c.update_dir(dir), } } } #[derive(Serialize, Deserialize, Clone, Debug)] #[serde(rename_all = "snake_case")] pub enum TokenizerKind { HfTokenizerJson(CheckedFile), } impl TokenizerKind { pub fn checksum(&self) -> String { match self { TokenizerKind::HfTokenizerJson(c) => c.checksum().to_string(), } } pub fn is_local(&self) -> bool { match self { TokenizerKind::HfTokenizerJson(c) => c.is_local(), } } pub fn update_dir(&mut self, dir: &Path) { match self { TokenizerKind::HfTokenizerJson(c) => c.update_dir(dir), } } } /// Supported types of prompt formatters. /// /// We need a way to associate the prompt formatter template definition with an associated /// data model which is expected for rendering. /// /// All current prompt formatters are Jinja2 templates which use the OpenAI ChatCompletionRequest /// format. However, we currently do not have a discovery path to know if the model supports tool use /// unless we inspect the template. /// /// TODO(): Add an enum for the PromptFormatDataModel with at minimum arms for: /// - OaiChat /// - OaiChatToolUse #[derive(Serialize, Deserialize, Clone, Debug)] #[serde(rename_all = "snake_case")] pub enum PromptFormatterArtifact { HfTokenizerConfigJson(CheckedFile), HfChatTemplate { is_custom: bool, file: CheckedFile }, } impl PromptFormatterArtifact { pub fn checksum(&self) -> String { match self { PromptFormatterArtifact::HfTokenizerConfigJson(c) => c.checksum().to_string(), PromptFormatterArtifact::HfChatTemplate { file: c, .. } => c.checksum().to_string(), } } /// Is this file available locally pub fn is_local(&self) -> bool { match self { PromptFormatterArtifact::HfTokenizerConfigJson(c) => c.is_local(), PromptFormatterArtifact::HfChatTemplate { file: c, .. } => c.is_local(), } } pub fn update_dir(&mut self, dir: &Path) { match self { PromptFormatterArtifact::HfTokenizerConfigJson(c) => c.update_dir(dir), PromptFormatterArtifact::HfChatTemplate { file: c, .. } => c.update_dir(dir), } } pub fn is_custom(&self) -> bool { match self { PromptFormatterArtifact::HfTokenizerConfigJson(_) => false, PromptFormatterArtifact::HfChatTemplate { is_custom, .. } => *is_custom, } } } #[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq, Hash)] #[serde(rename_all = "snake_case")] pub enum PromptContextMixin { /// Support OAI Chat Messages and Tools OaiChat, /// Enables templates with `{{datetime}}` to be rendered with the current date and time. Llama3DateTime, } #[derive(Serialize, Deserialize, Clone, Debug)] #[serde(rename_all = "snake_case")] pub enum GenerationConfig { HfGenerationConfigJson(CheckedFile), } impl GenerationConfig { pub fn checksum(&self) -> String { match self { GenerationConfig::HfGenerationConfigJson(c) => c.checksum().to_string(), } } pub fn is_local(&self) -> bool { match self { GenerationConfig::HfGenerationConfigJson(c) => c.is_local(), } } pub fn update_dir(&mut self, dir: &Path) { match self { GenerationConfig::HfGenerationConfigJson(c) => c.update_dir(dir), } } } /// Check if our model only has config fields for a Mistral-format model. fn is_exclusively_mistral_model(directory: &Path) -> bool { !directory.join("config.json").exists() && directory.join("params.json").exists() } #[derive(Serialize, Deserialize, Clone, Debug, Builder, Default)] pub struct ModelDeploymentCard { /// Human readable model name, e.g. "Meta Llama 3.1 8B Instruct" pub display_name: String, // Cache the Slugified display_name so we can share references to it slug: Slug, /// Original HuggingFace repository path for downloading model files. /// When `display_name` is customized (e.g., via `--served-model-name`), /// this field preserves the original repository path needed for downloads. /// Falls back to `display_name` if not set. #[serde(default, skip_serializing_if = "Option::is_none")] pub source_path: Option, /// Model information pub model_info: Option, /// Tokenizer configuration pub tokenizer: Option, /// Prompt Formatter configuration #[serde(default, skip_serializing_if = "Option::is_none")] pub prompt_formatter: Option, /// chat template may be stored as a separate file instead of in `prompt_formatter`. #[serde(default, skip_serializing_if = "Option::is_none")] pub chat_template_file: Option, /// Generation config - default sampling params #[serde(default, skip_serializing_if = "Option::is_none")] pub gen_config: Option, /// Prompt Formatter Config #[serde(default, skip_serializing_if = "Option::is_none")] pub prompt_context: Option>, /// Max context (in number of tokens) this model can handle pub context_length: u32, /// Size of a KV cache block - vllm only currently /// Passed to the engine and the KV router. pub kv_cache_block_size: u32, /// How many times a request can be migrated to another worker if the HTTP server lost /// connection to the current worker. pub migration_limit: u32, /// Specifies whether the model is a chat, completions, etc model. pub model_type: ModelType, /// Specifies the model input type. /// `Tokens` for engines that expect pre-processed input. /// `Text` for engines that take care of pre-processing themselves. pub model_input: ModelInput, /// LoRA metadata for routing #[serde(default, skip_serializing_if = "Option::is_none")] pub lora: Option, /// User-defined metadata for custom worker behavior #[serde(default, skip_serializing_if = "Option::is_none")] pub user_data: Option, #[serde(default)] pub runtime_config: ModelRuntimeConfig, /// Media decoding configuration #[serde(default)] pub media_decoder: Option, /// Media fetching configuration #[serde(default)] pub media_fetcher: Option, #[serde(skip, default)] checksum: OnceLock, } /// LoRA adapter information for routing decisions #[derive(Serialize, Deserialize, Clone, Debug)] pub struct LoraInfo { /// LoRA adapter name (e.g., "customer-123-v2") pub name: String, /// Maximum number of LoRA adapters that can be loaded at once on a single GPU #[serde(default, skip_serializing_if = "Option::is_none")] pub max_gpu_lora_count: Option, } impl ModelDeploymentCard { pub fn builder() -> ModelDeploymentCardBuilder { ModelDeploymentCardBuilder::default() } /// Create a ModelDeploymentCard where only the name is filled in. /// /// Single-process setups don't need an MDC to communicate model details, but it /// simplifies the code to assume we always have one. This is how you get one in those /// cases. A quasi-null object: pub fn with_name_only(name: &str) -> ModelDeploymentCard { ModelDeploymentCard { display_name: name.to_string(), slug: Slug::from_string(name), ..Default::default() } } /// Load a model deployment card from a JSON file pub fn load_from_json_file>(file: P) -> std::io::Result { let contents = std::fs::read_to_string(&file)?; Ok(serde_json::from_str(&contents).inspect_err(|err| { crate::log_json_err(&file.as_ref().display().to_string(), &contents, err) })?) } /// Load a model deployment card from a JSON string pub fn load_from_json_str(contents: &str) -> Result { Ok(serde_json::from_str(contents) .inspect_err(|err| crate::log_json_err("unknown", contents, err))?) } // // Methods // /// Save the model deployment card to a JSON file pub fn save_to_json_file(&self, file: &str) -> Result<(), anyhow::Error> { std::fs::write(file, self.to_json()?)?; Ok(()) } #[inline] pub fn name(&self) -> &str { &self.display_name } #[inline] pub fn slug(&self) -> &Slug { &self.slug } /// Serialize the model deployment card to a JSON string pub fn to_json(&self) -> Result { Ok(serde_json::to_string(self)?) } pub fn mdcsum(&self) -> &str { self.checksum .get_or_init(|| { // Only include the important fields let mut bytes_to_hash: Vec = Vec::with_capacity(512); bytes_to_hash.extend(self.display_name.as_bytes()); if let Some(source_path) = self.source_path.as_ref() { bytes_to_hash.extend(source_path.as_bytes()); } // The files can be either a URL or a local path, so we ignore that and hash their // checksum instead, which won't change wherever they are. if let Some(model_info) = self.model_info.as_ref() { bytes_to_hash.extend(model_info.checksum().as_bytes()); } if let Some(tokenizer) = self.tokenizer.as_ref() { bytes_to_hash.extend(tokenizer.checksum().as_bytes()); } if let Some(prompt_formatter) = self.prompt_formatter.as_ref() { bytes_to_hash.extend(prompt_formatter.checksum().as_bytes()); } if let Some(chat_template) = self.chat_template_file.as_ref() { bytes_to_hash.extend(chat_template.checksum().as_bytes()); } if let Some(gen_config) = self.gen_config.as_ref() { bytes_to_hash.extend(gen_config.checksum().as_bytes()); } if let Some(prompt_context_vec) = self.prompt_context.as_ref() { // Paste it as the bytes of the debug format. It's a Vec of enum, so this should be // fine. If the debug representation changes that only happens in a new release. bytes_to_hash.extend(format!("{prompt_context_vec:?}").as_bytes()); } bytes_to_hash.extend(self.context_length.to_be_bytes()); bytes_to_hash.extend(self.kv_cache_block_size.to_be_bytes()); // TODO: Do we want any of user_data or runtime_config? blake3::hash(&bytes_to_hash).to_string() }) .as_ref() } /// Is this a full model card with tokenizer? /// There are cases where we have a placeholder card (see `with_name_only`). pub fn has_tokenizer(&self) -> bool { self.tokenizer.is_some() } pub fn tokenizer_hf(&self) -> anyhow::Result { match &self.tokenizer { Some(TokenizerKind::HfTokenizerJson(checked_file)) => { let p = checked_file.path().ok_or_else(|| { anyhow::anyhow!("Tokenizer is URL-backed ({:?})", checked_file.url()) })?; HfTokenizer::from_file(p) .inspect_err(|err| { if let Some(serde_err) = err.downcast_ref::() && let Ok(contents) = std::fs::read_to_string(p) { crate::log_json_err(&p.display().to_string(), &contents, serde_err); } }) .map_err(anyhow::Error::msg) .with_context(|| p.display().to_string()) } None => { anyhow::bail!( "Blank ModelDeploymentCard does not have a tokenizer. Is this a mistral model? If so, the `--use--tokenizer` flag in the engine command is required." ); } } } pub(crate) fn set_source_path(&mut self, source_path: PathBuf) { self.source_path = Some(source_path.display().to_string()); } /// Allow user to override the name we register this model under. /// Corresponds to vllm's `--served-model-name`. pub fn set_name(&mut self, name: &str) { self.display_name = name.to_string(); self.slug = Slug::from_string(name); } pub fn source_path(&self) -> &str { self.source_path.as_ref().unwrap_or(&self.display_name) } /// Build an in-memory ModelDeploymentCard from a folder containing config.json, /// tokenizer.json and tokenizer_config.json (i.e. a huggingface repo checkout). /// Optional custom template. pub fn load_from_disk( config_path: impl AsRef, custom_template_path: Option<&Path>, ) -> anyhow::Result { Self::from_local_path(config_path.as_ref(), custom_template_path) } pub fn requires_preprocessing(&self) -> bool { matches!(self.model_input, ModelInput::Tokens) } /// Download the files this card needs to work: config.json, tokenizer.json, etc. pub async fn download_config(&mut self) -> anyhow::Result<()> { if self.has_local_files() { tracing::trace!("All model config is local, not downloading"); return Ok(()); } // For TensorBased models, config files are not used - they handle everything in the backend if self.model_type.supports_tensor() { tracing::debug!( display_name = %self.display_name, "Skipping config download for TensorBased model" ); return Ok(()); } let ignore_weights = true; let local_path = crate::hub::from_hf(self.source_path(), ignore_weights).await?; self.update_dir(&local_path); Ok(()) } /// Re-write all the local disk paths as a URL. Do this before publishing the MDC. /// The opposite of `move_to_url` is `update_dir`. pub fn move_to_url(&mut self, base_url: &str) -> anyhow::Result<()> { macro_rules! change { ($field:expr, $enum_variant:path) => { if let Some($enum_variant(src_file)) = $field.as_mut() && let Some(filename) = src_file .path() .and_then(|p| p.file_name()) .and_then(|f| f.to_str()) .map(|f| f.to_string()) { let hf_url = url::Url::parse(base_url) .and_then(|u| u.join(filename.as_ref())) .context(filename)?; src_file.move_to_url(hf_url); } }; } // config.json change!(self.model_info, ModelInfoType::HfConfigJson); // generation_config.json change!(self.gen_config, GenerationConfig::HfGenerationConfigJson); // tokenizer_config.json change!( self.prompt_formatter, PromptFormatterArtifact::HfTokenizerConfigJson ); // tokenizer.json change!(self.tokenizer, TokenizerKind::HfTokenizerJson); // We only "move" the chat template if it came form the repo. If we have a custom template // file we cannot download that from HF. if let Some(PromptFormatterArtifact::HfChatTemplate { file: src_file, is_custom, }) = self.chat_template_file.as_mut() { if *is_custom { tracing::info!( "Detected custom chat template. Ensure file exists in the same location on all hosts." ); } else if let Some(filename) = src_file .path() .and_then(|p| p.file_name()) .and_then(|f| f.to_str()) .map(|f| f.to_string()) { let hf_url = url::Url::parse(base_url) .and_then(|u| u.join(filename.as_ref())) .context(filename)?; src_file.move_to_url(hf_url); } } Ok(()) } /// Are all the files we need (tokenizer.json, etc) available locally? fn has_local_files(&self) -> bool { let has_model_info = self .model_info .as_ref() .map(|p| p.is_local()) .unwrap_or(true); let has_tokenizer = self .tokenizer .as_ref() .map(|p| p.is_local()) .unwrap_or(true); let has_prompt_formatter = self .prompt_formatter .as_ref() .map(|p| p.is_local()) .unwrap_or(true); let has_chat_template_file = self .chat_template_file .as_ref() .map(|p| p.is_local()) .unwrap_or(true); let has_gen_config = self .gen_config .as_ref() .map(|p| p.is_local()) .unwrap_or(true); has_model_info && has_tokenizer && has_prompt_formatter && has_chat_template_file && has_gen_config } /// Update the directory for files like tokenizer.json be in here. fn update_dir(&mut self, dir: &Path) { if let Some(model_info) = self.model_info.as_mut() { model_info.update_dir(dir); } if let Some(tk) = self.tokenizer.as_mut() { tk.update_dir(dir); } if let Some(pf) = self.prompt_formatter.as_mut() { pf.update_dir(dir); } if let Some(gc) = self.gen_config.as_mut() { gc.update_dir(dir); } // If it's a custom chat template we didn't download it, so leave the path untouched if let Some(ct) = self.chat_template_file.as_mut() && !ct.is_custom() { ct.update_dir(dir); } } /// Creates a ModelDeploymentCard from a local directory path. /// /// Currently HuggingFace format is supported and following files are expected: /// - config.json: Model configuration in HuggingFace format /// - tokenizer.json: Tokenizer configuration in HuggingFace format /// - tokenizer_config.json: Optional prompt formatter configuration /// /// # Arguments /// * `local_root_dir` - Path to the local model directory /// /// # Errors /// Returns an error if: /// - The path doesn't exist or isn't a directory /// - The path contains invalid Unicode characters /// - Required model files are missing or invalid fn from_local_path( local_path: impl AsRef, custom_template_path: Option<&Path>, ) -> anyhow::Result { check_valid_local_repo_path(&local_path)?; Self::from_repo_checkout(&local_path, custom_template_path) } fn from_repo_checkout( local_path: impl AsRef, custom_template_path: Option<&Path>, ) -> anyhow::Result { let local_path = local_path.as_ref(); // This is usually the right choice let context_length = crate::file_json_field(&local_path.join("config.json"), "max_position_embeddings") // But sometimes this is .or_else(|_| { crate::file_json_field( &local_path.join("tokenizer_config.json"), "model_max_length", ) }) // If neither of those are present let the engine default it .unwrap_or(0); let is_mistral_model = is_exclusively_mistral_model(local_path); let (model_info, tokenizer, gen_config, prompt_formatter) = if !is_mistral_model { ( Some(ModelInfoType::from_disk(local_path)?), Some(TokenizerKind::from_disk(local_path)?), GenerationConfig::from_disk(local_path).ok(), PromptFormatterArtifact::from_disk(local_path)?, ) } else { (None, None, None, None) }; // Load chat template - either custom or from repo let chat_template_file = if is_mistral_model { None } else if let Some(template_path) = custom_template_path { if !template_path.exists() { anyhow::bail!( "Custom template file does not exist: {}", template_path.display() ); } // Verify the file is readable let _template_content = std::fs::read_to_string(template_path).with_context(|| { format!( "Failed to read custom template file: {}", template_path.display() ) })?; Some(PromptFormatterArtifact::HfChatTemplate { is_custom: custom_template_path.is_some(), file: CheckedFile::from_disk(template_path)?, }) } else { PromptFormatterArtifact::chat_template_from_disk(local_path)? }; // This gets replaced when we `set_name` let display_name = local_path.display().to_string(); Ok(Self { slug: Slug::from_string(&display_name), display_name, source_path: None, model_info, tokenizer, gen_config, prompt_formatter, chat_template_file, prompt_context: None, // TODO - auto-detect prompt context context_length, kv_cache_block_size: 0, // set later migration_limit: 0, model_type: Default::default(), // set later model_input: Default::default(), // set later lora: None, user_data: None, runtime_config: ModelRuntimeConfig::default(), media_decoder: None, media_fetcher: None, checksum: OnceLock::new(), }) } } impl PartialEq for ModelDeploymentCard { fn eq(&self, other: &ModelDeploymentCard) -> bool { self.mdcsum() == other.mdcsum() } } /// A ModelDeploymentCard is published a single time per instance and never updated. impl kv::Versioned for ModelDeploymentCard { fn revision(&self) -> u64 { 0 } fn set_revision(&mut self, _revision: u64) {} } impl fmt::Display for ModelDeploymentCard { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "{}", self.slug()) } } pub trait ModelInfo: Send + Sync { /// Model type fn model_type(&self) -> String; /// Token ID for the beginning of sequence (optional - not all models have it) fn bos_token_id(&self) -> Option; /// Token ID for the end of sequence fn eos_token_ids(&self) -> Vec; /// Maximum position embeddings / max sequence length /// TODO: This is only used in a single test, no other code. Remove? fn max_position_embeddings(&self) -> Option; /// Vocabulary size /// TODO: This is only used in a single test, no other code. Remove? fn vocab_size(&self) -> Option; } impl ModelInfoType { pub fn get_model_info(&self) -> Result> { match self { Self::HfConfigJson(checked_file) => { let Some(path) = checked_file.path() else { anyhow::bail!("model info is not a local path: {checked_file:?}"); }; Ok(HFConfig::from_json_file(path)?) } } } } #[derive(Debug, Clone, Serialize, Deserialize)] struct HFConfig { /// denotes the mixin to the flattened data model which can be present /// in the config.json file architectures: Vec, /// general model type model_type: String, text_config: Option, // Sometimes it's inside HFTextConfig, sometimes it's here eos_token_id: Option, } #[derive(Debug, Clone, Serialize, Deserialize)] struct HFTextConfig { // Optional - not all models have a bos_token_id bos_token_id: Option, eos_token_id: Option, #[serde(default)] final_eos_token_ids: Vec, /// max sequence length max_position_embeddings: Option, /// number of layers in the model /// Optional because some multimodal models (e.g., LLaVA) don't include this in text_config num_hidden_layers: Option, /// number of attention heads in the model num_attention_heads: Option, /// Vocabulary size vocab_size: Option, } impl HFConfig { fn from_json_file>(file: P) -> Result> { let file_path = file.as_ref(); let contents = std::fs::read_to_string(file_path)?; let mut config: Self = json_five::from_str(&contents) .inspect_err(|err| { tracing::error!(path=%file_path.display(), %err, "Failed to parse config.json as JSON5"); })?; if config.text_config.is_none() { let text_config: HFTextConfig = json_five::from_str(&contents) .inspect_err(|err| { tracing::error!(path=%file_path.display(), %err, "Failed to parse text config from config.json as JSON5"); })?; config.text_config = Some(text_config); } let Some(text_config) = config.text_config.as_mut() else { anyhow::bail!( "Missing text config fields (model_type, eos_token_ids, etc) in config.json" ); }; let gencfg_path = file_path .parent() .unwrap_or_else(|| Path::new("")) .join("generation_config.json"); // bos_token_id is optional - not all models have it // Try to load from generation_config.json if not in config.json if text_config.bos_token_id.is_none() { text_config.bos_token_id = crate::file_json_field::(&gencfg_path, "bos_token_id").ok(); } // TODO: refactor this when we switch to per-architecture tokenization // eos_token_id can appear in multiple places, and as suggested by HuggingFace // community that the priority should be: // 1. generation_config.json; // 2. config.json, or text_config field in config.json. // https://github.com/huggingface/transformers/issues/25395#issuecomment-1671863257 let final_eos_token_ids: Vec = { // Firstly check the generation_config.json crate::file_json_field::(&gencfg_path, "eos_token_id") .inspect_err( |err| tracing::warn!(%err, "Missing eos_token_id in generation_config.json"), ) .ok().and_then(|v| { if v.is_number() { v.as_number() .and_then(|n| n.as_u64()) .map(|n| vec![n as TokenIdType]) } else if v.is_array() { let arr = v.as_array().unwrap(); Some( arr.iter() .filter_map(|inner_v| { inner_v .as_number() .and_then(|n| n.as_u64()) .map(|n| n as TokenIdType) }) .collect(), ) } else { None } }) }.or_else(|| { // Check config.json and text_config config .eos_token_id .as_ref() .or(text_config.eos_token_id.as_ref()) .and_then(|v| { if v.is_number() { v.as_number() .and_then(|n| n.as_u64()) .map(|n| vec![n as TokenIdType]) } else { serde_json::from_value(v.clone()) .map(Some) .unwrap_or_else(|err| { tracing::error!( ?v, path = %file_path.display(), "eos_token_id is not a number or an array, cannot deserialize: {err}", ); None }) } }) }) .ok_or_else(|| { anyhow::anyhow!( "missing eos_token_id in config.json and generation_config.json, cannot load" ) })?; text_config.final_eos_token_ids = final_eos_token_ids; Ok(Arc::new(config)) } } impl ModelInfo for HFConfig { fn model_type(&self) -> String { self.model_type.clone() } fn bos_token_id(&self) -> Option { self.text_config.as_ref().and_then(|tc| tc.bos_token_id) } fn eos_token_ids(&self) -> Vec { self.text_config .as_ref() .unwrap() .final_eos_token_ids .clone() } fn max_position_embeddings(&self) -> Option { self.text_config.as_ref().unwrap().max_position_embeddings } fn vocab_size(&self) -> Option { self.text_config.as_ref().unwrap().vocab_size } } impl ModelInfoType { pub fn from_disk(directory: &Path) -> Result { let f = CheckedFile::from_disk(directory.join("config.json")).with_context(|| { format!( "unable to extract config.json from directory {}", directory.display() ) })?; Ok(Self::HfConfigJson(f)) } } impl GenerationConfig { pub fn from_disk(directory: &Path) -> Result { let f = CheckedFile::from_disk(directory.join("generation_config.json")).with_context( || { format!( "unable to extract generation_config from directory {}", directory.display() ) }, )?; Ok(Self::HfGenerationConfigJson(f)) } } impl PromptFormatterArtifact { pub fn from_disk(directory: &Path) -> Result> { // we should only error if we expect a prompt formatter and it's not found // right now, we don't know when to expect it, so we just return Ok(Some/None) match CheckedFile::from_disk(directory.join("tokenizer_config.json")) { Ok(f) => Ok(Some(Self::HfTokenizerConfigJson(f))), Err(_) => Ok(None), } } pub fn chat_template_from_disk(directory: &Path) -> Result> { match CheckedFile::from_disk(directory.join("chat_template.jinja")) { Ok(f) => Ok(Some(Self::HfChatTemplate { file: f, is_custom: false, })), Err(_) => Ok(None), } } } impl TokenizerKind { pub fn from_disk(directory: &Path) -> Result { let f = CheckedFile::from_disk(directory.join("tokenizer.json")).with_context(|| { format!( "unable to extract tokenizer kind from directory {}", directory.display() ) })?; Ok(Self::HfTokenizerJson(f)) } } /// Checks if the provided path is a valid local repository path. /// /// # Arguments /// * `path` - Path to validate /// /// # Errors /// Returns an error if the path doesn't exist or isn't a directory fn check_valid_local_repo_path(path: impl AsRef) -> Result<()> { let path = path.as_ref(); if !path.exists() { return Err(anyhow::anyhow!( "Model path does not exist: {}", path.display() )); } if !path.is_dir() { return Err(anyhow::anyhow!( "Model path is not a directory: {}", path.display() )); } Ok(()) } #[cfg(test)] mod tests { use super::HFConfig; use std::collections::HashSet; use std::path::Path; #[test] pub fn test_config_json_llama3() -> anyhow::Result<()> { let config_file = Path::new(env!("CARGO_MANIFEST_DIR")) .join("tests/data/sample-models/mock-llama-3.1-8b-instruct/config.json"); let config = HFConfig::from_json_file(&config_file)?; assert_eq!(config.bos_token_id(), Some(128000)); // eos_token_ids can be in any order as long as the set is correct let eos_token_id_set: HashSet<_> = config.eos_token_ids().iter().cloned().collect(); assert_eq!(eos_token_id_set, vec![128001, 128009].into_iter().collect()); Ok(()) } #[test] pub fn test_config_json_llama4() -> anyhow::Result<()> { let config_file = Path::new(env!("CARGO_MANIFEST_DIR")) .join("tests/data/sample-models/Llama-4-Scout-17B-16E-Instruct/config.json"); let config = HFConfig::from_json_file(&config_file)?; assert_eq!(config.bos_token_id(), Some(200000)); Ok(()) } /// The Python JSON parser accepts `Infinity` as a numeric value. This is explicitly against the /// JSON spec, but inevitably people rely on it, so we have to allow it. /// We treat that file as JSON5 (a lenient superset of JSON) to be able to parse it. #[test] fn test_invalid_json_but_py_accepts_it() { dynamo_runtime::logging::init(); let path = "tests/data/sample-models/NVIDIA-Nemotron-Nano-12B-v2-Base/config.json"; let _ = HFConfig::from_json_file(path).unwrap(); } }