// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. // SPDX-License-Identifier: Apache-2.0 //! # Model Deployment Card //! //! The ModelDeploymentCard (MDC) is the primary model configuration structure that will be available to any //! component that needs to interact with the model or its dependent artifacts. //! //! The ModelDeploymentCard contains LLM model deployment configuration information: //! - Display name and service name for the model //! - Model information (ModelInfoType) //! - Tokenizer configuration (TokenizerKind) //! - Prompt formatter settings (PromptFormatterArtifact) //! - Various metadata like revision, publish time, etc. use std::fmt; use std::fs::File; use std::path::{Path, PathBuf}; use std::sync::Arc; use std::time::Duration; use crate::local_model::runtime_config::ModelRuntimeConfig; use anyhow::{Context, Result}; use derive_builder::Builder; use dynamo_runtime::{slug::Slug, storage::key_value_store::Versioned, transports::nats}; use serde::{Deserialize, Serialize}; use tokenizers::Tokenizer as HfTokenizer; use url::Url; use crate::gguf::{Content, ContentConfig, ModelConfigLike}; use crate::protocols::TokenIdType; /// Identify model deployment cards in the key-value store pub const ROOT_PATH: &str = "mdc"; /// If a model deployment card hasn't been refreshed in this much time the worker is likely gone const CARD_MAX_AGE: chrono::TimeDelta = chrono::TimeDelta::minutes(5); #[derive(Serialize, Deserialize, Clone, Debug)] #[serde(rename_all = "snake_case")] pub enum ModelInfoType { HfConfigJson(String), GGUF(PathBuf), } #[derive(Serialize, Deserialize, Clone, Debug)] #[serde(rename_all = "snake_case")] pub enum TokenizerKind { HfTokenizerJson(String), GGUF(Box), } /// Supported types of prompt formatters. /// /// We need a way to associate the prompt formatter template definition with an associated /// data model which is expected for rendering. /// /// All current prompt formatters are Jinja2 templates which use the OpenAI ChatCompletionRequest /// format. However, we currently do not have a discovery path to know if the model supports tool use /// unless we inspect the template. /// /// TODO(): Add an enum for the PromptFormatDataModel with at minimum arms for: /// - OaiChat /// - OaiChatToolUse #[derive(Serialize, Deserialize, Clone, Debug)] #[serde(rename_all = "snake_case")] pub enum PromptFormatterArtifact { HfTokenizerConfigJson(String), HfChatTemplate(String), GGUF(PathBuf), } #[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq, Hash)] #[serde(rename_all = "snake_case")] pub enum PromptContextMixin { /// Support OAI Chat Messages and Tools OaiChat, /// Enables templates with `{{datetime}}` to be rendered with the current date and time. Llama3DateTime, } #[derive(Serialize, Deserialize, Clone, Debug)] #[serde(rename_all = "snake_case")] pub enum GenerationConfig { HfGenerationConfigJson(String), GGUF(PathBuf), } #[derive(Serialize, Deserialize, Clone, Debug, Builder, Default)] pub struct ModelDeploymentCard { /// Human readable model name, e.g. "Meta Llama 3.1 8B Instruct" pub display_name: String, // Cache the Slugified display_name so we can share references to it slug: Slug, /// Model information pub model_info: Option, /// Tokenizer configuration pub tokenizer: Option, /// Prompt Formatter configuration #[serde(default, skip_serializing_if = "Option::is_none")] pub prompt_formatter: Option, /// chat template may be stored as a separate file instead of in `prompt_formatter`. #[serde(default, skip_serializing_if = "Option::is_none")] pub chat_template_file: Option, /// Generation config - default sampling params #[serde(default, skip_serializing_if = "Option::is_none")] pub gen_config: Option, /// Prompt Formatter Config #[serde(default, skip_serializing_if = "Option::is_none")] pub prompt_context: Option>, /// When this card was last advertised by a worker. None if not yet published. pub last_published: Option>, /// Incrementing count of how many times we published this card #[serde(default, skip_serializing)] pub revision: u64, /// Max context (in number of tokens) this model can handle pub context_length: u32, /// Size of a KV cache block - vllm only currently /// Passed to the engine and the KV router. pub kv_cache_block_size: u32, /// How many times a request can be migrated to another worker if the HTTP server lost /// connection to the current worker. pub migration_limit: u32, /// User-defined metadata for custom worker behavior #[serde(default, skip_serializing_if = "Option::is_none")] pub user_data: Option, #[serde(default)] pub runtime_config: ModelRuntimeConfig, } impl ModelDeploymentCard { pub fn builder() -> ModelDeploymentCardBuilder { ModelDeploymentCardBuilder::default() } /// Create a ModelDeploymentCard where only the name is filled in. /// /// Single-process setups don't need an MDC to communicate model details, but it /// simplifies the code to assume we always have one. This is how you get one in those /// cases. A quasi-null object: pub fn with_name_only(name: &str) -> ModelDeploymentCard { ModelDeploymentCard { display_name: name.to_string(), slug: Slug::from_string(name), ..Default::default() } } /// How often we should check if a model deployment card expired because it's workers are gone pub fn expiry_check_period() -> Duration { match CARD_MAX_AGE.to_std() { Ok(duration) => duration / 3, Err(_) => { // Only happens if CARD_MAX_AGE is negative, which it isn't unreachable!("Cannot run card expiry watcher, invalid CARD_MAX_AGE"); } } } /// Load a model deployment card from a JSON file pub fn load_from_json_file>(file: P) -> std::io::Result { Ok(serde_json::from_str(&std::fs::read_to_string(file)?)?) } /// Load a model deployment card from a JSON string pub fn load_from_json_str(json: &str) -> Result { Ok(serde_json::from_str(json)?) } // // Methods // /// Save the model deployment card to a JSON file pub fn save_to_json_file(&self, file: &str) -> Result<(), anyhow::Error> { std::fs::write(file, self.to_json()?)?; Ok(()) } pub fn slug(&self) -> &Slug { &self.slug } /// Serialize the model deployment card to a JSON string pub fn to_json(&self) -> Result { Ok(serde_json::to_string(self)?) } pub fn mdcsum(&self) -> String { let json = self.to_json().unwrap(); format!("{}", blake3::hash(json.as_bytes())) } /// Was this card last published a long time ago, suggesting the worker is gone? pub fn is_expired(&self) -> bool { if let Some(last_published) = self.last_published.as_ref() { chrono::Utc::now() - last_published > CARD_MAX_AGE } else { false } } /// Is this a full model card with tokenizer? /// There are cases where we have a placeholder card (see `with_name_only`). pub fn has_tokenizer(&self) -> bool { self.tokenizer.is_some() } pub fn tokenizer_hf(&self) -> anyhow::Result { match &self.tokenizer { Some(TokenizerKind::HfTokenizerJson(file)) => { HfTokenizer::from_file(file).map_err(anyhow::Error::msg) } Some(TokenizerKind::GGUF(t)) => Ok(*t.clone()), None => { anyhow::bail!("Blank ModelDeploymentCard does not have a tokenizer"); } } } pub fn is_gguf(&self) -> bool { match &self.model_info { Some(info) => info.is_gguf(), None => false, } } /// Move the files this MDC uses into the NATS object store. /// Updates the URI's to point to NATS. pub async fn move_to_nats(&mut self, nats_client: nats::Client) -> Result<()> { let nats_addr = nats_client.addr(); let bucket_name = self.slug().clone(); tracing::debug!( nats_addr, %bucket_name, "Uploading model deployment card fields to NATS" ); macro_rules! nats_upload { ($field:expr, $enum_variant:path, $filename:literal) => { if let Some($enum_variant(src_file)) = $field.take() { if !nats::is_nats_url(&src_file) { let target = format!("nats://{nats_addr}/{bucket_name}/{}", $filename); nats_client .object_store_upload( &std::path::PathBuf::from(&src_file), url::Url::parse(&target)?, ) .await?; $field = Some($enum_variant(target)); } } }; } nats_upload!(self.model_info, ModelInfoType::HfConfigJson, "config.json"); nats_upload!( self.prompt_formatter, PromptFormatterArtifact::HfTokenizerConfigJson, "tokenizer_config.json" ); nats_upload!( self.chat_template_file, PromptFormatterArtifact::HfChatTemplate, "chat_template.jinja" ); nats_upload!( self.tokenizer, TokenizerKind::HfTokenizerJson, "tokenizer.json" ); nats_upload!( self.gen_config, GenerationConfig::HfGenerationConfigJson, "generation_config.json" ); Ok(()) } /// Move the files this MDC uses from the NATS object store to local disk. /// Updates the URI's to point to the created files. /// /// The returned TempDir must be kept alive, it cleans up on drop. pub async fn move_from_nats(&mut self, nats_client: nats::Client) -> Result { let nats_addr = nats_client.addr(); let bucket_name = self.slug(); let target_dir = tempfile::TempDir::with_prefix(bucket_name.to_string())?; tracing::debug!( nats_addr, %bucket_name, target_dir = %target_dir.path().display(), "Downloading model deployment card fields from NATS" ); macro_rules! nats_download { ($field:expr, $enum_variant:path, $filename:literal) => { if let Some($enum_variant(src_url)) = $field.take() { if nats::is_nats_url(&src_url) { let target = target_dir.path().join($filename); nats_client .object_store_download(Url::parse(&src_url)?, &target) .await?; $field = Some($enum_variant(target.display().to_string())); } } }; } nats_download!(self.model_info, ModelInfoType::HfConfigJson, "config.json"); nats_download!( self.prompt_formatter, PromptFormatterArtifact::HfTokenizerConfigJson, "tokenizer_config.json" ); nats_download!( self.chat_template_file, PromptFormatterArtifact::HfChatTemplate, "chat_template.jinja" ); nats_download!( self.tokenizer, TokenizerKind::HfTokenizerJson, "tokenizer.json" ); nats_download!( self.gen_config, GenerationConfig::HfGenerationConfigJson, "generation_config.json" ); Ok(target_dir) } /// Delete this card from the key-value store and it's URLs from the object store pub async fn delete_from_nats(&mut self, nats_client: nats::Client) -> Result<()> { let nats_addr = nats_client.addr(); let bucket_name = self.slug(); tracing::trace!( nats_addr, %bucket_name, "Delete model deployment card from NATS" ); nats_client .object_store_delete_bucket(bucket_name.as_ref()) .await } /// Allow user to override the name we register this model under. /// Corresponds to vllm's `--served-model-name`. pub fn set_name(&mut self, name: &str) { self.display_name = name.to_string(); self.slug = Slug::from_string(name); } /// Build an in-memory ModelDeploymentCard from either: /// - a folder containing config.json, tokenizer.json and token_config.json /// - a GGUF file /// With an optional custom template pub fn load( config_path: impl AsRef, custom_template_path: Option<&Path>, ) -> anyhow::Result { let config_path = config_path.as_ref(); if config_path.is_dir() { Self::from_local_path(config_path, custom_template_path) } else { // GGUF files don't support custom templates yet if custom_template_path.is_some() { anyhow::bail!("Custom templates are not supported for GGUF files"); } Self::from_gguf(config_path) } } /// Creates a ModelDeploymentCard from a local directory path. /// /// Currently HuggingFace format is supported and following files are expected: /// - config.json: Model configuration in HuggingFace format /// - tokenizer.json: Tokenizer configuration in HuggingFace format /// - tokenizer_config.json: Optional prompt formatter configuration /// /// # Arguments /// * `local_root_dir` - Path to the local model directory /// /// # Errors /// Returns an error if: /// - The path doesn't exist or isn't a directory /// - The path contains invalid Unicode characters /// - Required model files are missing or invalid fn from_local_path( local_root_dir: impl AsRef, custom_template_path: Option<&Path>, ) -> anyhow::Result { let local_root_dir = local_root_dir.as_ref(); check_valid_local_repo_path(local_root_dir)?; let repo_id = local_root_dir .canonicalize()? .to_str() .ok_or_else(|| anyhow::anyhow!("Path contains invalid Unicode"))? .to_string(); let model_name = local_root_dir .file_name() .and_then(|n| n.to_str()) .ok_or_else(|| anyhow::anyhow!("Invalid model directory name"))?; Self::from_repo(&repo_id, model_name, custom_template_path) } fn from_gguf(gguf_file: &Path) -> anyhow::Result { let model_name = gguf_file .iter() .next_back() .map(|n| n.to_string_lossy().to_string()); let Some(model_name) = model_name else { // I think this would only happy on an empty path anyhow::bail!( "Could not extract model name from path '{}'", gguf_file.display() ); }; // TODO: we do this in HFConfig also, unify let content = load_gguf(gguf_file)?; let context_length = content.get_metadata()[&format!("{}.context_length", content.arch())] .to_u32() .unwrap_or(0); tracing::debug!(context_length, "Loaded context length from GGUF"); Ok(Self { display_name: model_name.to_string(), slug: Slug::from_string(model_name), model_info: Some(ModelInfoType::GGUF(gguf_file.to_path_buf())), tokenizer: Some(TokenizerKind::from_gguf(gguf_file)?), gen_config: None, // AFAICT there is no equivalent in a GGUF prompt_formatter: Some(PromptFormatterArtifact::GGUF(gguf_file.to_path_buf())), chat_template_file: None, prompt_context: None, // TODO - auto-detect prompt context revision: 0, last_published: None, context_length, kv_cache_block_size: 0, migration_limit: 0, user_data: None, runtime_config: ModelRuntimeConfig::default(), }) } fn from_repo( repo_id: &str, model_name: &str, custom_template_path: Option<&Path>, ) -> anyhow::Result { // This is usually the right choice let context_length = crate::file_json_field( &PathBuf::from(repo_id).join("config.json"), "max_position_embeddings", ) // But sometimes this is .or_else(|_| { crate::file_json_field( &PathBuf::from(repo_id).join("tokenizer_config.json"), "model_max_length", ) }) // If neither of those are present let the engine default it .unwrap_or(0); // Load chat template - either custom or from repo let chat_template_file = if let Some(template_path) = custom_template_path { if !template_path.exists() { anyhow::bail!( "Custom template file does not exist: {}", template_path.display() ); } // Verify the file is readable let _template_content = std::fs::read_to_string(template_path).with_context(|| { format!( "Failed to read custom template file: {}", template_path.display() ) })?; Some(PromptFormatterArtifact::HfChatTemplate( template_path.display().to_string(), )) } else { PromptFormatterArtifact::chat_template_from_repo(repo_id)? }; Ok(Self { display_name: model_name.to_string(), slug: Slug::from_string(model_name), model_info: Some(ModelInfoType::from_repo(repo_id)?), tokenizer: Some(TokenizerKind::from_repo(repo_id)?), gen_config: GenerationConfig::from_repo(repo_id).ok(), // optional prompt_formatter: PromptFormatterArtifact::from_repo(repo_id)?, chat_template_file, prompt_context: None, // TODO - auto-detect prompt context revision: 0, last_published: None, context_length, kv_cache_block_size: 0, // set later migration_limit: 0, user_data: None, runtime_config: ModelRuntimeConfig::default(), }) } } impl Versioned for ModelDeploymentCard { fn revision(&self) -> u64 { self.revision } fn set_revision(&mut self, revision: u64) { self.last_published = Some(chrono::Utc::now()); self.revision = revision; } } impl fmt::Display for ModelDeploymentCard { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "{}", self.slug()) } } pub trait ModelInfo: Send + Sync { /// Model type fn model_type(&self) -> String; /// Token ID for the beginning of sequence fn bos_token_id(&self) -> TokenIdType; /// Token ID for the end of sequence fn eos_token_ids(&self) -> Vec; /// Maximum position embeddings / max sequence length /// TODO: This is only used in a single test, no other code. Remove? fn max_position_embeddings(&self) -> Option; /// Vocabulary size /// TODO: This is only used in a single test, no other code. Remove? fn vocab_size(&self) -> Option; } impl ModelInfoType { pub fn get_model_info(&self) -> Result> { match self { Self::HfConfigJson(info) => HFConfig::from_json_file(info), Self::GGUF(path) => HFConfig::from_gguf(path), } } pub fn is_gguf(&self) -> bool { matches!(self, Self::GGUF(_)) } } #[derive(Debug, Clone, Serialize, Deserialize)] struct HFConfig { /// denotes the mixin to the flattened data model which can be present /// in the config.json file architectures: Vec, /// general model type model_type: String, text_config: Option, // Sometimes it's inside HFTextConfig, sometimes it's here eos_token_id: Option, } #[derive(Debug, Clone, Serialize, Deserialize)] struct HFTextConfig { // It can take multiple attempts to load this, so Option bos_token_id: Option, // We set this once bos_token_id is loaded so we don't have to deal with Option #[serde(default)] final_bos_token_id: TokenIdType, eos_token_id: Option, #[serde(default)] final_eos_token_ids: Vec, /// max sequence length max_position_embeddings: Option, /// number of layers in the model num_hidden_layers: usize, /// number of attention heads in the model num_attention_heads: Option, /// Vocabulary size vocab_size: Option, } impl HFConfig { fn from_json_file(file: &str) -> Result> { let file_pathbuf = PathBuf::from(file); let contents = std::fs::read_to_string(file)?; let mut config: Self = serde_json::from_str(&contents)?; if config.text_config.is_none() { let text_config: HFTextConfig = serde_json::from_str(&contents)?; config.text_config = Some(text_config); } // Sometimes bos_token_id is in generation_config.json not config.json let Some(text_config) = config.text_config.as_mut() else { anyhow::bail!( "Missing text config fields (model_type, eos_token_ids, etc) in config.json" ); }; if text_config.bos_token_id.is_none() { let bos_token_id = crate::file_json_field::( &Path::join( file_pathbuf.parent().unwrap_or(&PathBuf::from("")), "generation_config.json", ), "bos_token_id", ) .context( "missing bos_token_id in generation_config.json and config.json, cannot load", )?; text_config.bos_token_id = Some(bos_token_id); } // Now that we have it for sure, set it in the non-Option field let final_bos_token_id = text_config.bos_token_id.take().unwrap(); text_config.final_bos_token_id = final_bos_token_id; // TODO: refactor this when we switch to per-architecture tokenization let final_eos_token_ids: Vec = config .eos_token_id .as_ref() .or(text_config.eos_token_id.as_ref()) .and_then(|v| { if v.is_number() { v.as_number() .and_then(|n| n.as_u64()) .map(|n| vec![n as TokenIdType]) } else if v.is_array() { let arr = v.as_array().unwrap(); // Safety: We just checked Some( arr.iter() .filter_map(|inner_v| { inner_v .as_number() .and_then(|n| n.as_u64()) .map(|n| n as TokenIdType) }) .collect(), ) } else { tracing::error!( ?v, file, "eos_token_id is not a number or an array, cannot use" ); None } }) .or_else(|| { // Maybe it's in generation_config.json crate::file_json_field( &Path::join( file_pathbuf.parent().unwrap_or(&PathBuf::from("")), "generation_config.json", ), "eos_token_id", ) .inspect_err( |err| tracing::warn!(%err, "Missing eos_token_id in generation_config.json"), ) .ok() }) .ok_or_else(|| { anyhow::anyhow!( "missing eos_token_id in config.json and generation_config.json, cannot load" ) })?; text_config.final_eos_token_ids = final_eos_token_ids; Ok(Arc::new(config)) } fn from_gguf(gguf_file: &Path) -> Result> { let content = load_gguf(gguf_file)?; let model_config_metadata: ContentConfig = (&content).into(); let num_hidden_layers = content.get_metadata()[&format!("{}.block_count", content.arch())].to_u32()? as usize; let bos_token_id = content.get_metadata()["tokenizer.ggml.bos_token_id"].to_u32()?; let eos_token_id = content.get_metadata()["tokenizer.ggml.eos_token_id"].to_u32()?; // to_vec returns a Vec that's already there, so it's cheap let vocab_size = content.get_metadata()["tokenizer.ggml.tokens"] .to_vec()? .len(); let arch = content.arch().to_string(); Ok(Arc::new(HFConfig { architectures: vec![format!("{}ForCausalLM", capitalize(&arch))], // "general.architecture" model_type: arch, text_config: Some(HFTextConfig { bos_token_id: None, final_bos_token_id: bos_token_id, eos_token_id: None, final_eos_token_ids: vec![eos_token_id], // "llama.context_length" max_position_embeddings: Some(model_config_metadata.max_seq_len()), // "llama.block_count" num_hidden_layers, // "llama.attention.head_count" num_attention_heads: Some(model_config_metadata.num_attn_heads()), // "tokenizer.ggml.tokens".len() vocab_size: Some(vocab_size), }), eos_token_id: None, })) } } impl ModelInfo for HFConfig { fn model_type(&self) -> String { self.model_type.clone() } fn bos_token_id(&self) -> TokenIdType { self.text_config.as_ref().unwrap().final_bos_token_id } fn eos_token_ids(&self) -> Vec { self.text_config .as_ref() .unwrap() .final_eos_token_ids .clone() } fn max_position_embeddings(&self) -> Option { self.text_config.as_ref().unwrap().max_position_embeddings } fn vocab_size(&self) -> Option { self.text_config.as_ref().unwrap().vocab_size } } impl TokenizerKind { pub fn from_gguf(gguf_file: &Path) -> anyhow::Result { let content = load_gguf(gguf_file)?; let out = crate::gguf::convert_gguf_to_hf_tokenizer(&content) .with_context(|| gguf_file.display().to_string())?; Ok(TokenizerKind::GGUF(Box::new(out.tokenizer))) } } pub(crate) fn load_gguf(gguf_file: &Path) -> anyhow::Result { let filename = gguf_file.display().to_string(); let mut f = File::open(gguf_file).with_context(|| filename.clone())?; // vec because GGUF can be split into multiple files (shards) let mut readers = vec![&mut f]; crate::gguf::Content::from_readers(&mut readers).with_context(|| filename.clone()) } fn capitalize(s: &str) -> String { let mut chars = s.chars(); match chars.next() { None => String::new(), Some(first) => first.to_uppercase().collect::() + &chars.as_str().to_lowercase(), } } impl ModelInfoType { pub fn from_repo(repo_id: &str) -> Result { Self::try_is_hf_repo(repo_id) .with_context(|| format!("unable to extract model info from repo {}", repo_id)) } fn try_is_hf_repo(repo: &str) -> anyhow::Result { Ok(Self::HfConfigJson(check_for_file(repo, "config.json")?)) } } impl PromptFormatterArtifact { pub fn from_repo(repo_id: &str) -> Result> { // we should only error if we expect a prompt formatter and it's not found // right now, we don't know when to expect it, so we just return Ok(Some/None) Ok(Self::try_is_hf_repo(repo_id) .with_context(|| format!("unable to extract prompt format from repo {}", repo_id)) .ok()) } pub fn chat_template_from_repo(repo_id: &str) -> Result> { Ok(Self::chat_template_try_is_hf_repo(repo_id) .with_context(|| format!("unable to extract prompt format from repo {}", repo_id)) .ok()) } fn chat_template_try_is_hf_repo(repo: &str) -> anyhow::Result { Ok(Self::HfChatTemplate(check_for_file( repo, "chat_template.jinja", )?)) } fn try_is_hf_repo(repo: &str) -> anyhow::Result { Ok(Self::HfTokenizerConfigJson(check_for_file( repo, "tokenizer_config.json", )?)) } } impl TokenizerKind { pub fn from_repo(repo_id: &str) -> Result { Self::try_is_hf_repo(repo_id) .with_context(|| format!("unable to extract tokenizer kind from repo {}", repo_id)) } fn try_is_hf_repo(repo: &str) -> anyhow::Result { Ok(Self::HfTokenizerJson(check_for_file( repo, "tokenizer.json", )?)) } } impl GenerationConfig { pub fn from_repo(repo_id: &str) -> Result { Self::try_is_hf_repo(repo_id) .with_context(|| format!("unable to extract generation config from repo {repo_id}")) } fn try_is_hf_repo(repo: &str) -> anyhow::Result { Ok(Self::HfGenerationConfigJson(check_for_file( repo, "generation_config.json", )?)) } } /// Checks if the provided path contains the expected file. fn check_for_file(repo_id: &str, file: &str) -> anyhow::Result { let p = PathBuf::from(repo_id).join(file); let name = p.display().to_string(); if !p.exists() { anyhow::bail!("File not found: {name}") } Ok(name) } /// Checks if the provided path is a valid local repository path. /// /// # Arguments /// * `path` - Path to validate /// /// # Errors /// Returns an error if the path doesn't exist or isn't a directory fn check_valid_local_repo_path(path: impl AsRef) -> Result<()> { let path = path.as_ref(); if !path.exists() { return Err(anyhow::anyhow!( "Model path does not exist: {}", path.display() )); } if !path.is_dir() { return Err(anyhow::anyhow!( "Model path is not a directory: {}", path.display() )); } Ok(()) } #[cfg(test)] mod tests { use super::HFConfig; use std::path::Path; #[test] pub fn test_config_json_llama3() -> anyhow::Result<()> { let config_file = Path::new(env!("CARGO_MANIFEST_DIR")) .join("tests/data/sample-models/mock-llama-3.1-8b-instruct/config.json"); let config = HFConfig::from_json_file(&config_file.display().to_string())?; assert_eq!(config.bos_token_id(), 128000); Ok(()) } #[test] pub fn test_config_json_llama4() -> anyhow::Result<()> { let config_file = Path::new(env!("CARGO_MANIFEST_DIR")) .join("tests/data/sample-models/Llama-4-Scout-17B-16E-Instruct/config.json"); let config = HFConfig::from_json_file(&config_file.display().to_string())?; assert_eq!(config.bos_token_id(), 200000); Ok(()) } }