Unverified Commit f6ae58e3 authored by Graham King's avatar Graham King Committed by GitHub
Browse files

fix(llm): Send HF URLs to frontend not full paths (#5290)


Signed-off-by: default avatarGraham King <grahamk@nvidia.com>
parent cbbaa6b7
...@@ -360,7 +360,12 @@ fn register_llm<'p>( ...@@ -360,7 +360,12 @@ fn register_llm<'p>(
let mut builder = dynamo_llm::local_model::LocalModelBuilder::default(); let mut builder = dynamo_llm::local_model::LocalModelBuilder::default();
builder builder
// model path is the physical path on disk of the downloaded model
.model_path(model_path) .model_path(model_path)
// source path is what the user gave as `--model-path`, either a real path (in which
// case it matches model_path above), or an HF repo.
.source_path(source_path.clone().into())
// --served_model_name
.model_name(model_name.clone()) .model_name(model_name.clone())
.context_length(context_length) .context_length(context_length)
.kv_cache_block_size(kv_cache_block_size) .kv_cache_block_size(kv_cache_block_size)
...@@ -386,7 +391,10 @@ fn register_llm<'p>( ...@@ -386,7 +391,10 @@ fn register_llm<'p>(
if let Some(lora_name) = lora_identifier { if let Some(lora_name) = lora_identifier {
tracing::info!("Registered LoRA '{}' MDC", lora_name); tracing::info!("Registered LoRA '{}' MDC", lora_name);
} else { } else {
tracing::info!("Registered base model '{:?}' MDC", model_name); tracing::info!(
"Registered base model '{}' MDC",
model_name.unwrap_or(source_path)
);
} }
Ok(()) Ok(())
......
...@@ -116,7 +116,15 @@ impl CheckedFile { ...@@ -116,7 +116,15 @@ impl CheckedFile {
*path = new_path; *path = new_path;
} }
} }
Either::Right(_) => tracing::warn!("Cannot update directory on URL"), Either::Right(url) => {
let Some(filename) = url.path().split('/').next_back().filter(|s| !s.is_empty())
else {
tracing::warn!(%url, "Cannot update directory on invalid URL");
return;
};
let p = dir.join(filename);
self.path = Either::Left(p);
}
} }
} }
} }
......
...@@ -24,6 +24,7 @@ use crate::{ ...@@ -24,6 +24,7 @@ use crate::{
}, },
}; };
use anyhow::Context as _;
use dynamo_runtime::{ use dynamo_runtime::{
DistributedRuntime, DistributedRuntime,
component::Client, component::Client,
...@@ -191,9 +192,11 @@ where ...@@ -191,9 +192,11 @@ where
Pin<Box<dyn AsyncEngineStream<Annotated<BackendOutput>>>>, Pin<Box<dyn AsyncEngineStream<Annotated<BackendOutput>>>>,
>, >,
{ {
let PromptFormatter::OAI(formatter) = PromptFormatter::from_mdc(card)?; let PromptFormatter::OAI(formatter) =
PromptFormatter::from_mdc(card).context("PromptFormatter.from_mdc")?;
let preprocessor = let preprocessor =
OpenAIPreprocessor::new_with_parts(card.clone(), formatter, hf_tokenizer.clone())?; OpenAIPreprocessor::new_with_parts(card.clone(), formatter, hf_tokenizer.clone())
.context("OpenAIPreprocessor.new_with_parts")?;
build_routed_pipeline_with_preprocessor( build_routed_pipeline_with_preprocessor(
card, card,
client, client,
......
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
use std::fs; use std::fs;
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use anyhow::Context as _;
use dynamo_runtime::component::Endpoint; use dynamo_runtime::component::Endpoint;
use dynamo_runtime::discovery::DiscoveryInstance; use dynamo_runtime::discovery::DiscoveryInstance;
use dynamo_runtime::discovery::DiscoverySpec; use dynamo_runtime::discovery::DiscoverySpec;
...@@ -36,6 +37,7 @@ pub const DEFAULT_HTTP_PORT: u16 = 8080; ...@@ -36,6 +37,7 @@ pub const DEFAULT_HTTP_PORT: u16 = 8080;
pub struct LocalModelBuilder { pub struct LocalModelBuilder {
model_path: Option<PathBuf>, model_path: Option<PathBuf>,
source_path: Option<PathBuf>,
model_name: Option<String>, model_name: Option<String>,
endpoint_id: Option<EndpointId>, endpoint_id: Option<EndpointId>,
context_length: Option<u32>, context_length: Option<u32>,
...@@ -70,6 +72,7 @@ impl Default for LocalModelBuilder { ...@@ -70,6 +72,7 @@ impl Default for LocalModelBuilder {
tls_cert_path: Default::default(), tls_cert_path: Default::default(),
tls_key_path: Default::default(), tls_key_path: Default::default(),
model_path: Default::default(), model_path: Default::default(),
source_path: Default::default(),
model_name: Default::default(), model_name: Default::default(),
endpoint_id: Default::default(), endpoint_id: Default::default(),
context_length: Default::default(), context_length: Default::default(),
...@@ -91,12 +94,20 @@ impl Default for LocalModelBuilder { ...@@ -91,12 +94,20 @@ impl Default for LocalModelBuilder {
} }
impl LocalModelBuilder { impl LocalModelBuilder {
/// The path must exist /// The path must exist, the model is already downloaded
pub fn model_path(&mut self, model_path: PathBuf) -> &mut Self { pub fn model_path(&mut self, model_path: PathBuf) -> &mut Self {
self.model_path = Some(model_path); self.model_path = Some(model_path);
self self
} }
/// The HF name of the model before we downloaded it, or a local path if
/// that was given on the cmd line. We need this because `model_path` is always
/// a local path.
pub fn source_path(&mut self, source_path: PathBuf) -> &mut Self {
self.source_path = Some(source_path);
self
}
pub fn model_name(&mut self, model_name: Option<String>) -> &mut Self { pub fn model_name(&mut self, model_name: Option<String>) -> &mut Self {
self.model_name = model_name; self.model_name = model_name;
self self
...@@ -311,14 +322,15 @@ impl LocalModelBuilder { ...@@ -311,14 +322,15 @@ impl LocalModelBuilder {
let mut card = let mut card =
ModelDeploymentCard::load_from_disk(&model_path, self.custom_template_path.as_deref())?; ModelDeploymentCard::load_from_disk(&model_path, self.custom_template_path.as_deref())?;
// Source path is the `--model-path` the user passed. By now our `model_path` is the local
// path of the downloaded model.
if let Some(source_path) = self.source_path.take() {
card.set_source_path(source_path);
}
// The served model name defaults to the full model path. // The served model name defaults to the full model path.
// This matches what vllm and sglang do. // This matches what vllm and sglang do.
card.set_name( let alt = card.source_path().to_string();
&self card.set_name(self.model_name.as_deref().unwrap_or(&alt));
.model_name
.clone()
.unwrap_or_else(|| model_path.display().to_string()),
);
card.kv_cache_block_size = self.kv_cache_block_size; card.kv_cache_block_size = self.kv_cache_block_size;
...@@ -443,12 +455,6 @@ impl LocalModel { ...@@ -443,12 +455,6 @@ impl LocalModel {
self.custom_backend_metrics_polling_interval self.custom_backend_metrics_polling_interval
} }
pub fn is_gguf(&self) -> bool {
// GGUF is the only file (not-folder) we accept, so we don't need to check the extension
// We will error when we come to parse it
self.full_path.is_file()
}
/// An endpoint to identify this model by. /// An endpoint to identify this model by.
pub fn endpoint_id(&self) -> &EndpointId { pub fn endpoint_id(&self) -> &EndpointId {
&self.endpoint_id &self.endpoint_id
...@@ -491,6 +497,23 @@ impl LocalModel { ...@@ -491,6 +497,23 @@ impl LocalModel {
suffix_for_log suffix_for_log
); );
let source_path = PathBuf::from(self.card.source_path());
if !source_path.exists() {
// The consumers of MDC (frontend) might not have the same local path as us, so
// replace disk paths with a custom URL like "hf://Qwen/Qwen3-0.6B/config.json".
//
// We can't do this if the model came from disk, as it might not be the same version
// as on Hugging Face (if it exists there at all).
//
// The URL is not used by anything. Frontend will download the repo and edit these
// paths to be local, so only the filename part matters currently.
// Possibly we should just use the filenames here. The URL feels nicer to me, it makes
// each field fully identified and fetchable independently.
self.card
.move_to_url(&format!("hf://{}/", self.card.source_path()))
.context("move_to_url")?;
}
// Register the Model Deployment Card via discovery interface // Register the Model Deployment Card via discovery interface
// The model_suffix (for LoRA) will be appended AFTER the instance_id // The model_suffix (for LoRA) will be appended AFTER the instance_id
let discovery = endpoint.drt().discovery(); let discovery = endpoint.drt().discovery();
......
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
//! - Prompt formatter settings (PromptFormatterArtifact) //! - Prompt formatter settings (PromptFormatterArtifact)
use std::fmt; use std::fmt;
use std::path::Path; use std::path::{Path, PathBuf};
use std::sync::{Arc, OnceLock}; use std::sync::{Arc, OnceLock};
use crate::common::checked_file::CheckedFile; use crate::common::checked_file::CheckedFile;
...@@ -99,14 +99,14 @@ impl TokenizerKind { ...@@ -99,14 +99,14 @@ impl TokenizerKind {
#[serde(rename_all = "snake_case")] #[serde(rename_all = "snake_case")]
pub enum PromptFormatterArtifact { pub enum PromptFormatterArtifact {
HfTokenizerConfigJson(CheckedFile), HfTokenizerConfigJson(CheckedFile),
HfChatTemplate(CheckedFile), HfChatTemplate { is_custom: bool, file: CheckedFile },
} }
impl PromptFormatterArtifact { impl PromptFormatterArtifact {
pub fn checksum(&self) -> String { pub fn checksum(&self) -> String {
match self { match self {
PromptFormatterArtifact::HfTokenizerConfigJson(c) => c.checksum().to_string(), PromptFormatterArtifact::HfTokenizerConfigJson(c) => c.checksum().to_string(),
PromptFormatterArtifact::HfChatTemplate(c) => c.checksum().to_string(), PromptFormatterArtifact::HfChatTemplate { file: c, .. } => c.checksum().to_string(),
} }
} }
...@@ -114,14 +114,21 @@ impl PromptFormatterArtifact { ...@@ -114,14 +114,21 @@ impl PromptFormatterArtifact {
pub fn is_local(&self) -> bool { pub fn is_local(&self) -> bool {
match self { match self {
PromptFormatterArtifact::HfTokenizerConfigJson(c) => c.is_local(), PromptFormatterArtifact::HfTokenizerConfigJson(c) => c.is_local(),
PromptFormatterArtifact::HfChatTemplate(c) => c.is_local(), PromptFormatterArtifact::HfChatTemplate { file: c, .. } => c.is_local(),
} }
} }
pub fn update_dir(&mut self, dir: &Path) { pub fn update_dir(&mut self, dir: &Path) {
match self { match self {
PromptFormatterArtifact::HfTokenizerConfigJson(c) => c.update_dir(dir), PromptFormatterArtifact::HfTokenizerConfigJson(c) => c.update_dir(dir),
PromptFormatterArtifact::HfChatTemplate(c) => c.update_dir(dir), PromptFormatterArtifact::HfChatTemplate { file: c, .. } => c.update_dir(dir),
}
}
pub fn is_custom(&self) -> bool {
match self {
PromptFormatterArtifact::HfTokenizerConfigJson(_) => false,
PromptFormatterArtifact::HfChatTemplate { is_custom, .. } => *is_custom,
} }
} }
} }
...@@ -180,7 +187,7 @@ pub struct ModelDeploymentCard { ...@@ -180,7 +187,7 @@ pub struct ModelDeploymentCard {
/// this field preserves the original repository path needed for downloads. /// this field preserves the original repository path needed for downloads.
/// Falls back to `display_name` if not set. /// Falls back to `display_name` if not set.
#[serde(default, skip_serializing_if = "Option::is_none")] #[serde(default, skip_serializing_if = "Option::is_none")]
pub source_model: Option<String>, pub source_path: Option<String>,
/// Model information /// Model information
pub model_info: Option<ModelInfoType>, pub model_info: Option<ModelInfoType>,
...@@ -305,6 +312,9 @@ impl ModelDeploymentCard { ...@@ -305,6 +312,9 @@ impl ModelDeploymentCard {
// Only include the important fields // Only include the important fields
let mut bytes_to_hash: Vec<u8> = Vec::with_capacity(512); let mut bytes_to_hash: Vec<u8> = Vec::with_capacity(512);
bytes_to_hash.extend(self.display_name.as_bytes()); bytes_to_hash.extend(self.display_name.as_bytes());
if let Some(source_path) = self.source_path.as_ref() {
bytes_to_hash.extend(source_path.as_bytes());
}
// The files can be either a URL or a local path, so we ignore that and hash their // The files can be either a URL or a local path, so we ignore that and hash their
// checksum instead, which won't change wherever they are. // checksum instead, which won't change wherever they are.
...@@ -371,17 +381,21 @@ impl ModelDeploymentCard { ...@@ -371,17 +381,21 @@ impl ModelDeploymentCard {
} }
} }
pub(crate) fn set_source_path(&mut self, source_path: PathBuf) {
self.source_path = Some(source_path.display().to_string());
}
/// Allow user to override the name we register this model under. /// Allow user to override the name we register this model under.
/// Corresponds to vllm's `--served-model-name`. /// Corresponds to vllm's `--served-model-name`.
pub fn set_name(&mut self, name: &str) { pub fn set_name(&mut self, name: &str) {
// Preserve original model path before overwriting display_name
if self.source_model.is_none() && !self.display_name.is_empty() {
self.source_model = Some(self.display_name.clone());
}
self.display_name = name.to_string(); self.display_name = name.to_string();
self.slug = Slug::from_string(name); self.slug = Slug::from_string(name);
} }
pub fn source_path(&self) -> &str {
self.source_path.as_ref().unwrap_or(&self.display_name)
}
/// Build an in-memory ModelDeploymentCard from a folder containing config.json, /// Build an in-memory ModelDeploymentCard from a folder containing config.json,
/// tokenizer.json and tokenizer_config.json (i.e. a huggingface repo checkout). /// tokenizer.json and tokenizer_config.json (i.e. a huggingface repo checkout).
/// Optional custom template. /// Optional custom template.
...@@ -413,13 +427,73 @@ impl ModelDeploymentCard { ...@@ -413,13 +427,73 @@ impl ModelDeploymentCard {
} }
let ignore_weights = true; let ignore_weights = true;
let model_name = self.source_model.as_ref().unwrap_or(&self.display_name); let local_path = crate::hub::from_hf(self.source_path(), ignore_weights).await?;
let local_path = crate::hub::from_hf(model_name, ignore_weights).await?;
self.update_dir(&local_path); self.update_dir(&local_path);
Ok(()) Ok(())
} }
/// Re-write all the local disk paths as a URL. Do this before publishing the MDC.
/// The opposite of `move_to_url` is `update_dir`.
pub fn move_to_url(&mut self, base_url: &str) -> anyhow::Result<()> {
macro_rules! change {
($field:expr, $enum_variant:path) => {
if let Some($enum_variant(src_file)) = $field.as_mut()
&& let Some(filename) = src_file
.path()
.and_then(|p| p.file_name())
.and_then(|f| f.to_str())
.map(|f| f.to_string())
{
let hf_url = url::Url::parse(base_url)
.and_then(|u| u.join(filename.as_ref()))
.context(filename)?;
src_file.move_to_url(hf_url);
}
};
}
// config.json
change!(self.model_info, ModelInfoType::HfConfigJson);
// generation_config.json
change!(self.gen_config, GenerationConfig::HfGenerationConfigJson);
// tokenizer_config.json
change!(
self.prompt_formatter,
PromptFormatterArtifact::HfTokenizerConfigJson
);
// tokenizer.json
change!(self.tokenizer, TokenizerKind::HfTokenizerJson);
// We only "move" the chat template if it came form the repo. If we have a custom template
// file we cannot download that from HF.
if let Some(PromptFormatterArtifact::HfChatTemplate {
file: src_file,
is_custom,
}) = self.chat_template_file.as_mut()
{
if *is_custom {
tracing::info!(
"Detected custom chat template. Ensure file exists in the same location on all hosts."
);
} else if let Some(filename) = src_file
.path()
.and_then(|p| p.file_name())
.and_then(|f| f.to_str())
.map(|f| f.to_string())
{
let hf_url = url::Url::parse(base_url)
.and_then(|u| u.join(filename.as_ref()))
.context(filename)?;
src_file.move_to_url(hf_url);
}
}
Ok(())
}
/// Are all the files we need (tokenizer.json, etc) available locally? /// Are all the files we need (tokenizer.json, etc) available locally?
fn has_local_files(&self) -> bool { fn has_local_files(&self) -> bool {
let has_model_info = self let has_model_info = self
...@@ -466,12 +540,15 @@ impl ModelDeploymentCard { ...@@ -466,12 +540,15 @@ impl ModelDeploymentCard {
if let Some(pf) = self.prompt_formatter.as_mut() { if let Some(pf) = self.prompt_formatter.as_mut() {
pf.update_dir(dir); pf.update_dir(dir);
} }
if let Some(ct) = self.chat_template_file.as_mut() {
ct.update_dir(dir);
}
if let Some(gc) = self.gen_config.as_mut() { if let Some(gc) = self.gen_config.as_mut() {
gc.update_dir(dir); gc.update_dir(dir);
} }
// If it's a custom chat template we didn't download it, so leave the path untouched
if let Some(ct) = self.chat_template_file.as_mut()
&& !ct.is_custom()
{
ct.update_dir(dir);
}
} }
/// Creates a ModelDeploymentCard from a local directory path. /// Creates a ModelDeploymentCard from a local directory path.
...@@ -548,18 +625,21 @@ impl ModelDeploymentCard { ...@@ -548,18 +625,21 @@ impl ModelDeploymentCard {
) )
})?; })?;
Some(PromptFormatterArtifact::HfChatTemplate( Some(PromptFormatterArtifact::HfChatTemplate {
CheckedFile::from_disk(template_path)?, is_custom: custom_template_path.is_some(),
)) file: CheckedFile::from_disk(template_path)?,
})
} else { } else {
PromptFormatterArtifact::chat_template_from_disk(local_path)? PromptFormatterArtifact::chat_template_from_disk(local_path)?
}; };
// This gets replaced when we `set_name`
let display_name = local_path.display().to_string(); let display_name = local_path.display().to_string();
Ok(Self { Ok(Self {
slug: Slug::from_string(&display_name), slug: Slug::from_string(&display_name),
display_name, display_name,
source_model: None, source_path: None,
model_info, model_info,
tokenizer, tokenizer,
gen_config, gen_config,
...@@ -846,7 +926,10 @@ impl PromptFormatterArtifact { ...@@ -846,7 +926,10 @@ impl PromptFormatterArtifact {
pub fn chat_template_from_disk(directory: &Path) -> Result<Option<Self>> { pub fn chat_template_from_disk(directory: &Path) -> Result<Option<Self>> {
match CheckedFile::from_disk(directory.join("chat_template.jinja")) { match CheckedFile::from_disk(directory.join("chat_template.jinja")) {
Ok(f) => Ok(Some(Self::HfChatTemplate(f))), Ok(f) => Ok(Some(Self::HfChatTemplate {
file: f,
is_custom: false,
})),
Err(_) => Ok(None), Err(_) => Ok(None),
} }
} }
......
...@@ -42,8 +42,12 @@ impl PromptFormatter { ...@@ -42,8 +42,12 @@ impl PromptFormatter {
mdc.display_name mdc.display_name
); );
}; };
let contents = std::fs::read_to_string(file) let contents = std::fs::read_to_string(file).with_context(|| {
.with_context(|| format!("fs:read_to_string '{}'", file.display()))?; format!(
"PromptFormatter.from_mdc fs:read_to_string '{}'",
file.display()
)
})?;
let mut config: ChatTemplate = let mut config: ChatTemplate =
serde_json::from_str(&contents).inspect_err(|err| { serde_json::from_str(&contents).inspect_err(|err| {
crate::log_json_err(&file.display().to_string(), &contents, err) crate::log_json_err(&file.display().to_string(), &contents, err)
...@@ -53,8 +57,9 @@ impl PromptFormatter { ...@@ -53,8 +57,9 @@ impl PromptFormatter {
// stores the chat template in a separate file, we check if the file exists and // stores the chat template in a separate file, we check if the file exists and
// put the chat template into config as normalization. // put the chat template into config as normalization.
// This may also be a custom template provided via CLI flag. // This may also be a custom template provided via CLI flag.
if let Some(PromptFormatterArtifact::HfChatTemplate(checked_file)) = if let Some(PromptFormatterArtifact::HfChatTemplate {
mdc.chat_template_file.as_ref() file: checked_file, ..
}) = mdc.chat_template_file.as_ref()
{ {
let Some(chat_template_file) = checked_file.path() else { let Some(chat_template_file) = checked_file.path() else {
anyhow::bail!( anyhow::bail!(
...@@ -77,7 +82,7 @@ impl PromptFormatter { ...@@ -77,7 +82,7 @@ impl PromptFormatter {
.map_or(ContextMixins::default(), |x| ContextMixins::new(&x)), .map_or(ContextMixins::default(), |x| ContextMixins::new(&x)),
) )
} }
PromptFormatterArtifact::HfChatTemplate(_) => Err(anyhow::anyhow!( PromptFormatterArtifact::HfChatTemplate { .. } => Err(anyhow::anyhow!(
"prompt_formatter should not have type HfChatTemplate" "prompt_formatter should not have type HfChatTemplate"
)), )),
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment