Unverified Commit f6ae58e3 authored by Graham King's avatar Graham King Committed by GitHub
Browse files

fix(llm): Send HF URLs to frontend not full paths (#5290)


Signed-off-by: default avatarGraham King <grahamk@nvidia.com>
parent cbbaa6b7
......@@ -360,7 +360,12 @@ fn register_llm<'p>(
let mut builder = dynamo_llm::local_model::LocalModelBuilder::default();
builder
// model path is the physical path on disk of the downloaded model
.model_path(model_path)
// source path is what the user gave as `--model-path`, either a real path (in which
// case it matches model_path above), or an HF repo.
.source_path(source_path.clone().into())
// --served_model_name
.model_name(model_name.clone())
.context_length(context_length)
.kv_cache_block_size(kv_cache_block_size)
......@@ -386,7 +391,10 @@ fn register_llm<'p>(
if let Some(lora_name) = lora_identifier {
tracing::info!("Registered LoRA '{}' MDC", lora_name);
} else {
tracing::info!("Registered base model '{:?}' MDC", model_name);
tracing::info!(
"Registered base model '{}' MDC",
model_name.unwrap_or(source_path)
);
}
Ok(())
......
......@@ -116,7 +116,15 @@ impl CheckedFile {
*path = new_path;
}
}
Either::Right(_) => tracing::warn!("Cannot update directory on URL"),
Either::Right(url) => {
let Some(filename) = url.path().split('/').next_back().filter(|s| !s.is_empty())
else {
tracing::warn!(%url, "Cannot update directory on invalid URL");
return;
};
let p = dir.join(filename);
self.path = Either::Left(p);
}
}
}
}
......
......@@ -24,6 +24,7 @@ use crate::{
},
};
use anyhow::Context as _;
use dynamo_runtime::{
DistributedRuntime,
component::Client,
......@@ -191,9 +192,11 @@ where
Pin<Box<dyn AsyncEngineStream<Annotated<BackendOutput>>>>,
>,
{
let PromptFormatter::OAI(formatter) = PromptFormatter::from_mdc(card)?;
let PromptFormatter::OAI(formatter) =
PromptFormatter::from_mdc(card).context("PromptFormatter.from_mdc")?;
let preprocessor =
OpenAIPreprocessor::new_with_parts(card.clone(), formatter, hf_tokenizer.clone())?;
OpenAIPreprocessor::new_with_parts(card.clone(), formatter, hf_tokenizer.clone())
.context("OpenAIPreprocessor.new_with_parts")?;
build_routed_pipeline_with_preprocessor(
card,
client,
......
......@@ -4,6 +4,7 @@
use std::fs;
use std::path::{Path, PathBuf};
use anyhow::Context as _;
use dynamo_runtime::component::Endpoint;
use dynamo_runtime::discovery::DiscoveryInstance;
use dynamo_runtime::discovery::DiscoverySpec;
......@@ -36,6 +37,7 @@ pub const DEFAULT_HTTP_PORT: u16 = 8080;
pub struct LocalModelBuilder {
model_path: Option<PathBuf>,
source_path: Option<PathBuf>,
model_name: Option<String>,
endpoint_id: Option<EndpointId>,
context_length: Option<u32>,
......@@ -70,6 +72,7 @@ impl Default for LocalModelBuilder {
tls_cert_path: Default::default(),
tls_key_path: Default::default(),
model_path: Default::default(),
source_path: Default::default(),
model_name: Default::default(),
endpoint_id: Default::default(),
context_length: Default::default(),
......@@ -91,12 +94,20 @@ impl Default for LocalModelBuilder {
}
impl LocalModelBuilder {
/// The path must exist
/// The path must exist, the model is already downloaded
pub fn model_path(&mut self, model_path: PathBuf) -> &mut Self {
self.model_path = Some(model_path);
self
}
/// The HF name of the model before we downloaded it, or a local path if
/// that was given on the cmd line. We need this because `model_path` is always
/// a local path.
pub fn source_path(&mut self, source_path: PathBuf) -> &mut Self {
self.source_path = Some(source_path);
self
}
pub fn model_name(&mut self, model_name: Option<String>) -> &mut Self {
self.model_name = model_name;
self
......@@ -311,14 +322,15 @@ impl LocalModelBuilder {
let mut card =
ModelDeploymentCard::load_from_disk(&model_path, self.custom_template_path.as_deref())?;
// Source path is the `--model-path` the user passed. By now our `model_path` is the local
// path of the downloaded model.
if let Some(source_path) = self.source_path.take() {
card.set_source_path(source_path);
}
// The served model name defaults to the full model path.
// This matches what vllm and sglang do.
card.set_name(
&self
.model_name
.clone()
.unwrap_or_else(|| model_path.display().to_string()),
);
let alt = card.source_path().to_string();
card.set_name(self.model_name.as_deref().unwrap_or(&alt));
card.kv_cache_block_size = self.kv_cache_block_size;
......@@ -443,12 +455,6 @@ impl LocalModel {
self.custom_backend_metrics_polling_interval
}
pub fn is_gguf(&self) -> bool {
// GGUF is the only file (not-folder) we accept, so we don't need to check the extension
// We will error when we come to parse it
self.full_path.is_file()
}
/// An endpoint to identify this model by.
pub fn endpoint_id(&self) -> &EndpointId {
&self.endpoint_id
......@@ -491,6 +497,23 @@ impl LocalModel {
suffix_for_log
);
let source_path = PathBuf::from(self.card.source_path());
if !source_path.exists() {
// The consumers of MDC (frontend) might not have the same local path as us, so
// replace disk paths with a custom URL like "hf://Qwen/Qwen3-0.6B/config.json".
//
// We can't do this if the model came from disk, as it might not be the same version
// as on Hugging Face (if it exists there at all).
//
// The URL is not used by anything. Frontend will download the repo and edit these
// paths to be local, so only the filename part matters currently.
// Possibly we should just use the filenames here. The URL feels nicer to me, it makes
// each field fully identified and fetchable independently.
self.card
.move_to_url(&format!("hf://{}/", self.card.source_path()))
.context("move_to_url")?;
}
// Register the Model Deployment Card via discovery interface
// The model_suffix (for LoRA) will be appended AFTER the instance_id
let discovery = endpoint.drt().discovery();
......
......@@ -13,7 +13,7 @@
//! - Prompt formatter settings (PromptFormatterArtifact)
use std::fmt;
use std::path::Path;
use std::path::{Path, PathBuf};
use std::sync::{Arc, OnceLock};
use crate::common::checked_file::CheckedFile;
......@@ -99,14 +99,14 @@ impl TokenizerKind {
#[serde(rename_all = "snake_case")]
pub enum PromptFormatterArtifact {
HfTokenizerConfigJson(CheckedFile),
HfChatTemplate(CheckedFile),
HfChatTemplate { is_custom: bool, file: CheckedFile },
}
impl PromptFormatterArtifact {
pub fn checksum(&self) -> String {
match self {
PromptFormatterArtifact::HfTokenizerConfigJson(c) => c.checksum().to_string(),
PromptFormatterArtifact::HfChatTemplate(c) => c.checksum().to_string(),
PromptFormatterArtifact::HfChatTemplate { file: c, .. } => c.checksum().to_string(),
}
}
......@@ -114,14 +114,21 @@ impl PromptFormatterArtifact {
pub fn is_local(&self) -> bool {
match self {
PromptFormatterArtifact::HfTokenizerConfigJson(c) => c.is_local(),
PromptFormatterArtifact::HfChatTemplate(c) => c.is_local(),
PromptFormatterArtifact::HfChatTemplate { file: c, .. } => c.is_local(),
}
}
pub fn update_dir(&mut self, dir: &Path) {
match self {
PromptFormatterArtifact::HfTokenizerConfigJson(c) => c.update_dir(dir),
PromptFormatterArtifact::HfChatTemplate(c) => c.update_dir(dir),
PromptFormatterArtifact::HfChatTemplate { file: c, .. } => c.update_dir(dir),
}
}
pub fn is_custom(&self) -> bool {
match self {
PromptFormatterArtifact::HfTokenizerConfigJson(_) => false,
PromptFormatterArtifact::HfChatTemplate { is_custom, .. } => *is_custom,
}
}
}
......@@ -180,7 +187,7 @@ pub struct ModelDeploymentCard {
/// this field preserves the original repository path needed for downloads.
/// Falls back to `display_name` if not set.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub source_model: Option<String>,
pub source_path: Option<String>,
/// Model information
pub model_info: Option<ModelInfoType>,
......@@ -305,6 +312,9 @@ impl ModelDeploymentCard {
// Only include the important fields
let mut bytes_to_hash: Vec<u8> = Vec::with_capacity(512);
bytes_to_hash.extend(self.display_name.as_bytes());
if let Some(source_path) = self.source_path.as_ref() {
bytes_to_hash.extend(source_path.as_bytes());
}
// The files can be either a URL or a local path, so we ignore that and hash their
// checksum instead, which won't change wherever they are.
......@@ -371,17 +381,21 @@ impl ModelDeploymentCard {
}
}
pub(crate) fn set_source_path(&mut self, source_path: PathBuf) {
self.source_path = Some(source_path.display().to_string());
}
/// Allow user to override the name we register this model under.
/// Corresponds to vllm's `--served-model-name`.
pub fn set_name(&mut self, name: &str) {
// Preserve original model path before overwriting display_name
if self.source_model.is_none() && !self.display_name.is_empty() {
self.source_model = Some(self.display_name.clone());
}
self.display_name = name.to_string();
self.slug = Slug::from_string(name);
}
pub fn source_path(&self) -> &str {
self.source_path.as_ref().unwrap_or(&self.display_name)
}
/// Build an in-memory ModelDeploymentCard from a folder containing config.json,
/// tokenizer.json and tokenizer_config.json (i.e. a huggingface repo checkout).
/// Optional custom template.
......@@ -413,13 +427,73 @@ impl ModelDeploymentCard {
}
let ignore_weights = true;
let model_name = self.source_model.as_ref().unwrap_or(&self.display_name);
let local_path = crate::hub::from_hf(model_name, ignore_weights).await?;
let local_path = crate::hub::from_hf(self.source_path(), ignore_weights).await?;
self.update_dir(&local_path);
Ok(())
}
/// Re-write all the local disk paths as a URL. Do this before publishing the MDC.
/// The opposite of `move_to_url` is `update_dir`.
pub fn move_to_url(&mut self, base_url: &str) -> anyhow::Result<()> {
macro_rules! change {
($field:expr, $enum_variant:path) => {
if let Some($enum_variant(src_file)) = $field.as_mut()
&& let Some(filename) = src_file
.path()
.and_then(|p| p.file_name())
.and_then(|f| f.to_str())
.map(|f| f.to_string())
{
let hf_url = url::Url::parse(base_url)
.and_then(|u| u.join(filename.as_ref()))
.context(filename)?;
src_file.move_to_url(hf_url);
}
};
}
// config.json
change!(self.model_info, ModelInfoType::HfConfigJson);
// generation_config.json
change!(self.gen_config, GenerationConfig::HfGenerationConfigJson);
// tokenizer_config.json
change!(
self.prompt_formatter,
PromptFormatterArtifact::HfTokenizerConfigJson
);
// tokenizer.json
change!(self.tokenizer, TokenizerKind::HfTokenizerJson);
// We only "move" the chat template if it came form the repo. If we have a custom template
// file we cannot download that from HF.
if let Some(PromptFormatterArtifact::HfChatTemplate {
file: src_file,
is_custom,
}) = self.chat_template_file.as_mut()
{
if *is_custom {
tracing::info!(
"Detected custom chat template. Ensure file exists in the same location on all hosts."
);
} else if let Some(filename) = src_file
.path()
.and_then(|p| p.file_name())
.and_then(|f| f.to_str())
.map(|f| f.to_string())
{
let hf_url = url::Url::parse(base_url)
.and_then(|u| u.join(filename.as_ref()))
.context(filename)?;
src_file.move_to_url(hf_url);
}
}
Ok(())
}
/// Are all the files we need (tokenizer.json, etc) available locally?
fn has_local_files(&self) -> bool {
let has_model_info = self
......@@ -466,12 +540,15 @@ impl ModelDeploymentCard {
if let Some(pf) = self.prompt_formatter.as_mut() {
pf.update_dir(dir);
}
if let Some(ct) = self.chat_template_file.as_mut() {
ct.update_dir(dir);
}
if let Some(gc) = self.gen_config.as_mut() {
gc.update_dir(dir);
}
// If it's a custom chat template we didn't download it, so leave the path untouched
if let Some(ct) = self.chat_template_file.as_mut()
&& !ct.is_custom()
{
ct.update_dir(dir);
}
}
/// Creates a ModelDeploymentCard from a local directory path.
......@@ -548,18 +625,21 @@ impl ModelDeploymentCard {
)
})?;
Some(PromptFormatterArtifact::HfChatTemplate(
CheckedFile::from_disk(template_path)?,
))
Some(PromptFormatterArtifact::HfChatTemplate {
is_custom: custom_template_path.is_some(),
file: CheckedFile::from_disk(template_path)?,
})
} else {
PromptFormatterArtifact::chat_template_from_disk(local_path)?
};
// This gets replaced when we `set_name`
let display_name = local_path.display().to_string();
Ok(Self {
slug: Slug::from_string(&display_name),
display_name,
source_model: None,
source_path: None,
model_info,
tokenizer,
gen_config,
......@@ -846,7 +926,10 @@ impl PromptFormatterArtifact {
pub fn chat_template_from_disk(directory: &Path) -> Result<Option<Self>> {
match CheckedFile::from_disk(directory.join("chat_template.jinja")) {
Ok(f) => Ok(Some(Self::HfChatTemplate(f))),
Ok(f) => Ok(Some(Self::HfChatTemplate {
file: f,
is_custom: false,
})),
Err(_) => Ok(None),
}
}
......
......@@ -42,8 +42,12 @@ impl PromptFormatter {
mdc.display_name
);
};
let contents = std::fs::read_to_string(file)
.with_context(|| format!("fs:read_to_string '{}'", file.display()))?;
let contents = std::fs::read_to_string(file).with_context(|| {
format!(
"PromptFormatter.from_mdc fs:read_to_string '{}'",
file.display()
)
})?;
let mut config: ChatTemplate =
serde_json::from_str(&contents).inspect_err(|err| {
crate::log_json_err(&file.display().to_string(), &contents, err)
......@@ -53,8 +57,9 @@ impl PromptFormatter {
// stores the chat template in a separate file, we check if the file exists and
// put the chat template into config as normalization.
// This may also be a custom template provided via CLI flag.
if let Some(PromptFormatterArtifact::HfChatTemplate(checked_file)) =
mdc.chat_template_file.as_ref()
if let Some(PromptFormatterArtifact::HfChatTemplate {
file: checked_file, ..
}) = mdc.chat_template_file.as_ref()
{
let Some(chat_template_file) = checked_file.path() else {
anyhow::bail!(
......@@ -77,7 +82,7 @@ impl PromptFormatter {
.map_or(ContextMixins::default(), |x| ContextMixins::new(&x)),
)
}
PromptFormatterArtifact::HfChatTemplate(_) => Err(anyhow::anyhow!(
PromptFormatterArtifact::HfChatTemplate { .. } => Err(anyhow::anyhow!(
"prompt_formatter should not have type HfChatTemplate"
)),
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment