Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
f6ae58e3
Unverified
Commit
f6ae58e3
authored
Jan 12, 2026
by
Graham King
Committed by
GitHub
Jan 12, 2026
Browse files
fix(llm): Send HF URLs to frontend not full paths (#5290)
Signed-off-by:
Graham King
<
grahamk@nvidia.com
>
parent
cbbaa6b7
Changes
6
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
172 additions
and
42 deletions
+172
-42
lib/bindings/python/rust/lib.rs
lib/bindings/python/rust/lib.rs
+9
-1
lib/llm/src/common/checked_file.rs
lib/llm/src/common/checked_file.rs
+9
-1
lib/llm/src/entrypoint/input/common.rs
lib/llm/src/entrypoint/input/common.rs
+5
-2
lib/llm/src/local_model.rs
lib/llm/src/local_model.rs
+36
-13
lib/llm/src/model_card.rs
lib/llm/src/model_card.rs
+103
-20
lib/llm/src/preprocessor/prompt/template.rs
lib/llm/src/preprocessor/prompt/template.rs
+10
-5
No files found.
lib/bindings/python/rust/lib.rs
View file @
f6ae58e3
...
...
@@ -360,7 +360,12 @@ fn register_llm<'p>(
let
mut
builder
=
dynamo_llm
::
local_model
::
LocalModelBuilder
::
default
();
builder
// model path is the physical path on disk of the downloaded model
.model_path
(
model_path
)
// source path is what the user gave as `--model-path`, either a real path (in which
// case it matches model_path above), or an HF repo.
.source_path
(
source_path
.clone
()
.into
())
// --served_model_name
.model_name
(
model_name
.clone
())
.context_length
(
context_length
)
.kv_cache_block_size
(
kv_cache_block_size
)
...
...
@@ -386,7 +391,10 @@ fn register_llm<'p>(
if
let
Some
(
lora_name
)
=
lora_identifier
{
tracing
::
info!
(
"Registered LoRA '{}' MDC"
,
lora_name
);
}
else
{
tracing
::
info!
(
"Registered base model '{:?}' MDC"
,
model_name
);
tracing
::
info!
(
"Registered base model '{}' MDC"
,
model_name
.unwrap_or
(
source_path
)
);
}
Ok
(())
...
...
lib/llm/src/common/checked_file.rs
View file @
f6ae58e3
...
...
@@ -116,7 +116,15 @@ impl CheckedFile {
*
path
=
new_path
;
}
}
Either
::
Right
(
_
)
=>
tracing
::
warn!
(
"Cannot update directory on URL"
),
Either
::
Right
(
url
)
=>
{
let
Some
(
filename
)
=
url
.path
()
.split
(
'/'
)
.next_back
()
.filter
(|
s
|
!
s
.is_empty
())
else
{
tracing
::
warn!
(
%
url
,
"Cannot update directory on invalid URL"
);
return
;
};
let
p
=
dir
.join
(
filename
);
self
.path
=
Either
::
Left
(
p
);
}
}
}
}
...
...
lib/llm/src/entrypoint/input/common.rs
View file @
f6ae58e3
...
...
@@ -24,6 +24,7 @@ use crate::{
},
};
use
anyhow
::
Context
as
_
;
use
dynamo_runtime
::{
DistributedRuntime
,
component
::
Client
,
...
...
@@ -191,9 +192,11 @@ where
Pin
<
Box
<
dyn
AsyncEngineStream
<
Annotated
<
BackendOutput
>>>>
,
>
,
{
let
PromptFormatter
::
OAI
(
formatter
)
=
PromptFormatter
::
from_mdc
(
card
)
?
;
let
PromptFormatter
::
OAI
(
formatter
)
=
PromptFormatter
::
from_mdc
(
card
)
.context
(
"PromptFormatter.from_mdc"
)
?
;
let
preprocessor
=
OpenAIPreprocessor
::
new_with_parts
(
card
.clone
(),
formatter
,
hf_tokenizer
.clone
())
?
;
OpenAIPreprocessor
::
new_with_parts
(
card
.clone
(),
formatter
,
hf_tokenizer
.clone
())
.context
(
"OpenAIPreprocessor.new_with_parts"
)
?
;
build_routed_pipeline_with_preprocessor
(
card
,
client
,
...
...
lib/llm/src/local_model.rs
View file @
f6ae58e3
...
...
@@ -4,6 +4,7 @@
use
std
::
fs
;
use
std
::
path
::{
Path
,
PathBuf
};
use
anyhow
::
Context
as
_
;
use
dynamo_runtime
::
component
::
Endpoint
;
use
dynamo_runtime
::
discovery
::
DiscoveryInstance
;
use
dynamo_runtime
::
discovery
::
DiscoverySpec
;
...
...
@@ -36,6 +37,7 @@ pub const DEFAULT_HTTP_PORT: u16 = 8080;
pub
struct
LocalModelBuilder
{
model_path
:
Option
<
PathBuf
>
,
source_path
:
Option
<
PathBuf
>
,
model_name
:
Option
<
String
>
,
endpoint_id
:
Option
<
EndpointId
>
,
context_length
:
Option
<
u32
>
,
...
...
@@ -70,6 +72,7 @@ impl Default for LocalModelBuilder {
tls_cert_path
:
Default
::
default
(),
tls_key_path
:
Default
::
default
(),
model_path
:
Default
::
default
(),
source_path
:
Default
::
default
(),
model_name
:
Default
::
default
(),
endpoint_id
:
Default
::
default
(),
context_length
:
Default
::
default
(),
...
...
@@ -91,12 +94,20 @@ impl Default for LocalModelBuilder {
}
impl
LocalModelBuilder
{
/// The path must exist
/// The path must exist
, the model is already downloaded
pub
fn
model_path
(
&
mut
self
,
model_path
:
PathBuf
)
->
&
mut
Self
{
self
.model_path
=
Some
(
model_path
);
self
}
/// The HF name of the model before we downloaded it, or a local path if
/// that was given on the cmd line. We need this because `model_path` is always
/// a local path.
pub
fn
source_path
(
&
mut
self
,
source_path
:
PathBuf
)
->
&
mut
Self
{
self
.source_path
=
Some
(
source_path
);
self
}
pub
fn
model_name
(
&
mut
self
,
model_name
:
Option
<
String
>
)
->
&
mut
Self
{
self
.model_name
=
model_name
;
self
...
...
@@ -311,14 +322,15 @@ impl LocalModelBuilder {
let
mut
card
=
ModelDeploymentCard
::
load_from_disk
(
&
model_path
,
self
.custom_template_path
.as_deref
())
?
;
// Source path is the `--model-path` the user passed. By now our `model_path` is the local
// path of the downloaded model.
if
let
Some
(
source_path
)
=
self
.source_path
.take
()
{
card
.set_source_path
(
source_path
);
}
// The served model name defaults to the full model path.
// This matches what vllm and sglang do.
card
.set_name
(
&
self
.model_name
.clone
()
.unwrap_or_else
(||
model_path
.display
()
.to_string
()),
);
let
alt
=
card
.source_path
()
.to_string
();
card
.set_name
(
self
.model_name
.as_deref
()
.unwrap_or
(
&
alt
));
card
.kv_cache_block_size
=
self
.kv_cache_block_size
;
...
...
@@ -443,12 +455,6 @@ impl LocalModel {
self
.custom_backend_metrics_polling_interval
}
pub
fn
is_gguf
(
&
self
)
->
bool
{
// GGUF is the only file (not-folder) we accept, so we don't need to check the extension
// We will error when we come to parse it
self
.full_path
.is_file
()
}
/// An endpoint to identify this model by.
pub
fn
endpoint_id
(
&
self
)
->
&
EndpointId
{
&
self
.endpoint_id
...
...
@@ -491,6 +497,23 @@ impl LocalModel {
suffix_for_log
);
let
source_path
=
PathBuf
::
from
(
self
.card
.source_path
());
if
!
source_path
.exists
()
{
// The consumers of MDC (frontend) might not have the same local path as us, so
// replace disk paths with a custom URL like "hf://Qwen/Qwen3-0.6B/config.json".
//
// We can't do this if the model came from disk, as it might not be the same version
// as on Hugging Face (if it exists there at all).
//
// The URL is not used by anything. Frontend will download the repo and edit these
// paths to be local, so only the filename part matters currently.
// Possibly we should just use the filenames here. The URL feels nicer to me, it makes
// each field fully identified and fetchable independently.
self
.card
.move_to_url
(
&
format!
(
"hf://{}/"
,
self
.card
.source_path
()))
.context
(
"move_to_url"
)
?
;
}
// Register the Model Deployment Card via discovery interface
// The model_suffix (for LoRA) will be appended AFTER the instance_id
let
discovery
=
endpoint
.drt
()
.discovery
();
...
...
lib/llm/src/model_card.rs
View file @
f6ae58e3
...
...
@@ -13,7 +13,7 @@
//! - Prompt formatter settings (PromptFormatterArtifact)
use
std
::
fmt
;
use
std
::
path
::
Path
;
use
std
::
path
::
{
Path
,
PathBuf
}
;
use
std
::
sync
::{
Arc
,
OnceLock
};
use
crate
::
common
::
checked_file
::
CheckedFile
;
...
...
@@ -99,14 +99,14 @@ impl TokenizerKind {
#[serde(rename_all
=
"snake_case"
)]
pub
enum
PromptFormatterArtifact
{
HfTokenizerConfigJson
(
CheckedFile
),
HfChatTemplate
(
CheckedFile
)
,
HfChatTemplate
{
is_custom
:
bool
,
file
:
CheckedFile
}
,
}
impl
PromptFormatterArtifact
{
pub
fn
checksum
(
&
self
)
->
String
{
match
self
{
PromptFormatterArtifact
::
HfTokenizerConfigJson
(
c
)
=>
c
.checksum
()
.to_string
(),
PromptFormatterArtifact
::
HfChatTemplate
(
c
)
=>
c
.checksum
()
.to_string
(),
PromptFormatterArtifact
::
HfChatTemplate
{
file
:
c
,
..
}
=>
c
.checksum
()
.to_string
(),
}
}
...
...
@@ -114,14 +114,21 @@ impl PromptFormatterArtifact {
pub
fn
is_local
(
&
self
)
->
bool
{
match
self
{
PromptFormatterArtifact
::
HfTokenizerConfigJson
(
c
)
=>
c
.is_local
(),
PromptFormatterArtifact
::
HfChatTemplate
(
c
)
=>
c
.is_local
(),
PromptFormatterArtifact
::
HfChatTemplate
{
file
:
c
,
..
}
=>
c
.is_local
(),
}
}
pub
fn
update_dir
(
&
mut
self
,
dir
:
&
Path
)
{
match
self
{
PromptFormatterArtifact
::
HfTokenizerConfigJson
(
c
)
=>
c
.update_dir
(
dir
),
PromptFormatterArtifact
::
HfChatTemplate
(
c
)
=>
c
.update_dir
(
dir
),
PromptFormatterArtifact
::
HfChatTemplate
{
file
:
c
,
..
}
=>
c
.update_dir
(
dir
),
}
}
pub
fn
is_custom
(
&
self
)
->
bool
{
match
self
{
PromptFormatterArtifact
::
HfTokenizerConfigJson
(
_
)
=>
false
,
PromptFormatterArtifact
::
HfChatTemplate
{
is_custom
,
..
}
=>
*
is_custom
,
}
}
}
...
...
@@ -180,7 +187,7 @@ pub struct ModelDeploymentCard {
/// this field preserves the original repository path needed for downloads.
/// Falls back to `display_name` if not set.
#[serde(default,
skip_serializing_if
=
"Option::is_none"
)]
pub
source_
model
:
Option
<
String
>
,
pub
source_
path
:
Option
<
String
>
,
/// Model information
pub
model_info
:
Option
<
ModelInfoType
>
,
...
...
@@ -305,6 +312,9 @@ impl ModelDeploymentCard {
// Only include the important fields
let
mut
bytes_to_hash
:
Vec
<
u8
>
=
Vec
::
with_capacity
(
512
);
bytes_to_hash
.extend
(
self
.display_name
.as_bytes
());
if
let
Some
(
source_path
)
=
self
.source_path
.as_ref
()
{
bytes_to_hash
.extend
(
source_path
.as_bytes
());
}
// The files can be either a URL or a local path, so we ignore that and hash their
// checksum instead, which won't change wherever they are.
...
...
@@ -371,17 +381,21 @@ impl ModelDeploymentCard {
}
}
pub
(
crate
)
fn
set_source_path
(
&
mut
self
,
source_path
:
PathBuf
)
{
self
.source_path
=
Some
(
source_path
.display
()
.to_string
());
}
/// Allow user to override the name we register this model under.
/// Corresponds to vllm's `--served-model-name`.
pub
fn
set_name
(
&
mut
self
,
name
:
&
str
)
{
// Preserve original model path before overwriting display_name
if
self
.source_model
.is_none
()
&&
!
self
.display_name
.is_empty
()
{
self
.source_model
=
Some
(
self
.display_name
.clone
());
}
self
.display_name
=
name
.to_string
();
self
.slug
=
Slug
::
from_string
(
name
);
}
pub
fn
source_path
(
&
self
)
->
&
str
{
self
.source_path
.as_ref
()
.unwrap_or
(
&
self
.display_name
)
}
/// Build an in-memory ModelDeploymentCard from a folder containing config.json,
/// tokenizer.json and tokenizer_config.json (i.e. a huggingface repo checkout).
/// Optional custom template.
...
...
@@ -413,13 +427,73 @@ impl ModelDeploymentCard {
}
let
ignore_weights
=
true
;
let
model_name
=
self
.source_model
.as_ref
()
.unwrap_or
(
&
self
.display_name
);
let
local_path
=
crate
::
hub
::
from_hf
(
model_name
,
ignore_weights
)
.await
?
;
let
local_path
=
crate
::
hub
::
from_hf
(
self
.source_path
(),
ignore_weights
)
.await
?
;
self
.update_dir
(
&
local_path
);
Ok
(())
}
/// Re-write all the local disk paths as a URL. Do this before publishing the MDC.
/// The opposite of `move_to_url` is `update_dir`.
pub
fn
move_to_url
(
&
mut
self
,
base_url
:
&
str
)
->
anyhow
::
Result
<
()
>
{
macro_rules!
change
{
(
$field:expr
,
$enum_variant:path
)
=>
{
if
let
Some
(
$enum_variant
(
src_file
))
=
$field
.as_mut
()
&&
let
Some
(
filename
)
=
src_file
.path
()
.and_then
(|
p
|
p
.file_name
())
.and_then
(|
f
|
f
.to_str
())
.map
(|
f
|
f
.to_string
())
{
let
hf_url
=
url
::
Url
::
parse
(
base_url
)
.and_then
(|
u
|
u
.join
(
filename
.as_ref
()))
.context
(
filename
)
?
;
src_file
.move_to_url
(
hf_url
);
}
};
}
// config.json
change!
(
self
.model_info
,
ModelInfoType
::
HfConfigJson
);
// generation_config.json
change!
(
self
.gen_config
,
GenerationConfig
::
HfGenerationConfigJson
);
// tokenizer_config.json
change!
(
self
.prompt_formatter
,
PromptFormatterArtifact
::
HfTokenizerConfigJson
);
// tokenizer.json
change!
(
self
.tokenizer
,
TokenizerKind
::
HfTokenizerJson
);
// We only "move" the chat template if it came form the repo. If we have a custom template
// file we cannot download that from HF.
if
let
Some
(
PromptFormatterArtifact
::
HfChatTemplate
{
file
:
src_file
,
is_custom
,
})
=
self
.chat_template_file
.as_mut
()
{
if
*
is_custom
{
tracing
::
info!
(
"Detected custom chat template. Ensure file exists in the same location on all hosts."
);
}
else
if
let
Some
(
filename
)
=
src_file
.path
()
.and_then
(|
p
|
p
.file_name
())
.and_then
(|
f
|
f
.to_str
())
.map
(|
f
|
f
.to_string
())
{
let
hf_url
=
url
::
Url
::
parse
(
base_url
)
.and_then
(|
u
|
u
.join
(
filename
.as_ref
()))
.context
(
filename
)
?
;
src_file
.move_to_url
(
hf_url
);
}
}
Ok
(())
}
/// Are all the files we need (tokenizer.json, etc) available locally?
fn
has_local_files
(
&
self
)
->
bool
{
let
has_model_info
=
self
...
...
@@ -466,12 +540,15 @@ impl ModelDeploymentCard {
if
let
Some
(
pf
)
=
self
.prompt_formatter
.as_mut
()
{
pf
.update_dir
(
dir
);
}
if
let
Some
(
ct
)
=
self
.chat_template_file
.as_mut
()
{
ct
.update_dir
(
dir
);
}
if
let
Some
(
gc
)
=
self
.gen_config
.as_mut
()
{
gc
.update_dir
(
dir
);
}
// If it's a custom chat template we didn't download it, so leave the path untouched
if
let
Some
(
ct
)
=
self
.chat_template_file
.as_mut
()
&&
!
ct
.is_custom
()
{
ct
.update_dir
(
dir
);
}
}
/// Creates a ModelDeploymentCard from a local directory path.
...
...
@@ -548,18 +625,21 @@ impl ModelDeploymentCard {
)
})
?
;
Some
(
PromptFormatterArtifact
::
HfChatTemplate
(
CheckedFile
::
from_disk
(
template_path
)
?
,
))
Some
(
PromptFormatterArtifact
::
HfChatTemplate
{
is_custom
:
custom_template_path
.is_some
(),
file
:
CheckedFile
::
from_disk
(
template_path
)
?
,
})
}
else
{
PromptFormatterArtifact
::
chat_template_from_disk
(
local_path
)
?
};
// This gets replaced when we `set_name`
let
display_name
=
local_path
.display
()
.to_string
();
Ok
(
Self
{
slug
:
Slug
::
from_string
(
&
display_name
),
display_name
,
source_
model
:
None
,
source_
path
:
None
,
model_info
,
tokenizer
,
gen_config
,
...
...
@@ -846,7 +926,10 @@ impl PromptFormatterArtifact {
pub
fn
chat_template_from_disk
(
directory
:
&
Path
)
->
Result
<
Option
<
Self
>>
{
match
CheckedFile
::
from_disk
(
directory
.join
(
"chat_template.jinja"
))
{
Ok
(
f
)
=>
Ok
(
Some
(
Self
::
HfChatTemplate
(
f
))),
Ok
(
f
)
=>
Ok
(
Some
(
Self
::
HfChatTemplate
{
file
:
f
,
is_custom
:
false
,
})),
Err
(
_
)
=>
Ok
(
None
),
}
}
...
...
lib/llm/src/preprocessor/prompt/template.rs
View file @
f6ae58e3
...
...
@@ -42,8 +42,12 @@ impl PromptFormatter {
mdc
.display_name
);
};
let
contents
=
std
::
fs
::
read_to_string
(
file
)
.with_context
(||
format!
(
"fs:read_to_string '{}'"
,
file
.display
()))
?
;
let
contents
=
std
::
fs
::
read_to_string
(
file
)
.with_context
(||
{
format!
(
"PromptFormatter.from_mdc fs:read_to_string '{}'"
,
file
.display
()
)
})
?
;
let
mut
config
:
ChatTemplate
=
serde_json
::
from_str
(
&
contents
)
.inspect_err
(|
err
|
{
crate
::
log_json_err
(
&
file
.display
()
.to_string
(),
&
contents
,
err
)
...
...
@@ -53,8 +57,9 @@ impl PromptFormatter {
// stores the chat template in a separate file, we check if the file exists and
// put the chat template into config as normalization.
// This may also be a custom template provided via CLI flag.
if
let
Some
(
PromptFormatterArtifact
::
HfChatTemplate
(
checked_file
))
=
mdc
.chat_template_file
.as_ref
()
if
let
Some
(
PromptFormatterArtifact
::
HfChatTemplate
{
file
:
checked_file
,
..
})
=
mdc
.chat_template_file
.as_ref
()
{
let
Some
(
chat_template_file
)
=
checked_file
.path
()
else
{
anyhow
::
bail!
(
...
...
@@ -77,7 +82,7 @@ impl PromptFormatter {
.map_or
(
ContextMixins
::
default
(),
|
x
|
ContextMixins
::
new
(
&
x
)),
)
}
PromptFormatterArtifact
::
HfChatTemplate
(
_
)
=>
Err
(
anyhow
::
anyhow!
(
PromptFormatterArtifact
::
HfChatTemplate
{
..
}
=>
Err
(
anyhow
::
anyhow!
(
"prompt_formatter should not have type HfChatTemplate"
)),
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment