Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
1954fcfa
Unverified
Commit
1954fcfa
authored
Aug 07, 2025
by
Graham King
Committed by
GitHub
Aug 07, 2025
Browse files
chore: Remove service_name from ModelDeploymentCard (#2349)
parent
ccc8815b
Changes
13
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
13 changed files
with
892 additions
and
1001 deletions
+892
-1001
lib/bindings/python/rust/llm/model_card.rs
lib/bindings/python/rust/llm/model_card.rs
+1
-13
lib/llm/src/backend.rs
lib/llm/src/backend.rs
+1
-13
lib/llm/src/local_model.rs
lib/llm/src/local_model.rs
+4
-1
lib/llm/src/migration.rs
lib/llm/src/migration.rs
+1
-13
lib/llm/src/model_card.rs
lib/llm/src/model_card.rs
+879
-3
lib/llm/src/model_card/create.rs
lib/llm/src/model_card/create.rs
+0
-247
lib/llm/src/model_card/model.rs
lib/llm/src/model_card/model.rs
+0
-657
lib/llm/src/preprocessor.rs
lib/llm/src/preprocessor.rs
+1
-1
lib/llm/src/preprocessor/prompt/template.rs
lib/llm/src/preprocessor/prompt/template.rs
+1
-13
lib/llm/tests/backend.rs
lib/llm/tests/backend.rs
+1
-13
lib/llm/tests/model_card.rs
lib/llm/tests/model_card.rs
+1
-13
lib/llm/tests/preprocessor.rs
lib/llm/tests/preprocessor.rs
+1
-13
lib/runtime/src/slug.rs
lib/runtime/src/slug.rs
+1
-1
No files found.
lib/bindings/python/rust/llm/model_card.rs
View file @
1954fcfa
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use
super
::
*
;
use
llm_rs
::
model_card
::
model
::
ModelDeploymentCard
as
RsModelDeploymentCard
;
use
llm_rs
::
model_card
::
ModelDeploymentCard
as
RsModelDeploymentCard
;
#[pyclass]
#[derive(Clone)]
...
...
lib/llm/src/backend.rs
View file @
1954fcfa
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! Backend
//!
...
...
@@ -33,7 +21,7 @@ use anyhow::{Error, Result};
use
futures
::
stream
::{
self
,
StreamExt
};
use
tracing
as
log
;
use
crate
::
model_card
::
model
::
{
ModelDeploymentCard
,
TokenizerKind
};
use
crate
::
model_card
::{
ModelDeploymentCard
,
TokenizerKind
};
use
dynamo_runtime
::{
pipeline
::{
async_trait
,
AsyncEngineContextProvider
,
ManyOut
,
Operator
,
ResponseStream
,
...
...
lib/llm/src/local_model.rs
View file @
1954fcfa
...
...
@@ -251,12 +251,15 @@ impl LocalModel {
&
self
.full_path
}
/// The model's human-readable name, borrowed from the deployment card.
/// This is the name to show to users.
pub fn display_name(&self) -> &str {
    self.card.display_name.as_str()
}
/// The name under which we make this model available over HTTP.
/// A slugified version of the model's name, for use in NATS, etcd, etc.
pub
fn
service_name
(
&
self
)
->
&
str
{
&
self
.card.s
ervice_name
self
.card
.s
lug
()
.as_ref
()
}
pub
fn
request_template
(
&
self
)
->
Option
<
RequestTemplate
>
{
...
...
lib/llm/src/migration.rs
View file @
1954fcfa
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use
std
::
sync
::
Arc
;
...
...
@@ -23,7 +11,7 @@ use async_nats::client::{
};
use
crate
::{
model_card
::
model
::
ModelDeploymentCard
,
model_card
::
ModelDeploymentCard
,
protocols
::
common
::
llm_backend
::{
LLMEngineOutput
,
PreprocessedRequest
},
};
...
...
lib/llm/src/model_card.rs
View file @
1954fcfa
This diff is collapsed.
Click to expand it.
lib/llm/src/model_card/create.rs
deleted
100644 → 0
View file @
ccc8815b
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
use
crate
::
model_card
::
model
::
ModelDeploymentCard
;
use
anyhow
::{
Context
,
Result
};
use
std
::
path
::{
Path
,
PathBuf
};
use
crate
::
model_card
::
model
::{
ModelInfoType
,
PromptFormatterArtifact
,
TokenizerKind
};
use
super
::
model
::
GenerationConfig
;
impl
ModelDeploymentCard
{
/// Allow user to override the name we register this model under.
/// Corresponds to vllm's `--served-model-name`.
pub
fn
set_name
(
&
mut
self
,
name
:
&
str
)
{
self
.display_name
=
name
.to_string
();
self
.service_name
=
name
.to_string
();
}
/// Build an in-memory ModelDeploymentCard from either:
/// - a folder containing config.json, tokenizer.json and token_config.json
/// - a GGUF file
pub
async
fn
load
(
config_path
:
impl
AsRef
<
Path
>
)
->
anyhow
::
Result
<
ModelDeploymentCard
>
{
let
config_path
=
config_path
.as_ref
();
if
config_path
.is_dir
()
{
Self
::
from_local_path
(
config_path
)
.await
}
else
{
Self
::
from_gguf
(
config_path
)
.await
}
}
/// Creates a ModelDeploymentCard from a local directory path.
///
/// Currently HuggingFace format is supported and following files are expected:
/// - config.json: Model configuration in HuggingFace format
/// - tokenizer.json: Tokenizer configuration in HuggingFace format
/// - tokenizer_config.json: Optional prompt formatter configuration
///
/// # Arguments
/// * `local_root_dir` - Path to the local model directory
///
/// # Errors
/// Returns an error if:
/// - The path doesn't exist or isn't a directory
/// - The path contains invalid Unicode characters
/// - Required model files are missing or invalid
async
fn
from_local_path
(
local_root_dir
:
impl
AsRef
<
Path
>
)
->
anyhow
::
Result
<
Self
>
{
let
local_root_dir
=
local_root_dir
.as_ref
();
check_valid_local_repo_path
(
local_root_dir
)
?
;
let
repo_id
=
local_root_dir
.canonicalize
()
?
.to_str
()
.ok_or_else
(||
anyhow
::
anyhow!
(
"Path contains invalid Unicode"
))
?
.to_string
();
let
model_name
=
local_root_dir
.file_name
()
.and_then
(|
n
|
n
.to_str
())
.ok_or_else
(||
anyhow
::
anyhow!
(
"Invalid model directory name"
))
?
;
Self
::
from_repo
(
&
repo_id
,
model_name
)
.await
}
async
fn
from_gguf
(
gguf_file
:
&
Path
)
->
anyhow
::
Result
<
Self
>
{
let
model_name
=
gguf_file
.iter
()
.next_back
()
.map
(|
n
|
n
.to_string_lossy
()
.to_string
());
let
Some
(
model_name
)
=
model_name
else
{
// I think this would only happy on an empty path
anyhow
::
bail!
(
"Could not extract model name from path '{}'"
,
gguf_file
.display
()
);
};
// TODO: we do this in HFConfig also, unify
let
content
=
super
::
model
::
load_gguf
(
gguf_file
)
?
;
let
context_length
=
content
.get_metadata
()[
&
format!
(
"{}.context_length"
,
content
.arch
())]
.to_u32
()
.unwrap_or
(
0
);
tracing
::
debug!
(
context_length
,
"Loaded context length from GGUF"
);
Ok
(
Self
{
display_name
:
model_name
.to_string
(),
service_name
:
model_name
.to_string
(),
model_info
:
Some
(
ModelInfoType
::
GGUF
(
gguf_file
.to_path_buf
())),
tokenizer
:
Some
(
TokenizerKind
::
from_gguf
(
gguf_file
)
?
),
gen_config
:
None
,
// AFAICT there is no equivalent in a GGUF
prompt_formatter
:
Some
(
PromptFormatterArtifact
::
GGUF
(
gguf_file
.to_path_buf
())),
chat_template_file
:
None
,
prompt_context
:
None
,
// TODO - auto-detect prompt context
revision
:
0
,
last_published
:
None
,
context_length
,
kv_cache_block_size
:
0
,
migration_limit
:
0
,
user_data
:
None
,
})
}
#[allow(dead_code)]
async
fn
from_ngc_repo
(
_
:
&
str
)
->
anyhow
::
Result
<
Self
>
{
Err
(
anyhow
::
anyhow!
(
"ModelDeploymentCard::from_ngc_repo is not implemented"
))
}
async
fn
from_repo
(
repo_id
:
&
str
,
model_name
:
&
str
)
->
anyhow
::
Result
<
Self
>
{
// This is usually the right choice
let
context_length
=
crate
::
file_json_field
(
&
PathBuf
::
from
(
repo_id
)
.join
(
"config.json"
),
"max_position_embeddings"
,
)
// But sometimes this is
.or_else
(|
_
|
{
crate
::
file_json_field
(
&
PathBuf
::
from
(
repo_id
)
.join
(
"tokenizer_config.json"
),
"model_max_length"
,
)
})
// If neither of those are present let the engine default it
.unwrap_or
(
0
);
Ok
(
Self
{
display_name
:
model_name
.to_string
(),
service_name
:
model_name
.to_string
(),
model_info
:
Some
(
ModelInfoType
::
from_repo
(
repo_id
)
.await
?
),
tokenizer
:
Some
(
TokenizerKind
::
from_repo
(
repo_id
)
.await
?
),
gen_config
:
GenerationConfig
::
from_repo
(
repo_id
)
.await
.ok
(),
// optional
prompt_formatter
:
PromptFormatterArtifact
::
from_repo
(
repo_id
)
.await
?
,
chat_template_file
:
PromptFormatterArtifact
::
chat_template_from_repo
(
repo_id
)
.await
?
,
prompt_context
:
None
,
// TODO - auto-detect prompt context
revision
:
0
,
last_published
:
None
,
context_length
,
kv_cache_block_size
:
0
,
// set later
migration_limit
:
0
,
user_data
:
None
,
})
}
}
impl ModelInfoType {
    /// Locates the model info for the repository directory `repo_id`.
    ///
    /// # Errors
    /// Fails, with the repo named in the error context, when `config.json`
    /// is not present in the directory.
    pub async fn from_repo(repo_id: &str) -> Result<Self> {
        let detected = Self::try_is_hf_repo(repo_id).await;
        detected.with_context(|| format!("unable to extract model info from repo {}", repo_id))
    }

    /// Treats `repo` as a HuggingFace layout: requires `config.json` and wraps its path.
    async fn try_is_hf_repo(repo: &str) -> anyhow::Result<Self> {
        let config_json = check_for_file(repo, "config.json").await?;
        Ok(Self::HfConfigJson(config_json))
    }
}
impl PromptFormatterArtifact {
    /// Looks for `tokenizer_config.json` in `repo_id`.
    ///
    /// Yields `Ok(None)` rather than an error when the file is absent.
    pub async fn from_repo(repo_id: &str) -> Result<Option<Self>> {
        // we should only error if we expect a prompt formatter and it's not found
        // right now, we don't know when to expect it, so we just return Ok(Some/None)
        let lookup = Self::try_is_hf_repo(repo_id)
            .await
            .with_context(|| format!("unable to extract prompt format from repo {}", repo_id));
        Ok(lookup.ok())
    }

    /// Looks for `chat_template.jinja` in `repo_id`.
    ///
    /// Yields `Ok(None)` rather than an error when the file is absent.
    pub async fn chat_template_from_repo(repo_id: &str) -> Result<Option<Self>> {
        let lookup = Self::chat_template_try_is_hf_repo(repo_id)
            .await
            .with_context(|| format!("unable to extract prompt format from repo {}", repo_id));
        Ok(lookup.ok())
    }

    /// Requires `chat_template.jinja` in `repo` and wraps its path.
    async fn chat_template_try_is_hf_repo(repo: &str) -> anyhow::Result<Self> {
        let template_path = check_for_file(repo, "chat_template.jinja").await?;
        Ok(Self::HfChatTemplate(template_path))
    }

    /// Requires `tokenizer_config.json` in `repo` and wraps its path.
    async fn try_is_hf_repo(repo: &str) -> anyhow::Result<Self> {
        let tokenizer_config = check_for_file(repo, "tokenizer_config.json").await?;
        Ok(Self::HfTokenizerConfigJson(tokenizer_config))
    }
}
impl TokenizerKind {
    /// Locates the tokenizer for the repository directory `repo_id`.
    ///
    /// # Errors
    /// Fails, with the repo named in the error context, when `tokenizer.json`
    /// is not present in the directory.
    pub async fn from_repo(repo_id: &str) -> Result<Self> {
        let detected = Self::try_is_hf_repo(repo_id).await;
        detected.with_context(|| format!("unable to extract tokenizer kind from repo {}", repo_id))
    }

    /// Requires `tokenizer.json` in `repo` and wraps its path.
    async fn try_is_hf_repo(repo: &str) -> anyhow::Result<Self> {
        let tokenizer_json = check_for_file(repo, "tokenizer.json").await?;
        Ok(Self::HfTokenizerJson(tokenizer_json))
    }
}
impl GenerationConfig {
    /// Locates the generation config for the repository directory `repo_id`.
    ///
    /// # Errors
    /// Fails, with the repo named in the error context, when
    /// `generation_config.json` is not present in the directory.
    pub async fn from_repo(repo_id: &str) -> Result<Self> {
        let detected = Self::try_is_hf_repo(repo_id).await;
        detected.with_context(|| format!("unable to extract generation config from repo {repo_id}"))
    }

    /// Requires `generation_config.json` in `repo` and wraps its path.
    async fn try_is_hf_repo(repo: &str) -> anyhow::Result<Self> {
        let generation_config = check_for_file(repo, "generation_config.json").await?;
        Ok(Self::HfGenerationConfigJson(generation_config))
    }
}
/// Verifies that `file` exists inside the directory `repo_id`.
///
/// On success returns the joined path rendered as a `String`; otherwise fails
/// with a "File not found" error naming that path.
async fn check_for_file(repo_id: &str, file: &str) -> anyhow::Result<String> {
    let candidate = Path::new(repo_id).join(file);
    let name = candidate.display().to_string();
    if candidate.exists() {
        return Ok(name);
    }
    anyhow::bail!("File not found: {name}")
}
/// Validates that `path` can serve as a local model repository.
///
/// # Arguments
/// * `path` - Candidate directory to validate
///
/// # Errors
/// Fails when the path is missing, or when it exists but is not a directory.
fn check_valid_local_repo_path(path: impl AsRef<Path>) -> Result<()> {
    let dir = path.as_ref();
    if !dir.exists() {
        anyhow::bail!("Model path does not exist: {}", dir.display());
    }
    if dir.is_dir() {
        Ok(())
    } else {
        Err(anyhow::anyhow!(
            "Model path is not a directory: {}",
            dir.display()
        ))
    }
}
lib/llm/src/model_card/model.rs
deleted
100644 → 0
View file @
ccc8815b
This diff is collapsed.
Click to expand it.
lib/llm/src/preprocessor.rs
View file @
1954fcfa
...
...
@@ -22,7 +22,7 @@ use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
use
std
::{
collections
::
HashMap
,
sync
::
Arc
};
use
tracing
;
use
crate
::
model_card
::
model
::
{
ModelDeploymentCard
,
ModelInfo
,
TokenizerKind
};
use
crate
::
model_card
::{
ModelDeploymentCard
,
ModelInfo
,
TokenizerKind
};
use
crate
::
preprocessor
::
prompt
::
OAIChatLikeRequest
;
use
crate
::
tokenizers
::
Encoding
;
...
...
lib/llm/src/preprocessor/prompt/template.rs
View file @
1954fcfa
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use
std
::{
collections
::
HashSet
,
sync
::
Arc
};
use
anyhow
::{
Context
,
Ok
,
Result
};
use
minijinja
::
Environment
;
use
crate
::
model_card
::
model
::
{
ModelDeploymentCard
,
PromptContextMixin
,
PromptFormatterArtifact
};
use
crate
::
model_card
::{
ModelDeploymentCard
,
PromptContextMixin
,
PromptFormatterArtifact
};
mod
context
;
mod
formatters
;
...
...
lib/llm/tests/backend.rs
View file @
1954fcfa
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use
dynamo_llm
::
backend
::
Backend
;
use
dynamo_llm
::
model_card
::
model
::
ModelDeploymentCard
;
use
dynamo_llm
::
model_card
::
ModelDeploymentCard
;
#[tokio::test]
async
fn
test_sequence_factory
()
{
...
...
lib/llm/tests/model_card.rs
View file @
1954fcfa
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use
dynamo_llm
::
model_card
::
model
::
{
ModelDeploymentCard
,
PromptFormatterArtifact
,
TokenizerKind
};
use
dynamo_llm
::
model_card
::{
ModelDeploymentCard
,
PromptFormatterArtifact
,
TokenizerKind
};
use
tempfile
::
tempdir
;
const
HF_PATH
:
&
str
=
"tests/data/sample-models/TinyLlama_v1.1"
;
...
...
lib/llm/tests/preprocessor.rs
View file @
1954fcfa
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use
anyhow
::{
Ok
,
Result
};
use
dynamo_llm
::
model_card
::
model
::
{
ModelDeploymentCard
,
PromptContextMixin
};
use
dynamo_llm
::
model_card
::{
ModelDeploymentCard
,
PromptContextMixin
};
use
dynamo_llm
::
preprocessor
::
prompt
::
PromptFormatter
;
use
dynamo_llm
::
protocols
::
openai
::
chat_completions
::
NvCreateChatCompletionRequest
;
use
serde
::{
Deserialize
,
Serialize
};
...
...
lib/runtime/src/slug.rs
View file @
1954fcfa
...
...
@@ -21,7 +21,7 @@ const REPLACEMENT_CHAR: char = '_';
/// URL and NATS friendly string.
/// Only a-z, 0-9, - and _.
#[derive(Serialize,
Clone,
Debug,
Eq,
PartialEq)]
#[derive(Serialize,
Clone,
Debug,
Eq,
PartialEq
,
Default
)]
pub
struct
Slug
(
String
);
impl
Slug
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment