"ssh:/git@developer.sourcefind.cn:2222/OpenDAS/vllm_cscc.git" did not exist on "0ee349b5534e3d02b499b1126f2abde73b798fe9"
Unverified Commit 1954fcfa authored by Graham King's avatar Graham King Committed by GitHub
Browse files

chore: Remove service_name from ModelDeploymentCard (#2349)

parent ccc8815b
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use super::*;
use llm_rs::model_card::model::ModelDeploymentCard as RsModelDeploymentCard;
use llm_rs::model_card::ModelDeploymentCard as RsModelDeploymentCard;
#[pyclass]
#[derive(Clone)]
......
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! Backend
//!
......@@ -33,7 +21,7 @@ use anyhow::{Error, Result};
use futures::stream::{self, StreamExt};
use tracing as log;
use crate::model_card::model::{ModelDeploymentCard, TokenizerKind};
use crate::model_card::{ModelDeploymentCard, TokenizerKind};
use dynamo_runtime::{
pipeline::{
async_trait, AsyncEngineContextProvider, ManyOut, Operator, ResponseStream,
......
......@@ -251,12 +251,15 @@ impl LocalModel {
&self.full_path
}
/// Human friendly model name. This is the correct name.
pub fn display_name(&self) -> &str {
&self.card.display_name
}
/// The name under which we make this model available over HTTP.
/// A slugified version of the model's name, for use in NATS, etcd, etc.
pub fn service_name(&self) -> &str {
&self.card.service_name
self.card.slug().as_ref()
}
pub fn request_template(&self) -> Option<RequestTemplate> {
......
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::Arc;
......@@ -23,7 +11,7 @@ use async_nats::client::{
};
use crate::{
model_card::model::ModelDeploymentCard,
model_card::ModelDeploymentCard,
protocols::common::llm_backend::{LLMEngineOutput, PreprocessedRequest},
};
......
This diff is collapsed.
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
use crate::model_card::model::ModelDeploymentCard;
use anyhow::{Context, Result};
use std::path::{Path, PathBuf};
use crate::model_card::model::{ModelInfoType, PromptFormatterArtifact, TokenizerKind};
use super::model::GenerationConfig;
impl ModelDeploymentCard {
/// Allow user to override the name we register this model under.
/// Corresponds to vllm's `--served-model-name`.
pub fn set_name(&mut self, name: &str) {
self.display_name = name.to_string();
self.service_name = name.to_string();
}
/// Build an in-memory ModelDeploymentCard from either:
/// - a folder containing config.json, tokenizer.json and token_config.json
/// - a GGUF file
pub async fn load(config_path: impl AsRef<Path>) -> anyhow::Result<ModelDeploymentCard> {
let config_path = config_path.as_ref();
if config_path.is_dir() {
Self::from_local_path(config_path).await
} else {
Self::from_gguf(config_path).await
}
}
/// Creates a ModelDeploymentCard from a local directory path.
///
/// Currently HuggingFace format is supported and following files are expected:
/// - config.json: Model configuration in HuggingFace format
/// - tokenizer.json: Tokenizer configuration in HuggingFace format
/// - tokenizer_config.json: Optional prompt formatter configuration
///
/// # Arguments
/// * `local_root_dir` - Path to the local model directory
///
/// # Errors
/// Returns an error if:
/// - The path doesn't exist or isn't a directory
/// - The path contains invalid Unicode characters
/// - Required model files are missing or invalid
async fn from_local_path(local_root_dir: impl AsRef<Path>) -> anyhow::Result<Self> {
let local_root_dir = local_root_dir.as_ref();
check_valid_local_repo_path(local_root_dir)?;
let repo_id = local_root_dir
.canonicalize()?
.to_str()
.ok_or_else(|| anyhow::anyhow!("Path contains invalid Unicode"))?
.to_string();
let model_name = local_root_dir
.file_name()
.and_then(|n| n.to_str())
.ok_or_else(|| anyhow::anyhow!("Invalid model directory name"))?;
Self::from_repo(&repo_id, model_name).await
}
async fn from_gguf(gguf_file: &Path) -> anyhow::Result<Self> {
let model_name = gguf_file
.iter()
.next_back()
.map(|n| n.to_string_lossy().to_string());
let Some(model_name) = model_name else {
// I think this would only happy on an empty path
anyhow::bail!(
"Could not extract model name from path '{}'",
gguf_file.display()
);
};
// TODO: we do this in HFConfig also, unify
let content = super::model::load_gguf(gguf_file)?;
let context_length = content.get_metadata()[&format!("{}.context_length", content.arch())]
.to_u32()
.unwrap_or(0);
tracing::debug!(context_length, "Loaded context length from GGUF");
Ok(Self {
display_name: model_name.to_string(),
service_name: model_name.to_string(),
model_info: Some(ModelInfoType::GGUF(gguf_file.to_path_buf())),
tokenizer: Some(TokenizerKind::from_gguf(gguf_file)?),
gen_config: None, // AFAICT there is no equivalent in a GGUF
prompt_formatter: Some(PromptFormatterArtifact::GGUF(gguf_file.to_path_buf())),
chat_template_file: None,
prompt_context: None, // TODO - auto-detect prompt context
revision: 0,
last_published: None,
context_length,
kv_cache_block_size: 0,
migration_limit: 0,
user_data: None,
})
}
#[allow(dead_code)]
async fn from_ngc_repo(_: &str) -> anyhow::Result<Self> {
Err(anyhow::anyhow!(
"ModelDeploymentCard::from_ngc_repo is not implemented"
))
}
async fn from_repo(repo_id: &str, model_name: &str) -> anyhow::Result<Self> {
// This is usually the right choice
let context_length = crate::file_json_field(
&PathBuf::from(repo_id).join("config.json"),
"max_position_embeddings",
)
// But sometimes this is
.or_else(|_| {
crate::file_json_field(
&PathBuf::from(repo_id).join("tokenizer_config.json"),
"model_max_length",
)
})
// If neither of those are present let the engine default it
.unwrap_or(0);
Ok(Self {
display_name: model_name.to_string(),
service_name: model_name.to_string(),
model_info: Some(ModelInfoType::from_repo(repo_id).await?),
tokenizer: Some(TokenizerKind::from_repo(repo_id).await?),
gen_config: GenerationConfig::from_repo(repo_id).await.ok(), // optional
prompt_formatter: PromptFormatterArtifact::from_repo(repo_id).await?,
chat_template_file: PromptFormatterArtifact::chat_template_from_repo(repo_id).await?,
prompt_context: None, // TODO - auto-detect prompt context
revision: 0,
last_published: None,
context_length,
kv_cache_block_size: 0, // set later
migration_limit: 0,
user_data: None,
})
}
}
impl ModelInfoType {
pub async fn from_repo(repo_id: &str) -> Result<Self> {
Self::try_is_hf_repo(repo_id)
.await
.with_context(|| format!("unable to extract model info from repo {}", repo_id))
}
async fn try_is_hf_repo(repo: &str) -> anyhow::Result<Self> {
Ok(Self::HfConfigJson(
check_for_file(repo, "config.json").await?,
))
}
}
impl PromptFormatterArtifact {
pub async fn from_repo(repo_id: &str) -> Result<Option<Self>> {
// we should only error if we expect a prompt formatter and it's not found
// right now, we don't know when to expect it, so we just return Ok(Some/None)
Ok(Self::try_is_hf_repo(repo_id)
.await
.with_context(|| format!("unable to extract prompt format from repo {}", repo_id))
.ok())
}
pub async fn chat_template_from_repo(repo_id: &str) -> Result<Option<Self>> {
Ok(Self::chat_template_try_is_hf_repo(repo_id)
.await
.with_context(|| format!("unable to extract prompt format from repo {}", repo_id))
.ok())
}
async fn chat_template_try_is_hf_repo(repo: &str) -> anyhow::Result<Self> {
Ok(Self::HfChatTemplate(
check_for_file(repo, "chat_template.jinja").await?,
))
}
async fn try_is_hf_repo(repo: &str) -> anyhow::Result<Self> {
Ok(Self::HfTokenizerConfigJson(
check_for_file(repo, "tokenizer_config.json").await?,
))
}
}
impl TokenizerKind {
pub async fn from_repo(repo_id: &str) -> Result<Self> {
Self::try_is_hf_repo(repo_id)
.await
.with_context(|| format!("unable to extract tokenizer kind from repo {}", repo_id))
}
async fn try_is_hf_repo(repo: &str) -> anyhow::Result<Self> {
Ok(Self::HfTokenizerJson(
check_for_file(repo, "tokenizer.json").await?,
))
}
}
impl GenerationConfig {
pub async fn from_repo(repo_id: &str) -> Result<Self> {
Self::try_is_hf_repo(repo_id)
.await
.with_context(|| format!("unable to extract generation config from repo {repo_id}"))
}
async fn try_is_hf_repo(repo: &str) -> anyhow::Result<Self> {
Ok(Self::HfGenerationConfigJson(
check_for_file(repo, "generation_config.json").await?,
))
}
}
/// Checks if the provided path contains the expected file.
async fn check_for_file(repo_id: &str, file: &str) -> anyhow::Result<String> {
let p = PathBuf::from(repo_id).join(file);
let name = p.display().to_string();
if !p.exists() {
anyhow::bail!("File not found: {name}")
}
Ok(name)
}
/// Checks if the provided path is a valid local repository path.
///
/// # Arguments
/// * `path` - Path to validate
///
/// # Errors
/// Returns an error if the path doesn't exist or isn't a directory
fn check_valid_local_repo_path(path: impl AsRef<Path>) -> Result<()> {
let path = path.as_ref();
if !path.exists() {
return Err(anyhow::anyhow!(
"Model path does not exist: {}",
path.display()
));
}
if !path.is_dir() {
return Err(anyhow::anyhow!(
"Model path is not a directory: {}",
path.display()
));
}
Ok(())
}
This diff is collapsed.
......@@ -22,7 +22,7 @@ use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
use std::{collections::HashMap, sync::Arc};
use tracing;
use crate::model_card::model::{ModelDeploymentCard, ModelInfo, TokenizerKind};
use crate::model_card::{ModelDeploymentCard, ModelInfo, TokenizerKind};
use crate::preprocessor::prompt::OAIChatLikeRequest;
use crate::tokenizers::Encoding;
......
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::{collections::HashSet, sync::Arc};
use anyhow::{Context, Ok, Result};
use minijinja::Environment;
use crate::model_card::model::{ModelDeploymentCard, PromptContextMixin, PromptFormatterArtifact};
use crate::model_card::{ModelDeploymentCard, PromptContextMixin, PromptFormatterArtifact};
mod context;
mod formatters;
......
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use dynamo_llm::backend::Backend;
use dynamo_llm::model_card::model::ModelDeploymentCard;
use dynamo_llm::model_card::ModelDeploymentCard;
#[tokio::test]
async fn test_sequence_factory() {
......
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use dynamo_llm::model_card::model::{ModelDeploymentCard, PromptFormatterArtifact, TokenizerKind};
use dynamo_llm::model_card::{ModelDeploymentCard, PromptFormatterArtifact, TokenizerKind};
use tempfile::tempdir;
const HF_PATH: &str = "tests/data/sample-models/TinyLlama_v1.1";
......
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use anyhow::{Ok, Result};
use dynamo_llm::model_card::model::{ModelDeploymentCard, PromptContextMixin};
use dynamo_llm::model_card::{ModelDeploymentCard, PromptContextMixin};
use dynamo_llm::preprocessor::prompt::PromptFormatter;
use dynamo_llm::protocols::openai::chat_completions::NvCreateChatCompletionRequest;
use serde::{Deserialize, Serialize};
......
......@@ -21,7 +21,7 @@ const REPLACEMENT_CHAR: char = '_';
/// URL and NATS friendly string.
/// Only a-z, 0-9, - and _.
#[derive(Serialize, Clone, Debug, Eq, PartialEq)]
#[derive(Serialize, Clone, Debug, Eq, PartialEq, Default)]
pub struct Slug(String);
impl Slug {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment