"worker/vscode:/vscode.git/clone" did not exist on "c3b847901099bf5c3dd174a3c8ec994b73426833"
Commit 65a2dfab authored by Biswa Panda's avatar Biswa Panda Committed by GitHub
Browse files

feat: add local model card (#216)

parent 7f85dcc3
......@@ -122,7 +122,7 @@ $global:copyright_results = @{
$ignored_files = @('.clang-format', '.gitattributes', '.gitignore', '.gitkeep', '.patch', 'Cargo.lock', 'LICENSE', 'uv.lock', 'rust-toolchain.toml')
write-debug "<copyright-check> ignored_files = ['$($ignored_files -join "','")']."
$ignored_paths = @('.github', '.mypy_cache', '.pytest_cache')
$ignored_paths = @('.github', '.mypy_cache', '.pytest_cache', 'llm/rust/triton-llm/tests/data/sample-models')
write-debug "<copyright-check> ignored_paths = ['$($ignored_paths -join "','")']."
$ignored_types = @('.bat', '.gif', '.ico', '.ipynb', '.jpg', '.jpeg', '.patch', '.png', '.pyc', '.pyi', '.rst', '.zip', '.md')
write-debug "<copyright-check> ignored_types = ['$($ignored_types -join "', '")']."
......
......@@ -687,6 +687,7 @@ dependencies = [
"iana-time-zone",
"js-sys",
"num-traits",
"serde",
"wasm-bindgen",
"windows-targets 0.52.6",
]
......@@ -5125,6 +5126,7 @@ dependencies = [
"async-stream",
"async-trait",
"axum 0.8.1",
"blake3",
"bytes",
"chrono",
"derive_builder",
......
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
version = 4
[[package]]
name = "addr2line"
......@@ -464,6 +464,7 @@ dependencies = [
"iana-time-zone",
"js-sys",
"num-traits",
"serde",
"wasm-bindgen",
"windows-targets",
]
......@@ -3104,6 +3105,7 @@ dependencies = [
"async-stream",
"async-trait",
"axum 0.8.1",
"blake3",
"bytes",
"chrono",
"derive_builder",
......
......@@ -712,6 +712,7 @@ dependencies = [
"iana-time-zone",
"js-sys",
"num-traits",
"serde",
"wasm-bindgen",
"windows-targets 0.52.6",
]
......@@ -4713,9 +4714,9 @@ checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1"
[[package]]
name = "tempfile"
version = "3.16.0"
version = "3.17.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38c246215d7d24f48ae091a2902398798e05d978b24315d6efbc00ede9a8bb91"
checksum = "22e5a0acb1f3f55f65cc4a866c361b2fb2a0ff6366785ae6fbb5f85df07ba230"
dependencies = [
"cfg-if 1.0.0",
"fastrand",
......@@ -5283,6 +5284,7 @@ dependencies = [
"async-stream",
"async-trait",
"axum 0.8.1",
"blake3",
"bytes",
"chrono",
"derive_builder",
......@@ -5298,6 +5300,7 @@ dependencies = [
"rstest",
"serde",
"serde_json",
"tempfile",
"thiserror 2.0.11",
"tokio",
"tokio-stream",
......
......@@ -48,8 +48,16 @@ validator = { workspace = true }
uuid = { workspace = true }
xxhash-rust = { workspace = true }
blake3 = "1"
# protocols
chrono = { version = "0.4" }
chrono = { version = "0.4", default-features = false, features = [
"alloc",
"std",
"clock",
"now",
"serde",
] }
serde_json = { version = "1" }
regex = "1"
unicode-segmentation = "1.12"
......@@ -68,3 +76,4 @@ insta = { version = "1.41", features = ["glob", "json", "redactions"]}
proptest = "1.5.0"
reqwest = { version = "0.12", default-features = false, features = ["json", "stream", "rustls-tls"] }
rstest = "0.18.2"
tempfile = "3.17.1"
\ No newline at end of file
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
pub mod versioned;
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/// A trait allowing to get/set a revision on an object.
/// NATS uses this to ensure atomic updates.
pub trait Versioned {
    /// Current revision number of the object.
    fn revision(&self) -> u64;
    /// Overwrite the revision number (e.g. after a successful compare-and-swap update).
    fn set_revision(&mut self, r: u64);
}
......@@ -23,3 +23,5 @@ pub mod http;
pub mod kv_router;
pub mod protocols;
pub mod types;
pub mod model_card;
pub mod common;
\ No newline at end of file
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
pub mod create;
pub mod model;
\ No newline at end of file
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use std::path::Path;
use std::fs;
use crate::model_card::model::ModelDeploymentCard;
use anyhow::{Context, Result};
use crate::model_card::model::{ModelInfoType, TokenizerKind, PromptFormatterArtifact, File};
impl ModelDeploymentCard {
    /// Builds a ModelDeploymentCard from a model directory on local disk.
    ///
    /// Only the HuggingFace layout is currently recognised; the directory is
    /// expected to contain:
    /// - config.json: model configuration in HuggingFace format
    /// - tokenizer.json: tokenizer configuration in HuggingFace format
    /// - tokenizer_config.json: optional prompt formatter configuration
    ///
    /// # Arguments
    /// * `local_root_dir` - path to the local model directory
    ///
    /// # Errors
    /// Fails when the path is missing or not a directory, when it cannot be
    /// represented as UTF-8, or when a required model file is missing/invalid.
    pub async fn from_local_path(local_root_dir: impl AsRef<Path>) -> anyhow::Result<Self> {
        let root = local_root_dir.as_ref();
        check_valid_local_repo_path(root)?;

        // Canonicalize so the repo id is a stable, absolute path string.
        let canonical = root.canonicalize()?;
        let repo_id = canonical
            .to_str()
            .ok_or_else(|| anyhow::anyhow!("Path contains invalid Unicode"))?;

        // The directory name doubles as the model's display/service name.
        let model_name = root
            .file_name()
            .and_then(|n| n.to_str())
            .ok_or_else(|| anyhow::anyhow!("Invalid model directory name"))?;

        Self::from_repo(repo_id, model_name).await
    }

    /// TODO: This will be implemented after nova-hub is integrated with the model-card
    /// TODO: Attempt to auto-detect model type and construct an MDC from a NGC repo
    pub async fn from_ngc_repo(_: &str) -> anyhow::Result<Self> {
        Err(anyhow::anyhow!(
            "ModelDeploymentCard::from_ngc_repo is not implemented"
        ))
    }

    /// Assembles a card by probing `repo_id` for the individual artifacts.
    pub async fn from_repo(repo_id: &str, model_name: &str) -> anyhow::Result<Self> {
        let model_info = ModelInfoType::from_repo(repo_id).await?;
        let tokenizer = TokenizerKind::from_repo(repo_id).await?;
        let prompt_formatter = PromptFormatterArtifact::from_repo(repo_id).await?;
        Ok(Self {
            display_name: model_name.to_string(),
            service_name: model_name.to_string(),
            model_info,
            tokenizer,
            prompt_formatter,
            prompt_context: None, // TODO - auto-detect prompt context
            revision: 0,
            last_published: None,
            requires_preprocessing: true,
        })
    }
}
impl ModelInfoType {
    /// Locates the model configuration inside `repo_id`.
    pub async fn from_repo(repo_id: &str) -> Result<Self> {
        Self::try_is_hf_repo(repo_id)
            .await
            .with_context(|| format!("unable to extract model info from repo {}", repo_id))
    }

    /// Succeeds iff the directory contains a HuggingFace `config.json`.
    async fn try_is_hf_repo(repo: &str) -> anyhow::Result<Self> {
        let config = check_for_file(repo, "config.json").await?;
        Ok(Self::HfConfigJson(config))
    }
}
impl PromptFormatterArtifact {
    /// Looks for an optional prompt formatter (tokenizer_config.json) in `repo_id`.
    /// Returns `Ok(None)` when the file is absent — absence is not an error here.
    pub async fn from_repo(repo_id: &str) -> Result<Option<Self>> {
        // we should only error if we expect a prompt formatter and it's not found
        // right now, we don't know when to expect it, so we just return Ok(Some/None)
        // NOTE: the `.ok()` below deliberately discards the lookup error (and the
        // context attached above it) and maps any failure to `None`.
        Ok(Self::try_is_hf_repo(repo_id)
            .await
            .with_context(|| format!("unable to extract prompt format from repo {}", repo_id))
            .ok())
    }

    /// Succeeds iff the directory contains a HuggingFace `tokenizer_config.json`.
    async fn try_is_hf_repo(repo: &str) -> anyhow::Result<Self> {
        Ok(Self::HfTokenizerConfigJson(
            check_for_file(repo, "tokenizer_config.json").await?,
        ))
    }
}
impl TokenizerKind {
    /// Locates the tokenizer definition inside `repo_id`.
    pub async fn from_repo(repo_id: &str) -> Result<Self> {
        Self::try_is_hf_repo(repo_id)
            .await
            .with_context(|| format!("unable to extract tokenizer kind from repo {}", repo_id))
    }

    /// Succeeds iff the directory contains a HuggingFace `tokenizer.json`.
    async fn try_is_hf_repo(repo: &str) -> anyhow::Result<Self> {
        let tokenizer = check_for_file(repo, "tokenizer.json").await?;
        Ok(Self::HfTokenizerJson(tokenizer))
    }
}
/// Checks if the provided path contains the expected file.
async fn check_for_file(repo_id: &str, file: &str) -> anyhow::Result<File> {
let mut files = check_for_files(repo_id, vec![file.to_string()]).await?;
let file = files
.remove(file)
.ok_or(anyhow::anyhow!("file {} not found", file))?;
Ok(file)
}
/// Scans the top level of `repo_id` and returns the requested `files`
/// (file name -> full path string) that were found there.
///
/// Directory entries whose names are not valid UTF-8 are skipped rather than
/// treated as an error: the requested names are `String`s, so such entries can
/// never match, and their presence should not break lookup of unrelated files.
///
/// # Errors
/// Fails when the directory (or one of its entries) cannot be read, or when a
/// matching entry's full path is not valid UTF-8.
async fn check_for_files(repo_id: &str, files: Vec<String>) -> Result<HashMap<String, File>> {
    let dir_entries = fs::read_dir(repo_id)
        .with_context(|| format!("Failed to read directory: {}", repo_id))?;
    let mut found_files = HashMap::new();
    for entry in dir_entries {
        let entry =
            entry.with_context(|| format!("Failed to read directory entry in {}", repo_id))?;
        let path = entry.path();
        // Skip non-UTF-8 names instead of failing the whole scan (see above).
        let file_name = match path.file_name().and_then(|n| n.to_str()) {
            Some(name) => name,
            None => continue,
        };
        // Compare without allocating a fresh String per directory entry.
        if files.iter().any(|wanted| wanted == file_name) {
            found_files.insert(
                file_name.to_string(),
                path.to_str()
                    .ok_or_else(|| anyhow::anyhow!("Invalid path"))?
                    .to_string(),
            );
        }
    }
    Ok(found_files)
}
/// Checks if the provided path is a valid local repository path.
///
/// # Arguments
/// * `path` - Path to validate
///
/// # Errors
/// Returns an error if the path doesn't exist or isn't a directory
/// Checks if the provided path is a valid local repository path.
///
/// # Arguments
/// * `path` - Path to validate
///
/// # Errors
/// Returns an error if the path doesn't exist or isn't a directory
fn check_valid_local_repo_path(path: impl AsRef<Path>) -> Result<()> {
    let path = path.as_ref();
    anyhow::ensure!(
        path.exists(),
        "Model path does not exist: {}",
        path.display()
    );
    anyhow::ensure!(
        path.is_dir(),
        "Model path is not a directory: {}",
        path.display()
    );
    Ok(())
}
\ No newline at end of file
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! # Model Deployment Card
//!
//! The ModelDeploymentCard (MDC) is the primary model configuration structure that will be available to any
//! component that needs to interact with the model or its dependent artifacts.
//!
//! The ModelDeploymentCard contains LLM model deployment configuration information:
//! - Display name and service name for the model
//! - Model information (ModelInfoType)
//! - Tokenizer configuration (TokenizerKind)
//! - Prompt formatter settings (PromptFormatterArtifact)
//! - Various metadata like revision, publish time, etc.
use anyhow::Result;
use either::Either;
use crate::protocols::TokenIdType;
use serde::{Deserialize, Serialize};
use std::sync::Arc;
use std::fmt;
use std::path::Path;
use std::time::Duration;
use derive_builder::Builder;
use triton_distributed::slug::Slug;
/// Bucket under which model deployment cards are published.
/// NOTE(review): presumably a NATS KV/object-store bucket name — confirm against publisher code.
pub const BUCKET_NAME: &str = "mdc";
/// Delete model deployment cards that haven't been re-published after this long.
/// Cleans up if the worker stopped.
pub const BUCKET_TTL: Duration = Duration::from_secs(5 * 60);
/// If a model deployment card hasn't been refreshed in this much time the worker is likely gone
const CARD_MAX_AGE: chrono::TimeDelta = chrono::TimeDelta::minutes(5);
/// A local file path stored as a UTF-8 string (see model_card::create, which
/// builds these from `Path::to_str`).
pub type File = String;
/// Where the model's configuration can be found.
#[derive(Serialize, Deserialize, Clone, Debug)]
#[serde(rename_all = "snake_case")]
pub enum ModelInfoType {
    /// Path to a HuggingFace-format `config.json` file
    HfConfigJson(File),
}
/// Where the model's tokenizer definition can be found.
#[derive(Serialize, Deserialize, Clone, Debug)]
#[serde(rename_all = "snake_case")]
pub enum TokenizerKind {
    /// Path to a HuggingFace-format `tokenizer.json` file
    HfTokenizerJson(File),
}
/// Supported types of prompt formatters.
///
/// We need a way to associate the prompt formatter template definition with an associated
/// data model which is expected for rendering.
///
/// All current prompt formatters are Jinja2 templates which use the OpenAI ChatCompletionRequest
/// format. However, we currently do not have a discovery path to know if the model supports tool use
/// unless we inspect the template.
///
/// TODO(): Add an enum for the PromptFormatDataModel with at minimum arms for:
/// - OaiChat
/// - OaiChatToolUse
#[derive(Serialize, Deserialize, Clone, Debug)]
#[serde(rename_all = "snake_case")]
pub enum PromptFormatterArtifact {
    /// Path to a HuggingFace-format `tokenizer_config.json` file
    HfTokenizerConfigJson(File),
}
/// Extra context a prompt template may need at render time.
#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq, Hash)]
#[serde(rename_all = "snake_case")]
pub enum PromptContextMixin {
    /// Support OAI Chat Messages and Tools
    OaiChat,
    /// Enables templates with `{{datatime}}` to be rendered with the current date and time.
    Llama3DateTime,
}
/// The deployable description of a model: everything a serving component
/// needs to locate the model's configuration, tokenizer and prompt template.
#[derive(Serialize, Deserialize, Clone, Debug, Builder)]
pub struct ModelDeploymentCard {
    /// Human readable model name, e.g. "Meta Llama 3.1 8B Instruct"
    pub display_name: String,
    /// Identifier to expect in OpenAI compatible HTTP request, e.g. "meta-llama/Meta-Llama-3.1-8B-Instruct"
    /// This will get slugified for use in NATS.
    pub service_name: String,
    /// Model information (currently the location of a HF-format config.json)
    pub model_info: ModelInfoType,
    /// Tokenizer configuration
    pub tokenizer: TokenizerKind,
    /// Prompt Formatter configuration
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub prompt_formatter: Option<PromptFormatterArtifact>,
    /// Prompt Formatter Config
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub prompt_context: Option<Vec<PromptContextMixin>>,
    /// When this card was last advertised by a worker. None if not yet published.
    pub last_published: Option<chrono::DateTime<chrono::Utc>>,
    /// Incrementing count of how many times we published this card
    /// (local bookkeeping only: deliberately never serialized)
    #[serde(default, skip_serializing)]
    pub revision: u64,
    /// Does this model expect preprocessing (tokenization, etc) to be already done?
    /// If this is true they get a BackendInput JSON. If this is false they get
    /// a ChatCompletionRequest JSON.
    #[serde(default)]
    pub requires_preprocessing: bool,
}
impl ModelDeploymentCard {
pub fn builder() -> ModelDeploymentCardBuilder {
ModelDeploymentCardBuilder::default()
}
/// A URL and NATS friendly and very likely unique ID for this model.
/// Mostly human readable. a-z, 0-9, _ and - only.
/// Pass the service_name.
pub fn service_name_slug(s: &str) -> Slug {
Slug::from_string(s)
}
pub fn set_service_name(&mut self, service_name: &str) {
self.service_name = service_name.to_string();
}
/// How often we should check if a model deployment card expired because it's workers are gone
pub fn expiry_check_period() -> Duration {
match CARD_MAX_AGE.to_std() {
Ok(duration) => duration / 3,
Err(_) => {
// Only happens if CARD_MAX_AGE is negative, which it isn't
unreachable!("Cannot run card expiry watcher, invalid CARD_MAX_AGE");
}
}
}
pub fn slug(&self) -> Slug {
ModelDeploymentCard::service_name_slug(&self.service_name)
}
/// Load a model deployment card from a JSON file
pub fn load_from_json_file<P: AsRef<Path>>(file: P) -> std::io::Result<Self> {
let mut card: ModelDeploymentCard = serde_json::from_str(&std::fs::read_to_string(file)?)?;
card.requires_preprocessing = false;
Ok(card)
}
/// Load a model deployment card from a JSON string
pub fn load_from_json_str(json: &str) -> Result<Self, anyhow::Error> {
Ok(serde_json::from_str(json)?)
}
/// Save the model deployment card to a JSON file
pub fn save_to_json_file(&self, file: &str) -> Result<(), anyhow::Error> {
std::fs::write(file, self.to_json()?)?;
Ok(())
}
/// Serialize the model deployment card to a JSON string
pub fn to_json(&self) -> Result<String, anyhow::Error> {
Ok(serde_json::to_string(self)?)
}
pub fn mdcsum(&self) -> String {
let json = self.to_json().unwrap();
format!("{}", blake3::hash(json.as_bytes()))
}
/// Was this card last published a long time ago, suggesting the worker is gone?
pub fn is_expired(&self) -> bool {
if let Some(last_published) = self.last_published.as_ref() {
chrono::Utc::now() - last_published > CARD_MAX_AGE
} else {
false
}
}
}
impl fmt::Display for ModelDeploymentCard {
    /// Renders the card as its slug (the same identifier used for NATS).
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let slug = self.slug();
        write!(f, "{}", slug)
    }
}
/// Read-only accessors for model properties extracted from its configuration.
pub trait ModelInfo: Send + Sync {
    /// Model type, e.g. "llama"
    fn model_type(&self) -> String;
    /// Token ID for the beginning of sequence
    fn bos_token_id(&self) -> TokenIdType;
    /// Token IDs for the end of sequence; configs may define one or several
    fn eos_token_ids(&self) -> Vec<TokenIdType>;
    /// Maximum position embeddings / max sequence length
    fn max_position_embeddings(&self) -> usize;
    /// Vocabulary size
    fn vocab_size(&self) -> usize;
}
impl ModelInfoType {
    /// Loads the concrete model info behind this variant.
    /// Currently only the HuggingFace `config.json` format is supported.
    pub async fn get_model_info(&self) -> Result<Arc<dyn ModelInfo>> {
        match self {
            Self::HfConfigJson(info) => HFConfigJsonFile::from_file(info).await,
        }
    }
}
/// The subset of a HuggingFace `config.json` we deserialize.
/// Fields not listed here are ignored by serde's default behavior.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct HFConfigJsonFile {
    /// Token ID emitted at the beginning of a sequence
    bos_token_id: TokenIdType,
    /// End-of-sequence token(s); HF configs store either a single ID or a list,
    /// hence the untagged Either
    #[serde(with = "either::serde_untagged")]
    eos_token_id: Either<TokenIdType, Vec<TokenIdType>>,
    /// denotes the mixin to the flattened data model which can be present
    /// in the config.json file
    architectures: Vec<String>,
    /// general model type
    model_type: String,
    /// max sequence length
    max_position_embeddings: usize,
    /// number of layers in the model
    num_hidden_layers: usize,
    /// number of attention heads in the model
    num_attention_heads: usize,
    /// Vocabulary size
    vocab_size: usize,
}
impl HFConfigJsonFile {
    /// Reads and deserializes a HuggingFace `config.json` from disk.
    ///
    /// # Errors
    /// Fails when the file cannot be read or is not valid JSON for this schema.
    async fn from_file(file: &File) -> Result<Arc<dyn ModelInfo>> {
        // `file` is already a reference; the previous `&file` was a needless re-borrow.
        let contents = std::fs::read_to_string(file)?;
        let config: Self = serde_json::from_str(&contents)?;
        Ok(Arc::new(config))
    }
}
impl ModelInfo for HFConfigJsonFile {
    /// General model family as declared by the config, e.g. "llama".
    fn model_type(&self) -> String {
        self.model_type.clone()
    }

    fn bos_token_id(&self) -> TokenIdType {
        self.bos_token_id
    }

    /// Normalizes the single-or-list `eos_token_id` into a Vec.
    fn eos_token_ids(&self) -> Vec<TokenIdType> {
        self.eos_token_id
            .as_ref()
            .either(|single| vec![*single], |many| many.clone())
    }

    fn max_position_embeddings(&self) -> usize {
        self.max_position_embeddings
    }

    fn vocab_size(&self) -> usize {
        self.vocab_size
    }
}
{
"architectures": [
"LlamaForCausalLM"
],
"attention_bias": false,
"attention_dropout": 0.0,
"bos_token_id": 128000,
"eos_token_id": 128009,
"hidden_act": "silu",
"hidden_size": 4096,
"initializer_range": 0.02,
"intermediate_size": 14336,
"max_position_embeddings": 8192,
"model_type": "llama",
"num_attention_heads": 32,
"num_hidden_layers": 32,
"num_key_value_heads": 8,
"pretraining_tp": 1,
"rms_norm_eps": 1e-05,
"rope_scaling": null,
"rope_theta": 500000.0,
"tie_word_embeddings": false,
"torch_dtype": "bfloat16",
"transformers_version": "4.40.0.dev0",
"use_cache": true,
"vocab_size": 128256
}
{
"bos_token_id": 128000,
"eos_token_id": [128001, 128009],
"do_sample": true,
"temperature": 0.6,
"max_length": 4096,
"top_p": 0.9,
"transformers_version": "4.40.0.dev0"
}
{
"version": "1.0",
"truncation": null,
"padding": null,
"added_tokens": [
{
"id": 128000,
"content": "<|begin_of_text|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 128001,
"content": "<|end_of_text|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 128002,
"content": "<|reserved_special_token_0|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 128003,
"content": "<|reserved_special_token_1|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 128004,
"content": "<|reserved_special_token_2|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 128005,
"content": "<|reserved_special_token_3|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 128006,
"content": "<|start_header_id|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 128007,
"content": "<|end_header_id|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 128008,
"content": "<|reserved_special_token_4|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 128009,
"content": "<|eot_id|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 128010,
"content": "<|reserved_special_token_5|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": null,
"pre_tokenizer": {
"type": "Sequence",
"pretokenizers": [
{
"type": "Split",
"pattern": {
"Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+"
},
"behavior": "Isolated",
"invert": false
},
{
"type": "ByteLevel",
"add_prefix_space": false,
"trim_offsets": true,
"use_regex": false
}
]
},
"post_processor": {
"type": "Sequence",
"processors": [
{
"type": "ByteLevel",
"add_prefix_space": true,
"trim_offsets": false,
"use_regex": true
},
{
"type": "TemplateProcessing",
"single": [
{
"SpecialToken": {
"id": "<|begin_of_text|>",
"type_id": 0
}
},
{
"Sequence": {
"id": "A",
"type_id": 0
}
}
],
"pair": [
{
"SpecialToken": {
"id": "<|begin_of_text|>",
"type_id": 0
}
},
{
"Sequence": {
"id": "A",
"type_id": 0
}
},
{
"SpecialToken": {
"id": "<|begin_of_text|>",
"type_id": 1
}
},
{
"Sequence": {
"id": "B",
"type_id": 1
}
}
],
"special_tokens": {
"<|begin_of_text|>": {
"id": "<|begin_of_text|>",
"ids": [
128000
],
"tokens": [
"<|begin_of_text|>"
]
}
}
}
]
},
"decoder": {
"type": "ByteLevel",
"add_prefix_space": true,
"trim_offsets": true,
"use_regex": true
},
"model": {
"type": "BPE",
"dropout": null,
"unk_token": null,
"continuing_subword_prefix": null,
"end_of_word_suffix": null,
"fuse_unk": false,
"byte_fallback": false,
"ignore_merges": true,
"vocab": {},
"merges": []
}
}
\ No newline at end of file
{
"added_tokens_decoder": {
"128000": {
"content": "<|begin_of_text|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128001": {
"content": "<|end_of_text|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128002": {
"content": "<|reserved_special_token_0|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128003": {
"content": "<|reserved_special_token_1|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128004": {
"content": "<|reserved_special_token_2|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128005": {
"content": "<|reserved_special_token_3|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128006": {
"content": "<|start_header_id|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128007": {
"content": "<|end_header_id|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128008": {
"content": "<|reserved_special_token_4|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"128009": {
"content": "<|eot_id|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
}
},
"bos_token": "<|begin_of_text|>",
"chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim %}{% if loop.first %}{% set content = bos_token + content %}{% endif %}{% if not loop.last %}{% set content = content + '<|eot_id|>'%}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
"clean_up_tokenization_spaces": true,
"eos_token": "<|eot_id|>",
"model_input_names": [
"input_ids",
"attention_mask"
],
"model_max_length": 1000000000000000019884624838656,
"tokenizer_class": "PreTrainedTokenizerFast"
}
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use triton_llm::model_card::model::{ModelDeploymentCard, ModelInfoType, TokenizerKind, PromptFormatterArtifact};
use tempfile::tempdir;
#[tokio::test]
async fn test_model_info_from_hf_like_local_repo() {
    // Happy path: an HF-style fixture directory containing config.json.
    let path = "tests/data/sample-models/mock-llama-3.1-8b-instruct";
    let mdc = ModelDeploymentCard::from_local_path(path).await.unwrap();
    let info = mdc.model_info.get_model_info().await.unwrap();
    // Expected values mirror the fixture's config.json contents.
    assert_eq!(info.model_type(), "llama");
    assert_eq!(info.bos_token_id(), 128000);
    assert_eq!(info.eos_token_ids(), vec![128009]);
    assert_eq!(info.max_position_embeddings(), 8192);
    assert_eq!(info.vocab_size(), 128256);
}
#[tokio::test]
async fn test_model_info_from_non_existent_local_repo() {
    // A path that does not exist must be rejected, not silently accepted.
    let path = "tests/data/sample-models/this-model-does-not-exist";
    let result = ModelDeploymentCard::from_local_path(path).await;
    assert!(result.is_err());
}
#[tokio::test]
async fn test_tokenizer_from_hf_like_local_repo() {
    let path = "tests/data/sample-models/mock-llama-3.1-8b-instruct";
    let mdc = ModelDeploymentCard::from_local_path(path).await.unwrap();
    // Verify tokenizer file was found. `matches!` avoids the
    // unreachable-pattern warning the previous catch-all match arm produced
    // (TokenizerKind currently has a single variant).
    assert!(matches!(mdc.tokenizer, TokenizerKind::HfTokenizerJson(_)));
}
#[tokio::test]
async fn test_prompt_formatter_from_hf_like_local_repo() {
    let path = "tests/data/sample-models/mock-llama-3.1-8b-instruct";
    let mdc = ModelDeploymentCard::from_local_path(path).await.unwrap();
    // The fixture ships a tokenizer_config.json, so a prompt formatter
    // must have been detected.
    assert!(matches!(
        mdc.prompt_formatter,
        Some(PromptFormatterArtifact::HfTokenizerConfigJson(_))
    ));
}
#[tokio::test]
async fn test_missing_required_files() {
    // Create empty temp directory: the path is valid, but none of the
    // required model files exist inside it.
    let temp_dir = tempdir().unwrap();
    let result = ModelDeploymentCard::from_local_path(temp_dir.path()).await;
    assert!(result.is_err());
    let err = result.unwrap_err().to_string();
    // Should fail because config.json is missing
    assert!(err.contains("unable to extract"));
}
\ No newline at end of file
......@@ -469,6 +469,7 @@ dependencies = [
"iana-time-zone",
"js-sys",
"num-traits",
"serde",
"wasm-bindgen",
"windows-targets",
]
......@@ -3189,6 +3190,7 @@ dependencies = [
"async-stream",
"async-trait",
"axum 0.8.1",
"blake3",
"bytes",
"chrono",
"derive_builder",
......
......@@ -38,11 +38,11 @@ pub mod protocols;
pub mod runnable;
pub mod runtime;
pub mod service;
pub mod slug;
pub mod transports;
pub mod worker;
pub mod distributed;
pub use futures::stream;
pub use tokio_util::sync::CancellationToken;
pub use worker::Worker;
......
......@@ -38,8 +38,7 @@ use std::path::PathBuf;
use tokio::time;
use validator::{Validate, ValidationError};
mod slug;
pub use slug::Slug;
pub use crate::slug::Slug;
use tracing as log;
#[derive(Clone)]
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment