"lib/bindings/vscode:/vscode.git/clone" did not exist on "5b24b429299784c77c9e23b61a15499c2df7c927"
model_entry.rs 2.18 KB
Newer Older
1
2
3
4
5
6
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

use std::sync::Arc;

use dynamo_runtime::transports::etcd;
7
8
9
10
11
use dynamo_runtime::{
    protocols,
    slug::Slug,
    storage::key_value_store::{EtcdStorage, KeyValueStore, KeyValueStoreManager},
};
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
use serde::{Deserialize, Serialize};

use crate::{
    model_card::{self, ModelDeploymentCard},
    model_type::ModelType,
};

/// A [ModelEntry] holds what is needed to discover a model from the etcd cluster.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ModelEntry {
    /// Public name of the model.
    /// The HTTP service identifies the model by this name, taken from the value used in
    /// an OpenAI ChatRequest.
    pub name: String,

    /// Network address at which this model is reached.
    pub endpoint: protocols::Endpoint,

    /// Kind of model: chat, completions, etc.
    pub model_type: ModelType,
}

impl ModelEntry {
34
35
36
37
38
    /// Slug form of the display name, for use in etcd and NATS.
    ///
    /// Built from [`ModelEntry::name`] via [`Slug::from_string`].
    pub fn slug(&self) -> Slug {
        let display_name = &self.name;
        Slug::from_string(display_name)
    }

39
40
41
42
43
44
45
46
47
48
49
50
    /// Reports whether this entry's model type is [`ModelType::Backend`].
    ///
    /// That is the only model type for which this method returns `true`.
    pub fn requires_preprocessing(&self) -> bool {
        let model_type = &self.model_type;
        matches!(model_type, ModelType::Backend)
    }

    /// Fetch this entry's ModelDeploymentCard from etcd.
    ///
    /// The card is looked up under [`model_card::ROOT_PATH`] using [`ModelEntry::slug`] as
    /// the key. The loaded card is returned as-is: its fields are not touched here, so you
    /// may need to call `move_from_nats` on it.
    ///
    /// # Errors
    ///
    /// Returns an error if the store lookup fails, or if no card is stored under the key.
    pub async fn load_mdc(
        &self,
        etcd_client: &etcd::Client,
    ) -> anyhow::Result<ModelDeploymentCard> {
        // Wrap the etcd client in the generic key-value store interface.
        let backend: Box<dyn KeyValueStore> = Box::new(EtcdStorage::new(etcd_client.clone()));
        let manager = Arc::new(KeyValueStoreManager::new(backend));
        let card_key = self.slug();

        let lookup = manager
            .load::<ModelDeploymentCard>(model_card::ROOT_PATH, &card_key)
            .await;

        // A failed lookup and an absent card are reported with distinct messages.
        let stored = match lookup {
            Ok(stored) => stored,
            Err(err) => {
                anyhow::bail!(
                    "Error fetching ModelDeploymentCard from etcd under key {card_key}. {err}"
                );
            }
        };

        stored.ok_or_else(|| {
            anyhow::anyhow!("Missing ModelDeploymentCard in etcd under key {card_key}")
        })
    }
}