"lib/llm/vscode:/vscode.git/clone" did not exist on "6634f33f2768d9ef5ed74aca56fa744c02f6d0a5"
mod.rs 6.29 KB
Newer Older
1
2
3
4
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

use crate::Result;
5
use crate::component::TransportType;
6
7
8
9
10
11
12
13
use async_trait::async_trait;
use futures::Stream;
use serde::{Deserialize, Serialize};
use std::pin::Pin;

mod mock;
pub use mock::{MockDiscoveryClient, SharedMockRegistry};

14
15
16
pub mod utils;
pub use utils::watch_and_extract_field;

17
18
19
20
21
22
23
/// Query key for prefix-based discovery queries.
///
/// Supports hierarchical queries from all endpoints down to a specific endpoint,
/// with a parallel set of variants for model cards.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum DiscoveryKey {
    /// Query all endpoints in the system
    AllEndpoints,
    /// Query all endpoints in a specific namespace
    NamespacedEndpoints {
        namespace: String,
    },
    /// Query all endpoints in a namespace/component
    ComponentEndpoints {
        namespace: String,
        component: String,
    },
    /// Query a specific endpoint
    Endpoint {
        namespace: String,
        component: String,
        endpoint: String,
    },
    /// Query all model cards (model-card counterpart of `AllEndpoints`)
    AllModelCards,
    /// Query all model cards in a specific namespace
    NamespacedModelCards {
        namespace: String,
    },
    /// Query all model cards in a namespace/component
    ComponentModelCards {
        namespace: String,
        component: String,
    },
    /// Query the model cards of a specific namespace/component/endpoint
    EndpointModelCards {
        namespace: String,
        component: String,
        endpoint: String,
    },
}

/// Specification for registering objects in the discovery plane
/// Represents the input to the register() operation
55
#[derive(Debug, Clone, PartialEq, Eq)]
56
57
58
59
60
61
pub enum DiscoverySpec {
    /// Endpoint specification for registration
    Endpoint {
        namespace: String,
        component: String,
        endpoint: String,
62
63
64
65
66
67
68
69
70
71
72
        /// Transport type and routing information
        transport: TransportType,
    },
    ModelCard {
        namespace: String,
        component: String,
        endpoint: String,
        /// ModelDeploymentCard serialized as JSON
        /// This allows lib/runtime to remain independent of lib/llm types
        /// DiscoverySpec.from_model_card() and DiscoveryInstance.deserialize_model_card() are ergonomic helpers to create and deserialize the model card.
        card_json: serde_json::Value,
73
74
75
76
    },
}

impl DiscoverySpec {
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
    /// Creates a ModelCard discovery spec from a serializable type
    /// The card will be serialized to JSON to avoid cross-crate dependencies
    pub fn from_model_card<T>(
        namespace: String,
        component: String,
        endpoint: String,
        card: &T,
    ) -> crate::Result<Self>
    where
        T: Serialize,
    {
        let card_json = serde_json::to_value(card)?;
        Ok(Self::ModelCard {
            namespace,
            component,
            endpoint,
            card_json,
        })
    }

97
98
99
100
101
102
103
    /// Attaches an instance ID to create a DiscoveryInstance
    pub fn with_instance_id(self, instance_id: u64) -> DiscoveryInstance {
        match self {
            Self::Endpoint {
                namespace,
                component,
                endpoint,
104
105
106
107
108
109
110
111
112
113
114
115
116
117
                transport,
            } => DiscoveryInstance::Endpoint(crate::component::Instance {
                namespace,
                component,
                endpoint,
                instance_id,
                transport,
            }),
            Self::ModelCard {
                namespace,
                component,
                endpoint,
                card_json,
            } => DiscoveryInstance::ModelCard {
118
119
120
121
                namespace,
                component,
                endpoint,
                instance_id,
122
                card_json,
123
124
125
126
127
128
129
            },
        }
    }
}

/// Registered instances in the discovery plane
/// Represents objects that have been successfully registered with an instance ID
130
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
131
132
#[serde(tag = "type")]
pub enum DiscoveryInstance {
133
134
135
    /// Registered endpoint instance - wraps the component::Instance directly
    Endpoint(crate::component::Instance),
    ModelCard {
136
137
138
139
        namespace: String,
        component: String,
        endpoint: String,
        instance_id: u64,
140
141
142
        /// ModelDeploymentCard serialized as JSON
        /// This allows lib/runtime to remain independent of lib/llm types
        card_json: serde_json::Value,
143
    },
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
}

impl DiscoveryInstance {
    /// Returns the instance ID for this discovery instance
    pub fn instance_id(&self) -> u64 {
        match self {
            Self::Endpoint(inst) => inst.instance_id,
            Self::ModelCard { instance_id, .. } => *instance_id,
        }
    }

    /// Deserializes the model card JSON into the specified type `T`.
    ///
    /// # Errors
    ///
    /// Returns an error if this is not a `ModelCard` instance or if
    /// deserialization fails.
    pub fn deserialize_model_card<T>(&self) -> crate::Result<T>
    where
        T: for<'de> Deserialize<'de>,
    {
        match self {
            // `&serde_json::Value` is itself a `Deserializer`, so the card can be
            // decoded in place without first deep-cloning the whole JSON tree.
            Self::ModelCard { card_json, .. } => Ok(T::deserialize(card_json)?),
            Self::Endpoint(_) => {
                crate::raise!("Cannot deserialize model card from Endpoint instance")
            }
        }
    }
}

/// Events emitted by the discovery client watch stream.
///
/// These are the items carried (wrapped in `Result`) by `DiscoveryStream`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DiscoveryEvent {
    /// A new instance was added; carries the full registered instance
    Added(DiscoveryInstance),
    /// An instance was removed (identified by instance_id only)
    Removed(u64),
}

/// Stream type for discovery events.
///
/// A pinned, boxed, `Send` stream whose items are `Result<DiscoveryEvent>`,
/// so individual items may be errors rather than events.
pub type DiscoveryStream = Pin<Box<dyn Stream<Item = Result<DiscoveryEvent>> + Send>>;

/// Discovery client trait for service discovery across different backends
#[async_trait]
pub trait DiscoveryClient: Send + Sync {
    /// Returns a unique identifier for this worker (e.g lease id if using etcd or generated id for memory store)
    /// Discovery objects created by this worker will be associated with this id.
    fn instance_id(&self) -> u64;

    /// Registers an object in the discovery plane with the instance id
    async fn register(&self, spec: DiscoverySpec) -> Result<DiscoveryInstance>;

192
193
194
195
    /// Returns a list of currently registered instances for the given discovery key
    /// This is a one-time snapshot without watching for changes
    async fn list(&self, key: DiscoveryKey) -> Result<Vec<DiscoveryInstance>>;

196
197
198
    /// Returns a stream of discovery events (Added/Removed) for the given discovery key
    async fn list_and_watch(&self, key: DiscoveryKey) -> Result<DiscoveryStream>;
}