"docs/backends/sglang/sgl-hicache-example.md" did not exist on "3b722842d26fd4b96fe0e246f1ee47c240f1af3f"
chat_completions.rs 6.27 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

16
17
18
19
use dynamo_runtime::protocols::annotated::AnnotationsProvider;
use serde::{Deserialize, Serialize};
use validator::Validate;

Paul Hendricks's avatar
Paul Hendricks committed
20
21
22
23
use super::nvext::NvExt;
use super::nvext::NvExtProvider;
use super::OpenAISamplingOptionsProvider;
use super::OpenAIStopConditionsProvider;
24
25
26
27

mod aggregator;
mod delta;

Paul Hendricks's avatar
Paul Hendricks committed
28
pub use aggregator::DeltaAggregator;
29
30
pub use delta::DeltaGenerator;

31
32
33
34
35
36
37
/// A request structure for creating a chat completion, extending OpenAI's
/// `CreateChatCompletionRequest` with [`NvExt`] extensions.
///
/// # Fields
/// - `inner`: The base OpenAI chat completion request, embedded using `serde(flatten)`.
/// - `nvext`: The optional NVIDIA extension field. See [`NvExt`] for
///   more details.
Paul Hendricks's avatar
Paul Hendricks committed
38
#[derive(Serialize, Deserialize, Validate, Debug, Clone)]
39
pub struct NvCreateChatCompletionRequest {
Paul Hendricks's avatar
Paul Hendricks committed
40
41
    #[serde(flatten)]
    pub inner: async_openai::types::CreateChatCompletionRequest,
42
43

    #[serde(skip_serializing_if = "Option::is_none")]
44
45
46
    pub nvext: Option<NvExt>,
}

47
48
49
50
51
52
/// A response structure for unary chat completion responses, embedding OpenAI's
/// `CreateChatCompletionResponse`.
///
/// # Fields
/// - `inner`: The base OpenAI unary chat completion response, embedded
///   using `serde(flatten)`.
Paul Hendricks's avatar
Paul Hendricks committed
53
#[derive(Serialize, Deserialize, Validate, Debug, Clone)]
54
pub struct NvCreateChatCompletionResponse {
Paul Hendricks's avatar
Paul Hendricks committed
55
56
    #[serde(flatten)]
    pub inner: async_openai::types::CreateChatCompletionResponse,
57
58
}

59
60
61
62
63
64
/// A response structure for streamed chat completions, embedding OpenAI's
/// `CreateChatCompletionStreamResponse`.
///
/// # Fields
/// - `inner`: The base OpenAI streaming chat completion response, embedded
///   using `serde(flatten)`.
Paul Hendricks's avatar
Paul Hendricks committed
65
#[derive(Serialize, Deserialize, Validate, Debug, Clone)]
66
pub struct NvCreateChatCompletionStreamResponse {
Paul Hendricks's avatar
Paul Hendricks committed
67
68
    #[serde(flatten)]
    pub inner: async_openai::types::CreateChatCompletionStreamResponse,
69
70
}

71
72
/// Implements `NvExtProvider` for `NvCreateChatCompletionRequest`,
/// providing access to NVIDIA-specific extensions.
73
impl NvExtProvider for NvCreateChatCompletionRequest {
74
    /// Returns a reference to the optional `NvExt` extension, if available.
75
76
77
78
    fn nvext(&self) -> Option<&NvExt> {
        self.nvext.as_ref()
    }

79
    /// Returns `None`, as raw prompt extraction is not implemented.
80
81
82
83
84
    fn raw_prompt(&self) -> Option<String> {
        None
    }
}

85
86
/// Implements `AnnotationsProvider` for `NvCreateChatCompletionRequest`,
/// enabling retrieval and management of request annotations.
87
impl AnnotationsProvider for NvCreateChatCompletionRequest {
88
    /// Retrieves the list of annotations from `NvExt`, if present.
Biswa Panda's avatar
Biswa Panda committed
89
90
91
92
93
94
    fn annotations(&self) -> Option<Vec<String>> {
        self.nvext
            .as_ref()
            .and_then(|nvext| nvext.annotations.clone())
    }

95
96
97
98
99
100
101
    /// Checks whether a specific annotation exists in the request.
    ///
    /// # Arguments
    /// * `annotation` - A string slice representing the annotation to check.
    ///
    /// # Returns
    /// `true` if the annotation exists, `false` otherwise.
Biswa Panda's avatar
Biswa Panda committed
102
103
104
105
106
107
108
109
    fn has_annotation(&self, annotation: &str) -> bool {
        self.nvext
            .as_ref()
            .and_then(|nvext| nvext.annotations.as_ref())
            .map(|annotations| annotations.contains(&annotation.to_string()))
            .unwrap_or(false)
    }
}
110

111
112
/// Implements `OpenAISamplingOptionsProvider` for `NvCreateChatCompletionRequest`,
/// exposing OpenAI's sampling parameters for chat completion.
113
impl OpenAISamplingOptionsProvider for NvCreateChatCompletionRequest {
114
    /// Retrieves the temperature parameter for sampling, if set.
115
    fn get_temperature(&self) -> Option<f32> {
Paul Hendricks's avatar
Paul Hendricks committed
116
        self.inner.temperature
117
118
    }

119
    /// Retrieves the top-p (nucleus sampling) parameter, if set.
120
    fn get_top_p(&self) -> Option<f32> {
Paul Hendricks's avatar
Paul Hendricks committed
121
        self.inner.top_p
122
123
    }

124
    /// Retrieves the frequency penalty parameter, if set.
125
    fn get_frequency_penalty(&self) -> Option<f32> {
Paul Hendricks's avatar
Paul Hendricks committed
126
        self.inner.frequency_penalty
127
128
    }

129
    /// Retrieves the presence penalty parameter, if set.
130
    fn get_presence_penalty(&self) -> Option<f32> {
Paul Hendricks's avatar
Paul Hendricks committed
131
        self.inner.presence_penalty
132
133
    }

134
    /// Returns a reference to the optional `NvExt` extension, if available.
135
136
137
138
139
    fn nvext(&self) -> Option<&NvExt> {
        self.nvext.as_ref()
    }
}

140
141
/// Implements `OpenAIStopConditionsProvider` for `NvCreateChatCompletionRequest`,
/// providing access to stop conditions that control chat completion behavior.
142
impl OpenAIStopConditionsProvider for NvCreateChatCompletionRequest {
143
    /// Retrieves the maximum number of tokens allowed in the response.
144
    #[allow(deprecated)]
Paul Hendricks's avatar
Paul Hendricks committed
145
    fn get_max_tokens(&self) -> Option<u32> {
146
        self.inner.max_completion_tokens.or(self.inner.max_tokens)
147
148
    }

149
150
151
152
153
    /// Retrieves the minimum number of tokens required in the response.
    ///
    /// # Note
    /// This method is currently a placeholder and always returns `None`
    /// since `min_tokens` is not an OpenAI-supported parameter.
Paul Hendricks's avatar
Paul Hendricks committed
154
155
    fn get_min_tokens(&self) -> Option<u32> {
        None
156
157
    }

158
159
160
161
162
163
164
    /// Retrieves the stop conditions that terminate the chat completion response.
    ///
    /// Converts OpenAI's `Stop` enum to a `Vec<String>`, normalizing the representation.
    ///
    /// # Returns
    /// * `Some(Vec<String>)` if stop conditions are set.
    /// * `None` if no stop conditions are defined.
165
    fn get_stop(&self) -> Option<Vec<String>> {
166
167
168
169
        self.inner.stop.as_ref().map(|stop| match stop {
            async_openai::types::Stop::String(s) => vec![s.clone()],
            async_openai::types::Stop::StringArray(arr) => arr.clone(),
        })
170
171
    }

172
    /// Returns a reference to the optional `NvExt` extension, if available.
173
174
175
176
    fn nvext(&self) -> Option<&NvExt> {
        self.nvext.as_ref()
    }
}