Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
c103d56a
Unverified
Commit
c103d56a
authored
Jun 04, 2025
by
Paul Hendricks
Committed by
GitHub
Jun 04, 2025
Browse files
refactor: Rename CompletionRequest to NvCreateCompletionRequest (#1383)
parent
cfd12d7f
Changes
13
Show whitespace changes
Inline
Side-by-side
Showing
13 changed files
with
58 additions
and
53 deletions
+58
-53
launch/dynamo-run/src/input/common.rs
launch/dynamo-run/src/input/common.rs
+2
-2
launch/dynamo-run/src/input/http.rs
launch/dynamo-run/src/input/http.rs
+5
-5
lib/engines/mistralrs/src/lib.rs
lib/engines/mistralrs/src/lib.rs
+3
-3
lib/llm/src/discovery/watcher.rs
lib/llm/src/discovery/watcher.rs
+7
-8
lib/llm/src/engines.rs
lib/llm/src/engines.rs
+12
-9
lib/llm/src/http/service/openai.rs
lib/llm/src/http/service/openai.rs
+5
-3
lib/llm/src/preprocessor.rs
lib/llm/src/preprocessor.rs
+3
-3
lib/llm/src/preprocessor/prompt/template/oai.rs
lib/llm/src/preprocessor/prompt/template/oai.rs
+2
-2
lib/llm/src/protocols/openai/completions.rs
lib/llm/src/protocols/openai/completions.rs
+7
-7
lib/llm/src/protocols/openai/completions/delta.rs
lib/llm/src/protocols/openai/completions/delta.rs
+2
-2
lib/llm/src/types.rs
lib/llm/src/types.rs
+4
-3
lib/llm/tests/http-service.rs
lib/llm/tests/http-service.rs
+3
-3
lib/llm/tests/openai_completions.rs
lib/llm/tests/openai_completions.rs
+3
-3
No files found.
launch/dynamo-run/src/input/common.rs
View file @
c103d56a
...
...
@@ -139,7 +139,7 @@ mod tests {
use
super
::
*
;
use
dynamo_llm
::
types
::
openai
::{
chat_completions
::{
NvCreateChatCompletionRequest
,
NvCreateChatCompletionStreamResponse
},
completions
::{
CompletionRe
quest
,
CompletionRe
sponse
},
completions
::{
CompletionRe
sponse
,
NvCreate
CompletionRe
quest
},
};
const
HF_PATH
:
&
str
=
concat!
(
...
...
@@ -174,7 +174,7 @@ mod tests {
// Build pipeline for completions
let
pipeline
=
build_pipeline
::
<
CompletionRequest
,
CompletionResponse
>
(
&
card
,
engine
)
.await
?
;
build_pipeline
::
<
NvCreate
CompletionRequest
,
CompletionResponse
>
(
&
card
,
engine
)
.await
?
;
// Verify pipeline was created
assert
!
(
Arc
::
strong_count
(
&
pipeline
)
>=
1
);
...
...
launch/dynamo-run/src/input/http.rs
View file @
c103d56a
...
...
@@ -15,7 +15,7 @@ use dynamo_llm::{
openai
::
chat_completions
::{
NvCreateChatCompletionRequest
,
NvCreateChatCompletionStreamResponse
,
},
openai
::
completions
::{
CompletionRe
quest
,
CompletionRe
sponse
},
openai
::
completions
::{
CompletionRe
sponse
,
NvCreate
CompletionRe
quest
},
},
};
use
dynamo_runtime
::
pipeline
::
RouterMode
;
...
...
@@ -76,10 +76,10 @@ pub async fn run(
.await
?
;
manager
.add_chat_completions_model
(
model
.service_name
(),
chat_pipeline
)
?
;
let
cmpl_pipeline
=
common
::
build_pipeline
::
<
CompletionRequest
,
CompletionResponse
>
(
model
.card
()
,
inner_engin
e
,
)
let
cmpl_pipeline
=
common
::
build_pipeline
::
<
NvCreateCompletionRequest
,
CompletionRespons
e
,
>
(
model
.card
(),
inner_engine
)
.await
?
;
manager
.add_completions_model
(
model
.service_name
(),
cmpl_pipeline
)
?
;
}
...
...
lib/engines/mistralrs/src/lib.rs
View file @
c103d56a
...
...
@@ -25,7 +25,7 @@ use dynamo_runtime::protocols::annotated::Annotated;
use
dynamo_llm
::
protocols
::
openai
::{
chat_completions
::{
NvCreateChatCompletionRequest
,
NvCreateChatCompletionStreamResponse
},
completions
::{
prompt_to_string
,
CompletionRe
quest
,
CompletionRe
sponse
},
completions
::{
prompt_to_string
,
CompletionRe
sponse
,
NvCreate
CompletionRe
quest
},
embeddings
::{
NvCreateEmbeddingRequest
,
NvCreateEmbeddingResponse
},
};
...
...
@@ -470,12 +470,12 @@ fn to_logit_bias(lb: HashMap<String, serde_json::Value>) -> HashMap<u32, f32> {
}
#[async_trait]
impl
AsyncEngine
<
SingleIn
<
CompletionRequest
>
,
ManyOut
<
Annotated
<
CompletionResponse
>>
,
Error
>
impl
AsyncEngine
<
SingleIn
<
NvCreate
CompletionRequest
>
,
ManyOut
<
Annotated
<
CompletionResponse
>>
,
Error
>
for
MistralRsEngine
{
async
fn
generate
(
&
self
,
request
:
SingleIn
<
CompletionRequest
>
,
request
:
SingleIn
<
NvCreate
CompletionRequest
>
,
)
->
Result
<
ManyOut
<
Annotated
<
CompletionResponse
>>
,
Error
>
{
let
(
request
,
context
)
=
request
.transfer
(());
let
ctx
=
context
.context
();
...
...
lib/llm/src/discovery/watcher.rs
View file @
c103d56a
...
...
@@ -25,7 +25,7 @@ use crate::{
protocols
::
openai
::
chat_completions
::{
NvCreateChatCompletionRequest
,
NvCreateChatCompletionStreamResponse
,
},
protocols
::
openai
::
completions
::{
CompletionRe
quest
,
CompletionRe
sponse
},
protocols
::
openai
::
completions
::{
CompletionRe
sponse
,
NvCreate
CompletionRe
quest
},
protocols
::
openai
::
embeddings
::{
NvCreateEmbeddingRequest
,
NvCreateEmbeddingResponse
},
};
...
...
@@ -239,7 +239,7 @@ impl ModelWatcher {
.add_chat_completions_model
(
&
model_entry
.name
,
chat_engine
)
?
;
let
frontend
=
SegmentSource
::
<
SingleIn
<
CompletionRequest
>
,
SingleIn
<
NvCreate
CompletionRequest
>
,
ManyOut
<
Annotated
<
CompletionResponse
>>
,
>
::
new
();
let
preprocessor
=
OpenAIPreprocessor
::
new
(
card
.clone
())
.await
?
.into_operator
();
...
...
@@ -290,11 +290,10 @@ impl ModelWatcher {
.add_chat_completions_model
(
&
model_entry
.name
,
engine
)
?
;
}
ModelType
::
Completion
=>
{
let
push_router
=
PushRouter
::
<
CompletionRequest
,
Annotated
<
CompletionResponse
>>
::
from_client
(
client
,
Default
::
default
(),
)
let
push_router
=
PushRouter
::
<
NvCreateCompletionRequest
,
Annotated
<
CompletionResponse
>
,
>
::
from_client
(
client
,
Default
::
default
())
.await
?
;
let
engine
=
Arc
::
new
(
push_router
);
self
.manager
...
...
lib/llm/src/engines.rs
View file @
c103d56a
...
...
@@ -30,7 +30,7 @@ use crate::preprocessor::PreprocessedRequest;
use
crate
::
protocols
::
common
::
llm_backend
::
LLMEngineOutput
;
use
crate
::
protocols
::
openai
::{
chat_completions
::{
NvCreateChatCompletionRequest
,
NvCreateChatCompletionStreamResponse
},
completions
::{
prompt_to_string
,
CompletionRe
quest
,
CompletionRe
sponse
},
completions
::{
prompt_to_string
,
CompletionRe
sponse
,
NvCreate
CompletionRe
quest
},
};
use
crate
::
types
::
openai
::
embeddings
::
NvCreateEmbeddingRequest
;
use
crate
::
types
::
openai
::
embeddings
::
NvCreateEmbeddingResponse
;
...
...
@@ -140,7 +140,7 @@ impl<E> EngineDispatcher<E> {
pub
trait
StreamingEngine
:
Send
+
Sync
{
async
fn
handle_completion
(
&
self
,
req
:
SingleIn
<
CompletionRequest
>
,
req
:
SingleIn
<
NvCreate
CompletionRequest
>
,
)
->
Result
<
ManyOut
<
Annotated
<
CompletionResponse
>>
,
Error
>
;
async
fn
handle_chat
(
...
...
@@ -218,12 +218,12 @@ impl
}
#[async_trait]
impl
AsyncEngine
<
SingleIn
<
CompletionRequest
>
,
ManyOut
<
Annotated
<
CompletionResponse
>>
,
Error
>
impl
AsyncEngine
<
SingleIn
<
NvCreate
CompletionRequest
>
,
ManyOut
<
Annotated
<
CompletionResponse
>>
,
Error
>
for
EchoEngineFull
{
async
fn
generate
(
&
self
,
incoming_request
:
SingleIn
<
CompletionRequest
>
,
incoming_request
:
SingleIn
<
NvCreate
CompletionRequest
>
,
)
->
Result
<
ManyOut
<
Annotated
<
CompletionResponse
>>
,
Error
>
{
let
(
request
,
context
)
=
incoming_request
.transfer
(());
let
deltas
=
request
.response_generator
();
...
...
@@ -265,8 +265,11 @@ impl
#[async_trait]
impl
<
E
>
StreamingEngine
for
EngineDispatcher
<
E
>
where
E
:
AsyncEngine
<
SingleIn
<
CompletionRequest
>
,
ManyOut
<
Annotated
<
CompletionResponse
>>
,
Error
>
+
AsyncEngine
<
E
:
AsyncEngine
<
SingleIn
<
NvCreateCompletionRequest
>
,
ManyOut
<
Annotated
<
CompletionResponse
>>
,
Error
,
>
+
AsyncEngine
<
SingleIn
<
NvCreateChatCompletionRequest
>
,
ManyOut
<
Annotated
<
NvCreateChatCompletionStreamResponse
>>
,
Error
,
...
...
@@ -279,7 +282,7 @@ where
{
async
fn
handle_completion
(
&
self
,
req
:
SingleIn
<
CompletionRequest
>
,
req
:
SingleIn
<
NvCreate
CompletionRequest
>
,
)
->
Result
<
ManyOut
<
Annotated
<
CompletionResponse
>>
,
Error
>
{
self
.inner
.generate
(
req
)
.await
}
...
...
@@ -343,12 +346,12 @@ impl StreamingEngineAdapter {
}
#[async_trait]
impl
AsyncEngine
<
SingleIn
<
CompletionRequest
>
,
ManyOut
<
Annotated
<
CompletionResponse
>>
,
Error
>
impl
AsyncEngine
<
SingleIn
<
NvCreate
CompletionRequest
>
,
ManyOut
<
Annotated
<
CompletionResponse
>>
,
Error
>
for
StreamingEngineAdapter
{
async
fn
generate
(
&
self
,
req
:
SingleIn
<
CompletionRequest
>
,
req
:
SingleIn
<
NvCreate
CompletionRequest
>
,
)
->
Result
<
ManyOut
<
Annotated
<
CompletionResponse
>>
,
Error
>
{
self
.0
.handle_completion
(
req
)
.await
}
...
...
lib/llm/src/http/service/openai.rs
View file @
c103d56a
...
...
@@ -33,7 +33,9 @@ use crate::protocols::openai::{
};
use
crate
::
request_template
::
RequestTemplate
;
use
crate
::
types
::{
openai
::{
chat_completions
::
NvCreateChatCompletionRequest
,
completions
::
CompletionRequest
},
openai
::{
chat_completions
::
NvCreateChatCompletionRequest
,
completions
::
NvCreateCompletionRequest
,
},
Annotated
,
};
...
...
@@ -120,7 +122,7 @@ impl From<HttpError> for ErrorResponse {
#[tracing::instrument(skip_all)]
async
fn
completions
(
State
(
state
):
State
<
Arc
<
service_v2
::
State
>>
,
Json
(
request
):
Json
<
CompletionRequest
>
,
Json
(
request
):
Json
<
NvCreate
CompletionRequest
>
,
)
->
Result
<
Response
,
(
StatusCode
,
Json
<
ErrorResponse
>
)
>
{
// return a 503 if the service is not ready
check_ready
(
&
state
)
?
;
...
...
@@ -137,7 +139,7 @@ async fn completions(
..
request
.inner
};
let
request
=
CompletionRequest
{
let
request
=
NvCreate
CompletionRequest
{
inner
,
nvext
:
request
.nvext
,
};
...
...
lib/llm/src/preprocessor.rs
View file @
c103d56a
...
...
@@ -46,7 +46,7 @@ use crate::protocols::{
common
::{
SamplingOptionsProvider
,
StopConditionsProvider
},
openai
::{
chat_completions
::{
NvCreateChatCompletionRequest
,
NvCreateChatCompletionStreamResponse
},
completions
::{
CompletionRe
quest
,
CompletionRe
sponse
},
completions
::{
CompletionRe
sponse
,
NvCreate
CompletionRe
quest
},
nvext
::
NvExtProvider
,
DeltaGeneratorExt
,
},
...
...
@@ -341,7 +341,7 @@ impl
#[async_trait]
impl
Operator
<
SingleIn
<
CompletionRequest
>
,
SingleIn
<
NvCreate
CompletionRequest
>
,
ManyOut
<
Annotated
<
CompletionResponse
>>
,
SingleIn
<
PreprocessedRequest
>
,
ManyOut
<
Annotated
<
BackendOutput
>>
,
...
...
@@ -349,7 +349,7 @@ impl
{
async
fn
generate
(
&
self
,
request
:
SingleIn
<
CompletionRequest
>
,
request
:
SingleIn
<
NvCreate
CompletionRequest
>
,
next
:
Arc
<
dyn
AsyncEngine
<
SingleIn
<
PreprocessedRequest
>
,
...
...
lib/llm/src/preprocessor/prompt/template/oai.rs
View file @
c103d56a
...
...
@@ -18,7 +18,7 @@ use super::*;
use
minijinja
::{
context
,
value
::
Value
};
use
crate
::
protocols
::
openai
::{
chat_completions
::
NvCreateChatCompletionRequest
,
completions
::
CompletionRequest
,
chat_completions
::
NvCreateChatCompletionRequest
,
completions
::
NvCreate
CompletionRequest
,
};
use
tracing
;
...
...
@@ -55,7 +55,7 @@ impl OAIChatLikeRequest for NvCreateChatCompletionRequest {
}
}
impl
OAIChatLikeRequest
for
CompletionRequest
{
impl
OAIChatLikeRequest
for
NvCreate
CompletionRequest
{
fn
messages
(
&
self
)
->
minijinja
::
value
::
Value
{
let
message
=
async_openai
::
types
::
ChatCompletionRequestMessage
::
User
(
async_openai
::
types
::
ChatCompletionRequestUserMessage
{
...
...
lib/llm/src/protocols/openai/completions.rs
View file @
c103d56a
...
...
@@ -34,7 +34,7 @@ use super::{
use
dynamo_runtime
::
protocols
::
annotated
::
AnnotationsProvider
;
#[derive(Serialize,
Deserialize,
Validate,
Debug,
Clone)]
pub
struct
CompletionRequest
{
pub
struct
NvCreate
CompletionRequest
{
#[serde(flatten)]
pub
inner
:
async_openai
::
types
::
CreateCompletionRequest
,
...
...
@@ -141,7 +141,7 @@ pub fn prompt_to_string(prompt: &async_openai::types::Prompt) -> String {
}
}
impl
NvExtProvider
for
CompletionRequest
{
impl
NvExtProvider
for
NvCreate
CompletionRequest
{
fn
nvext
(
&
self
)
->
Option
<&
NvExt
>
{
self
.nvext
.as_ref
()
}
...
...
@@ -158,7 +158,7 @@ impl NvExtProvider for CompletionRequest {
}
}
impl
AnnotationsProvider
for
CompletionRequest
{
impl
AnnotationsProvider
for
NvCreate
CompletionRequest
{
fn
annotations
(
&
self
)
->
Option
<
Vec
<
String
>>
{
self
.nvext
.as_ref
()
...
...
@@ -174,7 +174,7 @@ impl AnnotationsProvider for CompletionRequest {
}
}
impl
OpenAISamplingOptionsProvider
for
CompletionRequest
{
impl
OpenAISamplingOptionsProvider
for
NvCreate
CompletionRequest
{
fn
get_temperature
(
&
self
)
->
Option
<
f32
>
{
self
.inner.temperature
}
...
...
@@ -196,7 +196,7 @@ impl OpenAISamplingOptionsProvider for CompletionRequest {
}
}
impl
OpenAIStopConditionsProvider
for
CompletionRequest
{
impl
OpenAIStopConditionsProvider
for
NvCreate
CompletionRequest
{
fn
get_max_tokens
(
&
self
)
->
Option
<
u32
>
{
self
.inner.max_tokens
}
...
...
@@ -255,10 +255,10 @@ impl ResponseFactory {
}
/// Implements `TryFrom` for converting an OpenAI-style `NvCreateCompletionRequest` into the engine's `common::CompletionRequest`
impl
TryFrom
<
CompletionRequest
>
for
common
::
CompletionRequest
{
impl
TryFrom
<
NvCreate
CompletionRequest
>
for
common
::
CompletionRequest
{
type
Error
=
anyhow
::
Error
;
fn
try_from
(
request
:
CompletionRequest
)
->
Result
<
Self
,
Self
::
Error
>
{
fn
try_from
(
request
:
NvCreate
CompletionRequest
)
->
Result
<
Self
,
Self
::
Error
>
{
// openai_api_rs::v1::completion::CompletionRequest {
// NA pub model: String,
// pub prompt: String,
...
...
lib/llm/src/protocols/openai/completions/delta.rs
View file @
c103d56a
...
...
@@ -13,11 +13,11 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use
super
::{
CompletionChoice
,
CompletionRe
quest
,
CompletionRe
sponse
};
use
super
::{
CompletionChoice
,
CompletionRe
sponse
,
NvCreate
CompletionRe
quest
};
use
crate
::
protocols
::
common
;
use
crate
::
protocols
::
openai
::
CompletionUsage
;
impl
CompletionRequest
{
impl
NvCreate
CompletionRequest
{
// put this method on the request
// inspect the request to extract options
pub
fn
response_generator
(
&
self
)
->
DeltaGenerator
{
...
...
lib/llm/src/types.rs
View file @
c103d56a
...
...
@@ -24,14 +24,15 @@ pub mod openai {
pub
mod
completions
{
use
super
::
*
;
pub
use
protocols
::
openai
::
completions
::{
CompletionRe
quest
,
CompletionRe
sponse
};
pub
use
protocols
::
openai
::
completions
::{
CompletionRe
sponse
,
NvCreate
CompletionRe
quest
};
/// A [`UnaryEngine`] implementation for the OpenAI Completions API
pub
type
OpenAICompletionsUnaryEngine
=
UnaryEngine
<
CompletionRequest
,
CompletionResponse
>
;
pub
type
OpenAICompletionsUnaryEngine
=
UnaryEngine
<
NvCreateCompletionRequest
,
CompletionResponse
>
;
/// A [`ServerStreamingEngine`] implementation for the OpenAI Completions API
pub
type
OpenAICompletionsStreamingEngine
=
ServerStreamingEngine
<
CompletionRequest
,
Annotated
<
CompletionResponse
>>
;
ServerStreamingEngine
<
NvCreate
CompletionRequest
,
Annotated
<
CompletionResponse
>>
;
}
pub
mod
chat_completions
{
...
...
lib/llm/tests/http-service.rs
View file @
c103d56a
...
...
@@ -24,7 +24,7 @@ use dynamo_llm::http::service::{
use
dynamo_llm
::
protocols
::{
openai
::{
chat_completions
::{
NvCreateChatCompletionRequest
,
NvCreateChatCompletionStreamResponse
},
completions
::{
CompletionRe
quest
,
CompletionRe
sponse
},
completions
::{
CompletionRe
sponse
,
NvCreate
CompletionRe
quest
},
},
Annotated
,
};
...
...
@@ -101,12 +101,12 @@ impl
}
#[async_trait]
impl
AsyncEngine
<
SingleIn
<
CompletionRequest
>
,
ManyOut
<
Annotated
<
CompletionResponse
>>
,
Error
>
impl
AsyncEngine
<
SingleIn
<
NvCreate
CompletionRequest
>
,
ManyOut
<
Annotated
<
CompletionResponse
>>
,
Error
>
for
AlwaysFailEngine
{
async
fn
generate
(
&
self
,
_
request
:
SingleIn
<
CompletionRequest
>
,
_
request
:
SingleIn
<
NvCreate
CompletionRequest
>
,
)
->
Result
<
ManyOut
<
Annotated
<
CompletionResponse
>>
,
Error
>
{
Err
(
HttpError
{
code
:
401
,
...
...
lib/llm/tests/openai_completions.rs
View file @
c103d56a
...
...
@@ -14,12 +14,12 @@
// limitations under the License.
use
async_openai
::
types
::
CreateCompletionRequestArgs
;
use
dynamo_llm
::
protocols
::
openai
::{
self
,
completions
::
CompletionRequest
};
use
dynamo_llm
::
protocols
::
openai
::{
self
,
completions
::
NvCreate
CompletionRequest
};
use
serde
::{
Deserialize
,
Serialize
};
#[derive(Serialize,
Deserialize,
Debug,
Clone)]
struct
CompletionSample
{
request
:
CompletionRequest
,
request
:
NvCreate
CompletionRequest
,
description
:
String
,
}
...
...
@@ -36,7 +36,7 @@ impl CompletionSample {
let
inner
=
builder
.build
()
.unwrap
();
let
request
=
CompletionRequest
{
inner
,
nvext
:
None
};
let
request
=
NvCreate
CompletionRequest
{
inner
,
nvext
:
None
};
Ok
(
Self
{
request
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment