Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
c103d56a
"lib/bindings/python/vscode:/vscode.git/clone" did not exist on "bdad6f1a50275b4607cb331573d49f073212455a"
Unverified
Commit
c103d56a
authored
Jun 04, 2025
by
Paul Hendricks
Committed by
GitHub
Jun 04, 2025
Browse files
refactor: Rename CompletionRequest to NvCreateCompletionRequest (#1383)
parent
cfd12d7f
Changes
13
Hide whitespace changes
Inline
Side-by-side
Showing
13 changed files
with
58 additions
and
53 deletions
+58
-53
launch/dynamo-run/src/input/common.rs
launch/dynamo-run/src/input/common.rs
+2
-2
launch/dynamo-run/src/input/http.rs
launch/dynamo-run/src/input/http.rs
+5
-5
lib/engines/mistralrs/src/lib.rs
lib/engines/mistralrs/src/lib.rs
+3
-3
lib/llm/src/discovery/watcher.rs
lib/llm/src/discovery/watcher.rs
+7
-8
lib/llm/src/engines.rs
lib/llm/src/engines.rs
+12
-9
lib/llm/src/http/service/openai.rs
lib/llm/src/http/service/openai.rs
+5
-3
lib/llm/src/preprocessor.rs
lib/llm/src/preprocessor.rs
+3
-3
lib/llm/src/preprocessor/prompt/template/oai.rs
lib/llm/src/preprocessor/prompt/template/oai.rs
+2
-2
lib/llm/src/protocols/openai/completions.rs
lib/llm/src/protocols/openai/completions.rs
+7
-7
lib/llm/src/protocols/openai/completions/delta.rs
lib/llm/src/protocols/openai/completions/delta.rs
+2
-2
lib/llm/src/types.rs
lib/llm/src/types.rs
+4
-3
lib/llm/tests/http-service.rs
lib/llm/tests/http-service.rs
+3
-3
lib/llm/tests/openai_completions.rs
lib/llm/tests/openai_completions.rs
+3
-3
No files found.
launch/dynamo-run/src/input/common.rs
View file @
c103d56a
...
...
@@ -139,7 +139,7 @@ mod tests {
use
super
::
*
;
use
dynamo_llm
::
types
::
openai
::{
chat_completions
::{
NvCreateChatCompletionRequest
,
NvCreateChatCompletionStreamResponse
},
completions
::{
CompletionRe
quest
,
CompletionRe
sponse
},
completions
::{
CompletionRe
sponse
,
NvCreate
CompletionRe
quest
},
};
const
HF_PATH
:
&
str
=
concat!
(
...
...
@@ -174,7 +174,7 @@ mod tests {
// Build pipeline for completions
let
pipeline
=
build_pipeline
::
<
CompletionRequest
,
CompletionResponse
>
(
&
card
,
engine
)
.await
?
;
build_pipeline
::
<
NvCreate
CompletionRequest
,
CompletionResponse
>
(
&
card
,
engine
)
.await
?
;
// Verify pipeline was created
assert
!
(
Arc
::
strong_count
(
&
pipeline
)
>=
1
);
...
...
launch/dynamo-run/src/input/http.rs
View file @
c103d56a
...
...
@@ -15,7 +15,7 @@ use dynamo_llm::{
openai
::
chat_completions
::{
NvCreateChatCompletionRequest
,
NvCreateChatCompletionStreamResponse
,
},
openai
::
completions
::{
CompletionRe
quest
,
CompletionRe
sponse
},
openai
::
completions
::{
CompletionRe
sponse
,
NvCreate
CompletionRe
quest
},
},
};
use
dynamo_runtime
::
pipeline
::
RouterMode
;
...
...
@@ -76,10 +76,10 @@ pub async fn run(
.await
?
;
manager
.add_chat_completions_model
(
model
.service_name
(),
chat_pipeline
)
?
;
let
cmpl_pipeline
=
common
::
build_pipeline
::
<
CompletionRequest
,
CompletionResponse
>
(
model
.card
()
,
inner_engin
e
,
)
let
cmpl_pipeline
=
common
::
build_pipeline
::
<
NvCreateCompletionRequest
,
CompletionRespons
e
,
>
(
model
.card
(),
inner_engine
)
.await
?
;
manager
.add_completions_model
(
model
.service_name
(),
cmpl_pipeline
)
?
;
}
...
...
lib/engines/mistralrs/src/lib.rs
View file @
c103d56a
...
...
@@ -25,7 +25,7 @@ use dynamo_runtime::protocols::annotated::Annotated;
use
dynamo_llm
::
protocols
::
openai
::{
chat_completions
::{
NvCreateChatCompletionRequest
,
NvCreateChatCompletionStreamResponse
},
completions
::{
prompt_to_string
,
CompletionRe
quest
,
CompletionRe
sponse
},
completions
::{
prompt_to_string
,
CompletionRe
sponse
,
NvCreate
CompletionRe
quest
},
embeddings
::{
NvCreateEmbeddingRequest
,
NvCreateEmbeddingResponse
},
};
...
...
@@ -470,12 +470,12 @@ fn to_logit_bias(lb: HashMap<String, serde_json::Value>) -> HashMap<u32, f32> {
}
#[async_trait]
impl
AsyncEngine
<
SingleIn
<
CompletionRequest
>
,
ManyOut
<
Annotated
<
CompletionResponse
>>
,
Error
>
impl
AsyncEngine
<
SingleIn
<
NvCreate
CompletionRequest
>
,
ManyOut
<
Annotated
<
CompletionResponse
>>
,
Error
>
for
MistralRsEngine
{
async
fn
generate
(
&
self
,
request
:
SingleIn
<
CompletionRequest
>
,
request
:
SingleIn
<
NvCreate
CompletionRequest
>
,
)
->
Result
<
ManyOut
<
Annotated
<
CompletionResponse
>>
,
Error
>
{
let
(
request
,
context
)
=
request
.transfer
(());
let
ctx
=
context
.context
();
...
...
lib/llm/src/discovery/watcher.rs
View file @
c103d56a
...
...
@@ -25,7 +25,7 @@ use crate::{
protocols
::
openai
::
chat_completions
::{
NvCreateChatCompletionRequest
,
NvCreateChatCompletionStreamResponse
,
},
protocols
::
openai
::
completions
::{
CompletionRe
quest
,
CompletionRe
sponse
},
protocols
::
openai
::
completions
::{
CompletionRe
sponse
,
NvCreate
CompletionRe
quest
},
protocols
::
openai
::
embeddings
::{
NvCreateEmbeddingRequest
,
NvCreateEmbeddingResponse
},
};
...
...
@@ -239,7 +239,7 @@ impl ModelWatcher {
.add_chat_completions_model
(
&
model_entry
.name
,
chat_engine
)
?
;
let
frontend
=
SegmentSource
::
<
SingleIn
<
CompletionRequest
>
,
SingleIn
<
NvCreate
CompletionRequest
>
,
ManyOut
<
Annotated
<
CompletionResponse
>>
,
>
::
new
();
let
preprocessor
=
OpenAIPreprocessor
::
new
(
card
.clone
())
.await
?
.into_operator
();
...
...
@@ -290,12 +290,11 @@ impl ModelWatcher {
.add_chat_completions_model
(
&
model_entry
.name
,
engine
)
?
;
}
ModelType
::
Completion
=>
{
let
push_router
=
PushRouter
::
<
CompletionRequest
,
Annotated
<
CompletionResponse
>>
::
from_client
(
client
,
Default
::
default
(),
)
.await
?
;
let
push_router
=
PushRouter
::
<
NvCreateCompletionRequest
,
Annotated
<
CompletionResponse
>
,
>
::
from_client
(
client
,
Default
::
default
())
.await
?
;
let
engine
=
Arc
::
new
(
push_router
);
self
.manager
.add_completions_model
(
&
model_entry
.name
,
engine
)
?
;
...
...
lib/llm/src/engines.rs
View file @
c103d56a
...
...
@@ -30,7 +30,7 @@ use crate::preprocessor::PreprocessedRequest;
use
crate
::
protocols
::
common
::
llm_backend
::
LLMEngineOutput
;
use
crate
::
protocols
::
openai
::{
chat_completions
::{
NvCreateChatCompletionRequest
,
NvCreateChatCompletionStreamResponse
},
completions
::{
prompt_to_string
,
CompletionRe
quest
,
CompletionRe
sponse
},
completions
::{
prompt_to_string
,
CompletionRe
sponse
,
NvCreate
CompletionRe
quest
},
};
use
crate
::
types
::
openai
::
embeddings
::
NvCreateEmbeddingRequest
;
use
crate
::
types
::
openai
::
embeddings
::
NvCreateEmbeddingResponse
;
...
...
@@ -140,7 +140,7 @@ impl<E> EngineDispatcher<E> {
pub
trait
StreamingEngine
:
Send
+
Sync
{
async
fn
handle_completion
(
&
self
,
req
:
SingleIn
<
CompletionRequest
>
,
req
:
SingleIn
<
NvCreate
CompletionRequest
>
,
)
->
Result
<
ManyOut
<
Annotated
<
CompletionResponse
>>
,
Error
>
;
async
fn
handle_chat
(
...
...
@@ -218,12 +218,12 @@ impl
}
#[async_trait]
impl
AsyncEngine
<
SingleIn
<
CompletionRequest
>
,
ManyOut
<
Annotated
<
CompletionResponse
>>
,
Error
>
impl
AsyncEngine
<
SingleIn
<
NvCreate
CompletionRequest
>
,
ManyOut
<
Annotated
<
CompletionResponse
>>
,
Error
>
for
EchoEngineFull
{
async
fn
generate
(
&
self
,
incoming_request
:
SingleIn
<
CompletionRequest
>
,
incoming_request
:
SingleIn
<
NvCreate
CompletionRequest
>
,
)
->
Result
<
ManyOut
<
Annotated
<
CompletionResponse
>>
,
Error
>
{
let
(
request
,
context
)
=
incoming_request
.transfer
(());
let
deltas
=
request
.response_generator
();
...
...
@@ -265,8 +265,11 @@ impl
#[async_trait]
impl
<
E
>
StreamingEngine
for
EngineDispatcher
<
E
>
where
E
:
AsyncEngine
<
SingleIn
<
CompletionRequest
>
,
ManyOut
<
Annotated
<
CompletionResponse
>>
,
Error
>
+
AsyncEngine
<
E
:
AsyncEngine
<
SingleIn
<
NvCreateCompletionRequest
>
,
ManyOut
<
Annotated
<
CompletionResponse
>>
,
Error
,
>
+
AsyncEngine
<
SingleIn
<
NvCreateChatCompletionRequest
>
,
ManyOut
<
Annotated
<
NvCreateChatCompletionStreamResponse
>>
,
Error
,
...
...
@@ -279,7 +282,7 @@ where
{
async
fn
handle_completion
(
&
self
,
req
:
SingleIn
<
CompletionRequest
>
,
req
:
SingleIn
<
NvCreate
CompletionRequest
>
,
)
->
Result
<
ManyOut
<
Annotated
<
CompletionResponse
>>
,
Error
>
{
self
.inner
.generate
(
req
)
.await
}
...
...
@@ -343,12 +346,12 @@ impl StreamingEngineAdapter {
}
#[async_trait]
impl
AsyncEngine
<
SingleIn
<
CompletionRequest
>
,
ManyOut
<
Annotated
<
CompletionResponse
>>
,
Error
>
impl
AsyncEngine
<
SingleIn
<
NvCreate
CompletionRequest
>
,
ManyOut
<
Annotated
<
CompletionResponse
>>
,
Error
>
for
StreamingEngineAdapter
{
async
fn
generate
(
&
self
,
req
:
SingleIn
<
CompletionRequest
>
,
req
:
SingleIn
<
NvCreate
CompletionRequest
>
,
)
->
Result
<
ManyOut
<
Annotated
<
CompletionResponse
>>
,
Error
>
{
self
.0
.handle_completion
(
req
)
.await
}
...
...
lib/llm/src/http/service/openai.rs
View file @
c103d56a
...
...
@@ -33,7 +33,9 @@ use crate::protocols::openai::{
};
use
crate
::
request_template
::
RequestTemplate
;
use
crate
::
types
::{
openai
::{
chat_completions
::
NvCreateChatCompletionRequest
,
completions
::
CompletionRequest
},
openai
::{
chat_completions
::
NvCreateChatCompletionRequest
,
completions
::
NvCreateCompletionRequest
,
},
Annotated
,
};
...
...
@@ -120,7 +122,7 @@ impl From<HttpError> for ErrorResponse {
#[tracing::instrument(skip_all)]
async
fn
completions
(
State
(
state
):
State
<
Arc
<
service_v2
::
State
>>
,
Json
(
request
):
Json
<
CompletionRequest
>
,
Json
(
request
):
Json
<
NvCreate
CompletionRequest
>
,
)
->
Result
<
Response
,
(
StatusCode
,
Json
<
ErrorResponse
>
)
>
{
// return a 503 if the service is not ready
check_ready
(
&
state
)
?
;
...
...
@@ -137,7 +139,7 @@ async fn completions(
..
request
.inner
};
let
request
=
CompletionRequest
{
let
request
=
NvCreate
CompletionRequest
{
inner
,
nvext
:
request
.nvext
,
};
...
...
lib/llm/src/preprocessor.rs
View file @
c103d56a
...
...
@@ -46,7 +46,7 @@ use crate::protocols::{
common
::{
SamplingOptionsProvider
,
StopConditionsProvider
},
openai
::{
chat_completions
::{
NvCreateChatCompletionRequest
,
NvCreateChatCompletionStreamResponse
},
completions
::{
CompletionRe
quest
,
CompletionRe
sponse
},
completions
::{
CompletionRe
sponse
,
NvCreate
CompletionRe
quest
},
nvext
::
NvExtProvider
,
DeltaGeneratorExt
,
},
...
...
@@ -341,7 +341,7 @@ impl
#[async_trait]
impl
Operator
<
SingleIn
<
CompletionRequest
>
,
SingleIn
<
NvCreate
CompletionRequest
>
,
ManyOut
<
Annotated
<
CompletionResponse
>>
,
SingleIn
<
PreprocessedRequest
>
,
ManyOut
<
Annotated
<
BackendOutput
>>
,
...
...
@@ -349,7 +349,7 @@ impl
{
async
fn
generate
(
&
self
,
request
:
SingleIn
<
CompletionRequest
>
,
request
:
SingleIn
<
NvCreate
CompletionRequest
>
,
next
:
Arc
<
dyn
AsyncEngine
<
SingleIn
<
PreprocessedRequest
>
,
...
...
lib/llm/src/preprocessor/prompt/template/oai.rs
View file @
c103d56a
...
...
@@ -18,7 +18,7 @@ use super::*;
use
minijinja
::{
context
,
value
::
Value
};
use
crate
::
protocols
::
openai
::{
chat_completions
::
NvCreateChatCompletionRequest
,
completions
::
CompletionRequest
,
chat_completions
::
NvCreateChatCompletionRequest
,
completions
::
NvCreate
CompletionRequest
,
};
use
tracing
;
...
...
@@ -55,7 +55,7 @@ impl OAIChatLikeRequest for NvCreateChatCompletionRequest {
}
}
impl
OAIChatLikeRequest
for
CompletionRequest
{
impl
OAIChatLikeRequest
for
NvCreate
CompletionRequest
{
fn
messages
(
&
self
)
->
minijinja
::
value
::
Value
{
let
message
=
async_openai
::
types
::
ChatCompletionRequestMessage
::
User
(
async_openai
::
types
::
ChatCompletionRequestUserMessage
{
...
...
lib/llm/src/protocols/openai/completions.rs
View file @
c103d56a
...
...
@@ -34,7 +34,7 @@ use super::{
use
dynamo_runtime
::
protocols
::
annotated
::
AnnotationsProvider
;
#[derive(Serialize,
Deserialize,
Validate,
Debug,
Clone)]
pub
struct
CompletionRequest
{
pub
struct
NvCreate
CompletionRequest
{
#[serde(flatten)]
pub
inner
:
async_openai
::
types
::
CreateCompletionRequest
,
...
...
@@ -141,7 +141,7 @@ pub fn prompt_to_string(prompt: &async_openai::types::Prompt) -> String {
}
}
impl
NvExtProvider
for
CompletionRequest
{
impl
NvExtProvider
for
NvCreate
CompletionRequest
{
fn
nvext
(
&
self
)
->
Option
<&
NvExt
>
{
self
.nvext
.as_ref
()
}
...
...
@@ -158,7 +158,7 @@ impl NvExtProvider for CompletionRequest {
}
}
impl
AnnotationsProvider
for
CompletionRequest
{
impl
AnnotationsProvider
for
NvCreate
CompletionRequest
{
fn
annotations
(
&
self
)
->
Option
<
Vec
<
String
>>
{
self
.nvext
.as_ref
()
...
...
@@ -174,7 +174,7 @@ impl AnnotationsProvider for CompletionRequest {
}
}
impl
OpenAISamplingOptionsProvider
for
CompletionRequest
{
impl
OpenAISamplingOptionsProvider
for
NvCreate
CompletionRequest
{
fn
get_temperature
(
&
self
)
->
Option
<
f32
>
{
self
.inner.temperature
}
...
...
@@ -196,7 +196,7 @@ impl OpenAISamplingOptionsProvider for CompletionRequest {
}
}
impl
OpenAIStopConditionsProvider
for
CompletionRequest
{
impl
OpenAIStopConditionsProvider
for
NvCreate
CompletionRequest
{
fn
get_max_tokens
(
&
self
)
->
Option
<
u32
>
{
self
.inner.max_tokens
}
...
...
@@ -255,10 +255,10 @@ impl ResponseFactory {
}
/// Implements `TryFrom` for converting an OpenAI `NvCreateCompletionRequest` into the engine's `common::CompletionRequest`
impl
TryFrom
<
CompletionRequest
>
for
common
::
CompletionRequest
{
impl
TryFrom
<
NvCreate
CompletionRequest
>
for
common
::
CompletionRequest
{
type
Error
=
anyhow
::
Error
;
fn
try_from
(
request
:
CompletionRequest
)
->
Result
<
Self
,
Self
::
Error
>
{
fn
try_from
(
request
:
NvCreate
CompletionRequest
)
->
Result
<
Self
,
Self
::
Error
>
{
// openai_api_rs::v1::completion::CompletionRequest {
// NA pub model: String,
// pub prompt: String,
...
...
lib/llm/src/protocols/openai/completions/delta.rs
View file @
c103d56a
...
...
@@ -13,11 +13,11 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use
super
::{
CompletionChoice
,
CompletionRe
quest
,
CompletionRe
sponse
};
use
super
::{
CompletionChoice
,
CompletionRe
sponse
,
NvCreate
CompletionRe
quest
};
use
crate
::
protocols
::
common
;
use
crate
::
protocols
::
openai
::
CompletionUsage
;
impl
CompletionRequest
{
impl
NvCreate
CompletionRequest
{
// put this method on the request
// inspect the request to extract options
pub
fn
response_generator
(
&
self
)
->
DeltaGenerator
{
...
...
lib/llm/src/types.rs
View file @
c103d56a
...
...
@@ -24,14 +24,15 @@ pub mod openai {
pub
mod
completions
{
use
super
::
*
;
pub
use
protocols
::
openai
::
completions
::{
CompletionRe
quest
,
CompletionRe
sponse
};
pub
use
protocols
::
openai
::
completions
::{
CompletionRe
sponse
,
NvCreate
CompletionRe
quest
};
/// A [`UnaryEngine`] implementation for the OpenAI Completions API
pub
type
OpenAICompletionsUnaryEngine
=
UnaryEngine
<
CompletionRequest
,
CompletionResponse
>
;
pub
type
OpenAICompletionsUnaryEngine
=
UnaryEngine
<
NvCreateCompletionRequest
,
CompletionResponse
>
;
/// A [`ServerStreamingEngine`] implementation for the OpenAI Completions API
pub
type
OpenAICompletionsStreamingEngine
=
ServerStreamingEngine
<
CompletionRequest
,
Annotated
<
CompletionResponse
>>
;
ServerStreamingEngine
<
NvCreate
CompletionRequest
,
Annotated
<
CompletionResponse
>>
;
}
pub
mod
chat_completions
{
...
...
lib/llm/tests/http-service.rs
View file @
c103d56a
...
...
@@ -24,7 +24,7 @@ use dynamo_llm::http::service::{
use
dynamo_llm
::
protocols
::{
openai
::{
chat_completions
::{
NvCreateChatCompletionRequest
,
NvCreateChatCompletionStreamResponse
},
completions
::{
CompletionRe
quest
,
CompletionRe
sponse
},
completions
::{
CompletionRe
sponse
,
NvCreate
CompletionRe
quest
},
},
Annotated
,
};
...
...
@@ -101,12 +101,12 @@ impl
}
#[async_trait]
impl
AsyncEngine
<
SingleIn
<
CompletionRequest
>
,
ManyOut
<
Annotated
<
CompletionResponse
>>
,
Error
>
impl
AsyncEngine
<
SingleIn
<
NvCreate
CompletionRequest
>
,
ManyOut
<
Annotated
<
CompletionResponse
>>
,
Error
>
for
AlwaysFailEngine
{
async
fn
generate
(
&
self
,
_
request
:
SingleIn
<
CompletionRequest
>
,
_
request
:
SingleIn
<
NvCreate
CompletionRequest
>
,
)
->
Result
<
ManyOut
<
Annotated
<
CompletionResponse
>>
,
Error
>
{
Err
(
HttpError
{
code
:
401
,
...
...
lib/llm/tests/openai_completions.rs
View file @
c103d56a
...
...
@@ -14,12 +14,12 @@
// limitations under the License.
use
async_openai
::
types
::
CreateCompletionRequestArgs
;
use
dynamo_llm
::
protocols
::
openai
::{
self
,
completions
::
CompletionRequest
};
use
dynamo_llm
::
protocols
::
openai
::{
self
,
completions
::
NvCreate
CompletionRequest
};
use
serde
::{
Deserialize
,
Serialize
};
#[derive(Serialize,
Deserialize,
Debug,
Clone)]
struct
CompletionSample
{
request
:
CompletionRequest
,
request
:
NvCreate
CompletionRequest
,
description
:
String
,
}
...
...
@@ -36,7 +36,7 @@ impl CompletionSample {
let
inner
=
builder
.build
()
.unwrap
();
let
request
=
CompletionRequest
{
inner
,
nvext
:
None
};
let
request
=
NvCreate
CompletionRequest
{
inner
,
nvext
:
None
};
Ok
(
Self
{
request
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment