Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
1af7433b
Commit
1af7433b
authored
Mar 05, 2025
by
Neelay Shah
Committed by
GitHub
Mar 05, 2025
Browse files
refactor: rename triton_distributed to dynemo (#22)
Co-authored-by:
Graham King
<
grahamk@nvidia.com
>
parent
ee4ef06b
Changes
165
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
37 additions
and
40 deletions
+37
-40
lib/llm/src/preprocessor.rs
lib/llm/src/preprocessor.rs
+3
-3
lib/llm/src/protocols.rs
lib/llm/src/protocols.rs
+5
-5
lib/llm/src/protocols/common/preprocessor.rs
lib/llm/src/protocols/common/preprocessor.rs
+1
-1
lib/llm/src/protocols/openai.rs
lib/llm/src/protocols/openai.rs
+0
-1
lib/llm/src/protocols/openai/chat_completions.rs
lib/llm/src/protocols/openai/chat_completions.rs
+1
-1
lib/llm/src/protocols/openai/completions.rs
lib/llm/src/protocols/openai/completions.rs
+1
-1
lib/llm/src/tokenizers/README.md
lib/llm/src/tokenizers/README.md
+4
-4
lib/llm/src/types.rs
lib/llm/src/types.rs
+1
-1
lib/llm/tests/aggregators.rs
lib/llm/tests/aggregators.rs
+2
-2
lib/llm/tests/backend.rs
lib/llm/tests/backend.rs
+2
-2
lib/llm/tests/http-service.rs
lib/llm/tests/http-service.rs
+6
-6
lib/llm/tests/model_card.rs
lib/llm/tests/model_card.rs
+1
-3
lib/llm/tests/openai_completions.rs
lib/llm/tests/openai_completions.rs
+1
-1
lib/llm/tests/preprocessor.rs
lib/llm/tests/preprocessor.rs
+3
-3
lib/llm/tests/snapshots/openai_completions__minimum_viable_request.snap
...snapshots/openai_completions__minimum_viable_request.snap
+1
-1
lib/llm/tests/snapshots/openai_completions__valid_samples-10.snap
...tests/snapshots/openai_completions__valid_samples-10.snap
+1
-1
lib/llm/tests/snapshots/openai_completions__valid_samples-3.snap
.../tests/snapshots/openai_completions__valid_samples-3.snap
+1
-1
lib/llm/tests/snapshots/openai_completions__valid_samples-4.snap
.../tests/snapshots/openai_completions__valid_samples-4.snap
+1
-1
lib/llm/tests/snapshots/openai_completions__valid_samples-7.snap
.../tests/snapshots/openai_completions__valid_samples-7.snap
+1
-1
lib/llm/tests/snapshots/openai_completions__valid_samples-9.snap
.../tests/snapshots/openai_completions__valid_samples-9.snap
+1
-1
No files found.
lib/llm/src/preprocessor.rs
View file @
1af7433b
...
...
@@ -35,11 +35,11 @@ use tracing;
use
crate
::
model_card
::
model
::{
ModelDeploymentCard
,
ModelInfo
,
TokenizerKind
};
use
crate
::
preprocessor
::
prompt
::
OAIChatLikeRequest
;
use
triton_distributed
_runtime
::
engine
::{
AsyncEngine
,
AsyncEngineContextProvider
,
ResponseStream
};
use
triton_distributed
_runtime
::
pipeline
::{
use
dynemo
_runtime
::
engine
::{
AsyncEngine
,
AsyncEngineContextProvider
,
ResponseStream
};
use
dynemo
_runtime
::
pipeline
::{
async_trait
,
AsyncEngineContext
,
Error
,
ManyOut
,
Operator
,
SingleIn
,
};
use
triton_distributed
_runtime
::
protocols
::
annotated
::{
Annotated
,
AnnotationsProvider
};
use
dynemo
_runtime
::
protocols
::
annotated
::{
Annotated
,
AnnotationsProvider
};
use
crate
::
protocols
::{
common
::{
SamplingOptionsProvider
,
StopConditionsProvider
},
...
...
lib/llm/src/protocols.rs
View file @
1af7433b
...
...
@@ -13,10 +13,10 @@
// See the License for the specific language governing permissions and
// limitations under the License.
//! #
Triton
LLM Protocols
//! #
Dynemo
LLM Protocols
//!
//! This module contains the protocols, i.e. message formats, used to exchange requests and responses
//! both publicly via the HTTP API and internally between
Triton
components.
//! both publicly via the HTTP API and internally between
Dynemo
components.
//!
use
futures
::{
Stream
,
StreamExt
};
...
...
@@ -33,10 +33,10 @@ pub type DataStream<T> = Pin<Box<dyn Stream<Item = T> + Send + Sync>>;
// TODO: This is an awkward dependency that we need to address
// Originally, all the Annotated/SSE Codec bits were in the LLM protocol module; however, [Annotated]
// has become the common response envelope for
triton
-distributed.
// has become the common response envelope for
dynemo
-distributed.
// We may want to move the original Annotated back here and have an Infallible conversion to the
// ResponseEnvelop in
triton
-distributed.
pub
use
triton_distributed
_runtime
::
protocols
::
annotated
::
Annotated
;
// ResponseEnvelop in
dynemo
-distributed.
pub
use
dynemo
_runtime
::
protocols
::
annotated
::
Annotated
;
/// The LLM responses have multiple different fields and nests of objects to get to the actual
/// text completion returned. This trait can be applied to the `choice` level objects to extract
...
...
lib/llm/src/protocols/common/preprocessor.rs
View file @
1af7433b
...
...
@@ -19,7 +19,7 @@ use serde::{Deserialize, Serialize};
use
super
::{
SamplingOptions
,
StopConditions
};
use
crate
::
protocols
::
TokenIdType
;
/// [`PreprocessedRequest`] is the internal representation of an LLM request. The [`
triton
-llm-preprocessor`]
/// [`PreprocessedRequest`] is the internal representation of an LLM request. The [`
dynemo
-llm-preprocessor`]
/// crate is responsible for converting request from the public APIs to this internal representation.
#[derive(Serialize,
Deserialize,
Debug,
Clone,
Builder)]
pub
struct
PreprocessedRequest
{
...
...
lib/llm/src/protocols/openai.rs
View file @
1af7433b
...
...
@@ -13,7 +13,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.
/// Forward openai_api_rs::v1 to triton_distributed_llm::protocols::openai::v1
pub
mod
chat_completions
;
pub
mod
completions
;
pub
mod
models
;
...
...
lib/llm/src/protocols/openai/chat_completions.rs
View file @
1af7433b
...
...
@@ -17,8 +17,8 @@ use super::nvext::NvExt;
use
super
::
nvext
::
NvExtProvider
;
use
super
::
OpenAISamplingOptionsProvider
;
use
super
::
OpenAIStopConditionsProvider
;
use
dynemo_runtime
::
protocols
::
annotated
::
AnnotationsProvider
;
use
serde
::{
Deserialize
,
Serialize
};
use
triton_distributed_runtime
::
protocols
::
annotated
::
AnnotationsProvider
;
use
validator
::
Validate
;
mod
aggregator
;
...
...
lib/llm/src/protocols/openai/completions.rs
View file @
1af7433b
...
...
@@ -31,7 +31,7 @@ use super::{
CompletionUsage
,
ContentProvider
,
OpenAISamplingOptionsProvider
,
OpenAIStopConditionsProvider
,
};
use
triton_distributed
_runtime
::
protocols
::
annotated
::
AnnotationsProvider
;
use
dynemo
_runtime
::
protocols
::
annotated
::
AnnotationsProvider
;
#[derive(Serialize,
Deserialize,
Validate,
Debug,
Clone)]
pub
struct
CompletionRequest
{
...
...
lib/llm/src/tokenizers/README.md
View file @
1af7433b
...
...
@@ -13,7 +13,7 @@
#### HuggingFace Tokenizer
```
rust
use
triton_distributed
_llm
::
tokenizers
::
hf
::
HuggingFaceTokenizer
;
use
dynemo
_llm
::
tokenizers
::
hf
::
HuggingFaceTokenizer
;
let
hf_tokenizer
=
HuggingFaceTokenizer
::
from_file
(
"tests/data/sample-models/TinyLlama_v1.1/tokenizer.json"
)
.expect
(
"Failed to load HuggingFace tokenizer"
);
...
...
@@ -22,7 +22,7 @@ let hf_tokenizer = HuggingFaceTokenizer::from_file("tests/data/sample-models/Tin
### Encoding and Decoding Text
```
rust
use
triton_distributed
_llm
::
tokenizers
::{
HuggingFaceTokenizer
,
traits
::{
Encoder
,
Decoder
}};
use
dynemo
_llm
::
tokenizers
::{
HuggingFaceTokenizer
,
traits
::{
Encoder
,
Decoder
}};
let
tokenizer
=
HuggingFaceTokenizer
::
from_file
(
"tests/data/sample-models/TinyLlama_v1.1/tokenizer.json"
)
.expect
(
"Failed to load HuggingFace tokenizer"
);
...
...
@@ -40,7 +40,7 @@ assert_eq!(text, decoded_text);
// Using the Sequence object for encoding and decoding
use
triton_distributed
_llm
::
tokenizers
::{
Sequence
,
Tokenizer
};
use
dynemo
_llm
::
tokenizers
::{
Sequence
,
Tokenizer
};
use
std
::
sync
::{
Arc
,
RwLock
};
let
tokenizer
=
Tokenizer
::
from
(
Arc
::
new
(
tokenizer
));
...
...
@@ -51,4 +51,4 @@ sequence.append_text("Your sample text here")
let
delta
=
sequence
.append_token_id
(
1337
)
.expect
(
"Failed to append token_id"
);
```
\ No newline at end of file
```
lib/llm/src/types.rs
View file @
1af7433b
...
...
@@ -19,7 +19,7 @@ pub use protocols::{Annotated, TokenIdType};
pub
mod
openai
{
use
super
::
*
;
use
triton_distributed
_runtime
::
pipeline
::{
ServerStreamingEngine
,
UnaryEngine
};
use
dynemo
_runtime
::
pipeline
::{
ServerStreamingEngine
,
UnaryEngine
};
pub
mod
completions
{
use
super
::
*
;
...
...
lib/llm/tests/aggregators.rs
View file @
1af7433b
...
...
@@ -13,12 +13,12 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use
futures
::
StreamExt
;
use
triton_distributed_llm
::
protocols
::{
use
dynemo_llm
::
protocols
::{
codec
::{
create_message_stream
,
Message
,
SseCodecError
},
openai
::{
chat_completions
::
NvCreateChatCompletionResponse
,
completions
::
CompletionResponse
},
ContentProvider
,
DataStream
,
};
use
futures
::
StreamExt
;
const
CMPL_ROOT_PATH
:
&
str
=
"tests/data/replays/meta/llama-3.1-8b-instruct/completions"
;
const
CHAT_ROOT_PATH
:
&
str
=
"tests/data/replays/meta/llama-3.1-8b-instruct/chat_completions"
;
...
...
lib/llm/tests/backend.rs
View file @
1af7433b
...
...
@@ -13,8 +13,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use
triton_distributed
_llm
::
backend
::
Backend
;
use
triton_distributed
_llm
::
model_card
::
model
::
ModelDeploymentCard
;
use
dynemo
_llm
::
backend
::
Backend
;
use
dynemo
_llm
::
model_card
::
model
::
ModelDeploymentCard
;
#[tokio::test]
async
fn
test_sequence_factory
()
{
...
...
lib/llm/tests/http-service.rs
View file @
1af7433b
...
...
@@ -15,28 +15,28 @@
use
anyhow
::
Error
;
use
async_stream
::
stream
;
use
prometheus
::{
proto
::
MetricType
,
Registry
};
use
reqwest
::
StatusCode
;
use
std
::
sync
::
Arc
;
use
triton_distributed_llm
::
http
::
service
::{
use
dynemo_llm
::
http
::
service
::{
error
::
HttpError
,
metrics
::{
Endpoint
,
RequestType
,
Status
},
service_v2
::
HttpService
,
Metrics
,
};
use
triton_distributed
_llm
::
protocols
::{
use
dynemo
_llm
::
protocols
::{
openai
::{
chat_completions
::{
NvCreateChatCompletionRequest
,
NvCreateChatCompletionStreamResponse
},
completions
::{
CompletionRequest
,
CompletionResponse
},
},
Annotated
,
};
use
triton_distributed
_runtime
::{
use
dynemo
_runtime
::{
pipeline
::{
async_trait
,
AsyncEngine
,
AsyncEngineContextProvider
,
ManyOut
,
ResponseStream
,
SingleIn
,
},
CancellationToken
,
};
use
prometheus
::{
proto
::
MetricType
,
Registry
};
use
reqwest
::
StatusCode
;
use
std
::
sync
::
Arc
;
struct
CounterEngine
{}
...
...
lib/llm/tests/model_card.rs
View file @
1af7433b
...
...
@@ -13,10 +13,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use
dynemo_llm
::
model_card
::
model
::{
ModelDeploymentCard
,
PromptFormatterArtifact
,
TokenizerKind
};
use
tempfile
::
tempdir
;
use
triton_distributed_llm
::
model_card
::
model
::{
ModelDeploymentCard
,
PromptFormatterArtifact
,
TokenizerKind
,
};
const
HF_PATH
:
&
str
=
"tests/data/sample-models/TinyLlama_v1.1"
;
...
...
lib/llm/tests/openai_completions.rs
View file @
1af7433b
...
...
@@ -14,8 +14,8 @@
// limitations under the License.
use
async_openai
::
types
::
CreateCompletionRequestArgs
;
use
dynemo_llm
::
protocols
::
openai
::{
self
,
completions
::
CompletionRequest
};
use
serde
::{
Deserialize
,
Serialize
};
use
triton_distributed_llm
::
protocols
::
openai
::{
self
,
completions
::
CompletionRequest
};
#[derive(Serialize,
Deserialize,
Debug,
Clone)]
struct
CompletionSample
{
...
...
lib/llm/tests/preprocessor.rs
View file @
1af7433b
...
...
@@ -15,10 +15,10 @@
use
anyhow
::
Ok
;
use
dynemo_llm
::
model_card
::
model
::{
ModelDeploymentCard
,
PromptContextMixin
};
use
dynemo_llm
::
preprocessor
::
prompt
::
PromptFormatter
;
use
dynemo_llm
::
protocols
::
openai
::
chat_completions
::
NvCreateChatCompletionRequest
;
use
serde
::{
Deserialize
,
Serialize
};
use
triton_distributed_llm
::
model_card
::
model
::{
ModelDeploymentCard
,
PromptContextMixin
};
use
triton_distributed_llm
::
preprocessor
::
prompt
::
PromptFormatter
;
use
triton_distributed_llm
::
protocols
::
openai
::
chat_completions
::
NvCreateChatCompletionRequest
;
use
hf_hub
::{
api
::
tokio
::
ApiBuilder
,
Cache
,
Repo
,
RepoType
};
...
...
lib/llm/tests/snapshots/openai_completions__minimum_viable_request.snap
View file @
1af7433b
---
source:
triton
-llm/tests/openai_completions.rs
source:
dynemo
-llm/tests/openai_completions.rs
expression: request
---
{
...
...
lib/llm/tests/snapshots/openai_completions__valid_samples-10.snap
View file @
1af7433b
---
source:
triton
-llm/tests/openai_completions.rs
source:
dynemo
-llm/tests/openai_completions.rs
description: "should have prompt, model, and logit_bias fields with the logits_bias having two key/value pairs"
expression: sample.request
---
...
...
lib/llm/tests/snapshots/openai_completions__valid_samples-3.snap
View file @
1af7433b
---
source:
triton
-llm/tests/openai_completions.rs
source:
dynemo
-llm/tests/openai_completions.rs
description: "should have prompt, model, and temperature fields"
expression: sample.request
---
...
...
lib/llm/tests/snapshots/openai_completions__valid_samples-4.snap
View file @
1af7433b
---
source:
triton
-llm/tests/openai_completions.rs
source:
dynemo
-llm/tests/openai_completions.rs
description: "should have prompt, model, and top_p fields"
expression: sample.request
---
...
...
lib/llm/tests/snapshots/openai_completions__valid_samples-7.snap
View file @
1af7433b
---
source:
triton
-llm/tests/openai_completions.rs
source:
dynemo
-llm/tests/openai_completions.rs
description: "should have prompt, model, and stop fields"
expression: sample.request
---
...
...
lib/llm/tests/snapshots/openai_completions__valid_samples-9.snap
View file @
1af7433b
---
source:
triton
-llm/tests/openai_completions.rs
source:
dynemo
-llm/tests/openai_completions.rs
description: "should have prompt, model, and stream fields"
expression: sample.request
---
...
...
Prev
1
…
3
4
5
6
7
8
9
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment