Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
12d73a82
Commit
12d73a82
authored
Feb 28, 2025
by
NVShreyas
Committed by
GitHub
Feb 28, 2025
Browse files
Updates to support DS R1 in TRTLLM example (#301)
parent
e584e96f
Changes
6
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
23 additions
and
18 deletions
+23
-18
container/Dockerfile
container/Dockerfile
+1
-1
deploy/Kubernetes/_build/tester.containerfile
deploy/Kubernetes/_build/tester.containerfile
+1
-1
deploy/docker-compose.yml
deploy/docker-compose.yml
+1
-1
examples/python_rs/llm/tensorrt_llm/common/parser.py
examples/python_rs/llm/tensorrt_llm/common/parser.py
+3
-0
examples/python_rs/llm/tensorrt_llm/disaggregated/llmapi_disaggregated_configs/single_node_config.yaml
...ated/llmapi_disaggregated_configs/single_node_config.yaml
+10
-10
examples/python_rs/llm/tensorrt_llm/disaggregated/worker.py
examples/python_rs/llm/tensorrt_llm/disaggregated/worker.py
+7
-5
No files found.
container/Dockerfile
View file @
12d73a82
# SPDX-FileCopyrightText: Copyright (c)
2024-
2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# Licensed under the Apache License, Version 2.0 (the "License");
...
...
deploy/Kubernetes/_build/tester.containerfile
View file @
12d73a82
# SPDX-FileCopyrightText: Copyright (c)
2024-
2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# Licensed under the Apache License, Version 2.0 (the "License");
...
...
deploy/docker-compose.yml
View file @
12d73a82
# SPDX-FileCopyrightText: Copyright (c)
2024-
2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# Licensed under the Apache License, Version 2.0 (the "License");
...
...
examples/python_rs/llm/tensorrt_llm/common/parser.py
View file @
12d73a82
...
@@ -16,6 +16,7 @@
...
@@ -16,6 +16,7 @@
import
argparse
import
argparse
import
json
import
json
import
os
import
os
from
pathlib
import
Path
from
typing
import
Any
,
Dict
,
Tuple
from
typing
import
Any
,
Dict
,
Tuple
# Define the expected keys for each config
# Define the expected keys for each config
...
@@ -65,6 +66,8 @@ def _get_llm_args(args_dict):
...
@@ -65,6 +66,8 @@ def _get_llm_args(args_dict):
}
}
if
"model"
not
in
llm_engine_args
:
if
"model"
not
in
llm_engine_args
:
raise
ValueError
(
"Model name is required in the TRT-LLM engine config."
)
raise
ValueError
(
"Model name is required in the TRT-LLM engine config."
)
if
os
.
path
.
exists
(
llm_engine_args
[
"model"
]):
llm_engine_args
[
"model"
]
=
Path
(
llm_engine_args
[
"model"
])
return
(
pytorch_config_args
,
llm_engine_args
)
return
(
pytorch_config_args
,
llm_engine_args
)
...
...
examples/python_rs/llm/tensorrt_llm/disaggregated/llmapi_disaggregated_configs/single_node_config.yaml
View file @
12d73a82
...
@@ -13,22 +13,22 @@
...
@@ -13,22 +13,22 @@
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
model
:
TinyLlama/TinyLlama-1.1B-Chat-v1.0
hostname
:
localhost
hostname
:
localhost
port
:
8000
port
:
8000
backend
:
"
pytorch"
context_servers
:
context_servers
:
num_instances
:
2
num_instances
:
2
gpu_fraction
:
0.25
tensor_parallel_size
:
2
tp_size
:
1
kv_cache_config
:
pp_size
:
1
free_gpu_memory_fraction
:
0.2
urls
:
urls
:
-
"
localhost:8001"
-
"
localhost:8001"
-
"
localhost:8002"
-
"
localhost:8002"
generation_servers
:
generation_servers
:
num_instances
:
1
num_instances
:
2
gpu_fraction
:
0.25
tensor_parallel_size
:
2
tp_size
:
1
kv_cache_config
:
pp_size
:
1
free_gpu_memory_fraction
:
0.2
urls
:
urls
:
-
"
localhost:8002"
-
"
localhost:8003"
-
"
localhost:8004"
\ No newline at end of file
examples/python_rs/llm/tensorrt_llm/disaggregated/worker.py
View file @
12d73a82
...
@@ -104,15 +104,17 @@ class TensorrtLLMEngine:
...
@@ -104,15 +104,17 @@ class TensorrtLLMEngine:
None
,
None
,
lambda
:
LLM
(
lambda
:
LLM
(
**
self
.
llm_engine_args
,
**
self
.
llm_engine_args
,
tensor_parallel_size
=
self
.
server_config
.
other_args
[
"tp_size"
],
tensor_parallel_size
=
self
.
server_config
.
other_args
.
get
(
pipeline_parallel_size
=
self
.
server_config
.
other_args
[
"pp_size"
],
"tensor_parallel_size"
,
1
),
pipeline_parallel_size
=
self
.
server_config
.
other_args
.
get
(
"pipeline_parallel_size"
,
1
),
gpus_per_node
=
None
,
gpus_per_node
=
None
,
trust_remote_code
=
True
,
trust_remote_code
=
True
,
_mpi_session
=
self
.
mpi_session
,
_mpi_session
=
self
.
mpi_session
,
kv_cache_config
=
KvCacheConfig
(
kv_cache_config
=
KvCacheConfig
(
free_gpu_memory_fraction
=
self
.
server_config
.
other_args
[
**
self
.
server_config
.
other_args
.
get
(
"kv_cache_config"
,
{})
"gpu_fraction"
]
),
),
pytorch_backend_config
=
pytorch_config
,
pytorch_backend_config
=
pytorch_config
,
backend
=
"pytorch"
,
backend
=
"pytorch"
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment