OpenDAS / dynamo
Commit 32cd0048, authored Jun 09, 2025 by xuxz
Parent: b950ec54
Commit message: "adaption DYNAMO for DCU"

7 changed files with 27 additions and 21 deletions:
  components/planner/src/dynamo/planner/local_connector.py   +1   -1
  deploy/sdk/src/dynamo/sdk/cli/allocator.py                  +4   -3
  examples/llm/configs/agg.yaml                               +3   -3
  examples/llm/configs/disagg.yaml                            +10  -5
  examples/llm/configs/disagg_router.yaml                     +5   -5
  examples/llm/configs/mutinode_disagg_r1.yaml                +1   -1
  launch/llmctl/src/main.rs                                   +3   -3
components/planner/src/dynamo/planner/local_connector.py

@@ -148,7 +148,7 @@ class LocalConnector(PlannerConnector):
         if not available_gpus:
             raise ValueError("No GPUs available for allocation")
         gpu_id = available_gpus[0]
-        watcher_env["CUDA_VISIBLE_DEVICES"] = gpu_id
+        watcher_env["HIP_VISIBLE_DEVICES"] = gpu_id
         watcher_env["DYNAMO_SERVICE_CONFIG"] = service_config
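The commit swaps CUDA_VISIBLE_DEVICES for HIP_VISIBLE_DEVICES outright, which ties the planner to ROCm/DCU hosts. A minimal sketch (not part of this commit; the rocm-smi probe and the visibility_env helper are assumptions for illustration) of how the same assignment could stay portable across NVIDIA and DCU nodes:

    import shutil

    def visibility_env(gpu_id):
        """Pick the device-visibility variable for the detected platform."""
        # Assumption: presence of rocm-smi marks a ROCm/DCU host; adjust the probe as needed.
        key = "HIP_VISIBLE_DEVICES" if shutil.which("rocm-smi") else "CUDA_VISIBLE_DEVICES"
        return {key: str(gpu_id)}

    # e.g. watcher_env.update(visibility_env(gpu_id)) instead of hard-coding one key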
deploy/sdk/src/dynamo/sdk/cli/allocator.py

@@ -207,7 +207,7 @@ class ResourceAllocator:
             # Generate environment variables for each worker
             for _ in range(num_workers):
-                env_vars = {"CUDA_VISIBLE_DEVICES": ",".join(map(str, assigned))}
+                env_vars = {"HIP_VISIBLE_DEVICES": ",".join(map(str, assigned))}
                 resource_envs.append(env_vars)
         else:
             logger.info(

@@ -221,7 +221,7 @@ class ResourceAllocator:
             )
             # Generate environment variables for this worker
-            env_vars = {"CUDA_VISIBLE_DEVICES": ",".join(map(str, assigned))}
+            env_vars = {"HIP_VISIBLE_DEVICES": ",".join(map(str, assigned))}
             # If we have comprehensive GPU stats, log them
             try:

@@ -242,7 +242,8 @@ class ResourceAllocator:
                 logger.debug(f"Failed to get GPU stats: {e}")
             resource_envs.append(env_vars)
+        # else:
+        #     resource_envs = config["envs"]
         logger.info(
             f"Final resource allocation - workers: {num_workers}, envs: {resource_envs}"
         )
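Another way to keep the allocator portable (again only a sketch, not what this commit does) is to carry both visibility variables in the same env dict: the CUDA runtime reads CUDA_VISIBLE_DEVICES, the HIP runtime reads HIP_VISIBLE_DEVICES, and the unused key is harmless on either platform.

    assigned = [0, 1]  # example device ids; in the allocator this comes from the GPU pool
    ids = ",".join(map(str, assigned))
    env_vars = {
        "CUDA_VISIBLE_DEVICES": ids,  # read by the CUDA runtime on NVIDIA hosts
        "HIP_VISIBLE_DEVICES": ids,   # read by the HIP runtime on ROCm/DCU hosts
    }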
examples/llm/configs/agg.yaml

@@ -13,12 +13,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 Common:
-  model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
-  block-size: 64
+  model: /models/deepseek-r1/DeepSeek-R1-Distill-Llama-8B
+  block-size: 16
   max-model-len: 16384

 Frontend:
-  served_model_name: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
+  served_model_name: /models/deepseek-r1/DeepSeek-R1-Distill-Llama-8B
   endpoint: dynamo.Processor.chat/completions
   port: 8000
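With served_model_name now set to the local filesystem path, clients must send exactly that string in the model field. A hypothetical request, assuming the Frontend on port 8000 exposes an OpenAI-compatible /v1/chat/completions route (the host and route are assumptions, not taken from this diff):

    import requests

    resp = requests.post(
        "http://localhost:8000/v1/chat/completions",  # assumed host and route
        json={
            "model": "/models/deepseek-r1/DeepSeek-R1-Distill-Llama-8B",  # must match served_model_name
            "messages": [{"role": "user", "content": "Hello"}],
            "max_tokens": 32,
        },
    )
    print(resp.json())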
examples/llm/configs/disagg.yaml

@@ -13,13 +13,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 Common:
-  model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
-  block-size: 64
+  model: /models/deepseek-r1/DeepSeek-R1-Distill-Llama-8B
+  block-size: 16
   max-model-len: 16384
   kv-transfer-config: '{"kv_connector":"DynamoNixlConnector"}'

 Frontend:
-  served_model_name: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
+  served_model_name: /models/deepseek-r1/DeepSeek-R1-Distill-Llama-8B
   endpoint: dynamo.Processor.chat/completions
   port: 8000

@@ -29,21 +29,26 @@ Processor:
 VllmWorker:
   remote-prefill: true
-  conditional-disagg: true
+  conditional-disagg: false
   max-local-prefill-length: 10
-  max-prefill-queue-size: 2
+  max-prefill-queue-size: 64
   tensor-parallel-size: 1
   enable-prefix-caching: false
   ServiceArgs:
     workers: 1
     resources:
       gpu: 1
   common-configs: [model, block-size, max-model-len, kv-transfer-config]

 PrefillWorker:
   max-num-batched-tokens: 16384
   tensor-parallel-size: 1
   ServiceArgs:
     workers: 1
     resources:
       gpu: 1
   common-configs: [model, block-size, max-model-len, kv-transfer-config]

 Planner:
examples/llm/configs/disagg_router.yaml

@@ -13,14 +13,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 Common:
-  model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
-  block-size: 64
+  model: /models/deepseek-r1/DeepSeek-R1-Distill-Llama-8B
+  block-size: 16
   max-model-len: 16384
   router: kv
   kv-transfer-config: '{"kv_connector":"DynamoNixlConnector"}'

 Frontend:
-  served_model_name: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
+  served_model_name: /models/deepseek-r1/DeepSeek-R1-Distill-Llama-8B
   endpoint: dynamo.Processor.chat/completions
   port: 8000

@@ -34,9 +34,9 @@ Router:
 VllmWorker:
   max-num-batched-tokens: 16384
   remote-prefill: true
-  conditional-disagg: true
+  conditional-disagg: false
   max-local-prefill-length: 10
-  max-prefill-queue-size: 2
+  max-prefill-queue-size: 64
   tensor-parallel-size: 1
   enable-prefix-caching: true
   ServiceArgs:
examples/llm/configs/mutinode_disagg_r1.yaml

@@ -14,7 +14,7 @@
 # limitations under the License.
 Common:
   model: deepseek-ai/DeepSeek-R1
-  block-size: 64
+  block-size: 16
   max-model-len: 16384
   kv-transfer-config: '{"kv_connector":"DynamoNixlConnector"}'
   tensor-parallel-size: 16
launch/llmctl/src/main.rs

@@ -228,9 +228,9 @@ async fn add_model(
         endpoint_name
     );

-    if model_name.starts_with('/') {
-        raise!("Model name '{}' cannot start with a slash", model_name);
-    }
+    // if model_name.starts_with('/') {
+    //     raise!("Model name '{}' cannot start with a slash", model_name);
+    // }

     let parts: Vec<&str> = endpoint_name.split('.').collect();
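Commenting out this guard follows from the config changes above: the model names are now absolute filesystem paths, which the old check would have rejected. Purely for illustration (Python, not the llmctl code):

    model_name = "/models/deepseek-r1/DeepSeek-R1-Distill-Llama-8B"
    # The disabled guard rejected any name starting with '/', so path-style names now pass.
    print(model_name.startswith("/"))  # True; previously llmctl raised an error for this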