Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
df51a622
Commit
df51a622
authored
Mar 16, 2025
by
ishandhanani
Committed by
GitHub
Mar 17, 2025
Browse files
chore: refactor examples and clean CLI (#195)
parent
0517f757
Changes
29
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
19 additions
and
60 deletions
+19
-60
deploy/examples/llm/configs/disagg.yaml
deploy/examples/llm/configs/disagg.yaml
+14
-9
deploy/examples/llm/configs/disagg_router.yaml
deploy/examples/llm/configs/disagg_router.yaml
+1
-1
deploy/examples/llm/configs/monolith/routerless_deployment.yaml
.../examples/llm/configs/monolith/routerless_deployment.yaml
+0
-26
deploy/examples/llm/disaggregated/routerless_deployment.py
deploy/examples/llm/disaggregated/routerless_deployment.py
+0
-20
deploy/examples/llm/graphs/__init__.py
deploy/examples/llm/graphs/__init__.py
+0
-0
deploy/examples/llm/graphs/agg.py
deploy/examples/llm/graphs/agg.py
+0
-0
deploy/examples/llm/graphs/agg_router.py
deploy/examples/llm/graphs/agg_router.py
+0
-1
deploy/examples/llm/graphs/disagg.py
deploy/examples/llm/graphs/disagg.py
+4
-3
deploy/examples/llm/graphs/disagg_router.py
deploy/examples/llm/graphs/disagg_router.py
+0
-0
No files found.
deploy/examples/llm/configs/disagg
regated/routerless_deployment
.yaml
→
deploy/examples/llm/configs/disagg.yaml
View file @
df51a622
...
@@ -16,22 +16,29 @@
...
@@ -16,22 +16,29 @@
Frontend
:
Frontend
:
model
:
deepseek-ai/DeepSeek-R1-Distill-Llama-8B
model
:
deepseek-ai/DeepSeek-R1-Distill-Llama-8B
endpoint
:
dynamo
-init.VllmWorkerRouterLess.generate
endpoint
:
dynamo
.Processor.chat/completions
port
:
8000
port
:
8000
VllmWorkerRouterLess
:
Processor
:
model
:
deepseek-ai/DeepSeek-R1-Distill-Llama-8B
block-size
:
64
max-model-len
:
16384
router
:
round-robin
VllmWorker
:
model
:
deepseek-ai/DeepSeek-R1-Distill-Llama-8B
model
:
deepseek-ai/DeepSeek-R1-Distill-Llama-8B
enforce-eager
:
true
enforce-eager
:
true
kv-transfer-config
:
'
{"kv_connector":"DynamoNixlConnector"}'
kv-transfer-config
:
'
{"kv_connector":"DynamoNixlConnector"}'
block-size
:
64
block-size
:
64
max-model-len
:
16384
max-model-len
:
16384
max-num-batched-tokens
:
16384
max-num-batched-tokens
:
16384
remote-prefill
:
true
conditional-disagg
:
true
ServiceArgs
:
tensor-parallel-size
:
1
workers
:
1
remote_prefill
:
true
max_local_prefill_length
:
10
PrefillWorkerRouterLess
:
# TODO - set all of these but model as default
PrefillWorker
:
model
:
deepseek-ai/DeepSeek-R1-Distill-Llama-8B
model
:
deepseek-ai/DeepSeek-R1-Distill-Llama-8B
enforce-eager
:
true
enforce-eager
:
true
kv-transfer-config
:
'
{"kv_connector":"DynamoNixlConnector"}'
kv-transfer-config
:
'
{"kv_connector":"DynamoNixlConnector"}'
...
@@ -39,5 +46,3 @@ PrefillWorkerRouterLess:
...
@@ -39,5 +46,3 @@ PrefillWorkerRouterLess:
max-model-len
:
16384
max-model-len
:
16384
max-num-batched-tokens
:
16384
max-num-batched-tokens
:
16384
cuda-visible-device-offset
:
1
cuda-visible-device-offset
:
1
ServiceArgs
:
workers
:
1
deploy/examples/llm/configs/disagg
regated/router_based_deployment
.yaml
→
deploy/examples/llm/configs/disagg
_router
.yaml
View file @
df51a622
...
@@ -15,7 +15,7 @@
...
@@ -15,7 +15,7 @@
Frontend
:
Frontend
:
model
:
deepseek-ai/DeepSeek-R1-Distill-Llama-8B
model
:
deepseek-ai/DeepSeek-R1-Distill-Llama-8B
endpoint
:
dynamo
-init
.Processor.chat/completions
endpoint
:
dynamo.Processor.chat/completions
port
:
8000
port
:
8000
Processor
:
Processor
:
...
...
deploy/examples/llm/configs/monolith/routerless_deployment.yaml
deleted
100644 → 0
View file @
0517f757
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
Frontend
:
model
:
deepseek-ai/DeepSeek-R1-Distill-Llama-8B
endpoint
:
dynamo-init.VllmWorkerRouterLess.generate
port
:
8000
VllmWorkerRouterLess
:
model
:
deepseek-ai/DeepSeek-R1-Distill-Llama-8B
enforce-eager
:
true
max-model-len
:
16384
max-num-batched-tokens
:
16384
block-size
:
64
\ No newline at end of file
deploy/examples/llm/disaggregated/routerless_deployment.py
deleted
100644 → 0
View file @
0517f757
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
components.frontend
import
Frontend
from
components.routerless.prefill_worker
import
PrefillWorkerRouterLess
from
components.routerless.worker
import
VllmWorkerRouterLess
Frontend
.
link
(
VllmWorkerRouterLess
).
link
(
PrefillWorkerRouterLess
)
deploy/examples/llm/graphs/__init__.py
0 → 100644
View file @
df51a622
deploy/examples/llm/
monolith/routerless_processor_deployment
.py
→
deploy/examples/llm/
graphs/agg
.py
View file @
df51a622
File moved
deploy/examples/llm/
monolith/router_based_deployment
.py
→
deploy/examples/llm/
graphs/agg_router
.py
View file @
df51a622
...
@@ -18,5 +18,4 @@ from components.kv_router import Router
...
@@ -18,5 +18,4 @@ from components.kv_router import Router
from
components.processor
import
Processor
from
components.processor
import
Processor
from
components.worker
import
VllmWorker
from
components.worker
import
VllmWorker
# Monolith with Router
Frontend
.
link
(
Processor
).
link
(
Router
).
link
(
VllmWorker
)
Frontend
.
link
(
Processor
).
link
(
Router
).
link
(
VllmWorker
)
deploy/examples/llm/
monolith/routerless_deployment
.py
→
deploy/examples/llm/
graphs/disagg
.py
View file @
df51a622
...
@@ -14,7 +14,8 @@
...
@@ -14,7 +14,8 @@
# limitations under the License.
# limitations under the License.
from
components.frontend
import
Frontend
from
components.frontend
import
Frontend
from
components.routerless.worker
import
VllmWorkerRouterLess
from
components.prefill_worker
import
PrefillWorker
from
components.processor
import
Processor
from
components.worker
import
VllmWorker
# Monolith without Router
Frontend
.
link
(
Processor
).
link
(
VllmWorker
).
link
(
PrefillWorker
)
Frontend
.
link
(
VllmWorkerRouterLess
)
deploy/examples/llm/
disaggregated/router_based_deployment
.py
→
deploy/examples/llm/
graphs/disagg_router
.py
View file @
df51a622
File moved
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment