Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
1f6ccc7f
Commit
1f6ccc7f
authored
Mar 19, 2025
by
ishandhanani
Committed by
GitHub
Mar 19, 2025
Browse files
feat: `Frontend` component uses served_model_name instead of model (#302)
parent
476174f3
Changes
6
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
31 additions
and
15 deletions
+31
-15
docs/guides/dynamo_serve.md
docs/guides/dynamo_serve.md
+2
-2
examples/llm/components/frontend.py
examples/llm/components/frontend.py
+9
-5
examples/llm/configs/agg.yaml
examples/llm/configs/agg.yaml
+3
-1
examples/llm/configs/agg_router.yaml
examples/llm/configs/agg_router.yaml
+3
-1
examples/llm/configs/disagg.yaml
examples/llm/configs/disagg.yaml
+9
-5
examples/llm/configs/disagg_router.yaml
examples/llm/configs/disagg_router.yaml
+5
-1
No files found.
docs/guides/dynamo_serve.md
View file @
1f6ccc7f
...
...
@@ -163,7 +163,7 @@ This will print out something like
Service Configuration:
{
"Frontend"
:
{
"
model
"
:
"deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
,
"
served_model_name
"
:
"deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
,
"endpoint"
:
"dynamo.Processor.chat/completions"
,
"port"
: 8000
}
,
...
...
@@ -189,7 +189,7 @@ Service Configuration:
}
Environment Variable that would be
set
:
DYNAMO_SERVICE_CONFIG
={
"Frontend"
:
{
"
model
"
:
"deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
,
"endpoint"
:
"dynamo.Processor.chat/completions"
,
"port"
: 8000
}
,
"Processor"
:
{
"model"
:
"deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
,
"block-size"
: 64,
DYNAMO_SERVICE_CONFIG
={
"Frontend"
:
{
"
served_model_name
"
:
"deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
,
"endpoint"
:
"dynamo.Processor.chat/completions"
,
"port"
: 8000
}
,
"Processor"
:
{
"model"
:
"deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
,
"block-size"
: 64,
"max-model-len"
: 16384,
"router"
:
"round-robin"
}
,
"VllmWorker"
:
{
"model"
:
"deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
,
"enforce-eager"
:
true
,
"block-size"
: 64,
"max-model-len"
: 16384,
"max-num-batched-tokens"
: 16384,
"enable-prefix-caching"
:
true
,
"router"
:
"random"
,
"tensor-parallel-size"
: 1,
"ServiceArgs"
:
{
"workers"
: 1
}}}
```
...
...
examples/llm/components/frontend.py
View file @
1f6ccc7f
...
...
@@ -17,7 +17,6 @@ import subprocess
from
pathlib
import
Path
from
components.processor
import
Processor
from
components.routerless.worker
import
VllmWorkerRouterLess
from
components.worker
import
VllmWorker
from
pydantic
import
BaseModel
...
...
@@ -37,7 +36,7 @@ def get_http_binary_path():
class
FrontendConfig
(
BaseModel
):
model
:
str
served_model_name
:
str
endpoint
:
str
port
:
int
=
8080
...
...
@@ -50,7 +49,6 @@ class FrontendConfig(BaseModel):
# todo this should be called ApiServer
class
Frontend
:
worker
=
depends
(
VllmWorker
)
worker_routerless
=
depends
(
VllmWorkerRouterLess
)
processor
=
depends
(
Processor
)
def
__init__
(
self
):
...
...
@@ -58,7 +56,13 @@ class Frontend:
frontend_config
=
FrontendConfig
(
**
config
.
get
(
"Frontend"
,
{}))
subprocess
.
run
(
[
"llmctl"
,
"http"
,
"remove"
,
"chat-models"
,
frontend_config
.
model
]
[
"llmctl"
,
"http"
,
"remove"
,
"chat-models"
,
frontend_config
.
served_model_name
,
]
)
subprocess
.
run
(
[
...
...
@@ -66,7 +70,7 @@ class Frontend:
"http"
,
"add"
,
"chat-models"
,
frontend_config
.
model
,
frontend_config
.
served_model_name
,
frontend_config
.
endpoint
,
]
)
...
...
examples/llm/configs/agg.yaml
View file @
1f6ccc7f
...
...
@@ -14,7 +14,7 @@
# limitations under the License.
Frontend
:
model
:
deepseek-ai/DeepSeek-R1-Distill-Llama-8B
served_model_name
:
deepseek-ai/DeepSeek-R1-Distill-Llama-8B
endpoint
:
dynamo.Processor.chat/completions
port
:
8000
...
...
@@ -35,3 +35,5 @@ VllmWorker:
tensor-parallel-size
:
1
ServiceArgs
:
workers
:
1
resources
:
gpu
:
1
examples/llm/configs/agg_router.yaml
View file @
1f6ccc7f
...
...
@@ -14,7 +14,7 @@
# limitations under the License.
Frontend
:
model
:
deepseek-ai/DeepSeek-R1-Distill-Llama-8B
served_model_name
:
deepseek-ai/DeepSeek-R1-Distill-Llama-8B
endpoint
:
dynamo.Processor.chat/completions
port
:
8000
...
...
@@ -40,3 +40,5 @@ VllmWorker:
tensor-parallel-size
:
1
ServiceArgs
:
workers
:
1
resources
:
gpu
:
1
examples/llm/configs/disagg.yaml
View file @
1f6ccc7f
...
...
@@ -15,7 +15,7 @@
Frontend
:
model
:
deepseek-ai/DeepSeek-R1-Distill-Llama-8B
served_model_name
:
deepseek-ai/DeepSeek-R1-Distill-Llama-8B
endpoint
:
dynamo.Processor.chat/completions
port
:
8000
...
...
@@ -24,19 +24,23 @@ Processor:
router
:
round-robin
VllmWorker
:
# vllm engine args
model
:
deepseek-ai/DeepSeek-R1-Distill-Llama-8B
kv-transfer-config
:
'
{"kv_connector":"DynamoNixlConnector"}'
max-model-len
:
16384
# dynamo args
remote-prefill
:
true
conditional-disagg
:
true
max-local-prefill-length
:
10
ServiceArgs
:
workers
:
1
resources
:
gpu
:
1
PrefillWorker
:
# vllm engine args
model
:
deepseek-ai/DeepSeek-R1-Distill-Llama-8B
kv-transfer-config
:
'
{"kv_connector":"DynamoNixlConnector"}'
max-model-len
:
16384
max-num-batched-tokens
:
16384
ServiceArgs
:
workers
:
1
resources
:
gpu
:
1
examples/llm/configs/disagg_router.yaml
View file @
1f6ccc7f
...
...
@@ -14,7 +14,7 @@
# limitations under the License.
Frontend
:
model
:
deepseek-ai/DeepSeek-R1-Distill-Llama-8B
served_model_name
:
deepseek-ai/DeepSeek-R1-Distill-Llama-8B
endpoint
:
dynamo.Processor.chat/completions
port
:
8000
...
...
@@ -48,3 +48,7 @@ PrefillWorker:
block-size
:
64
max-model-len
:
16384
max-num-batched-tokens
:
16384
ServiceArgs
:
workers
:
1
resources
:
gpu
:
1
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment