Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
1f6ccc7f
Commit
1f6ccc7f
authored
Mar 19, 2025
by
ishandhanani
Committed by
GitHub
Mar 19, 2025
Browse files
feat: `Frontend` component uses served_model_name instead of model (#302)
parent
476174f3
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
31 additions
and
15 deletions
+31
-15
docs/guides/dynamo_serve.md
docs/guides/dynamo_serve.md
+2
-2
examples/llm/components/frontend.py
examples/llm/components/frontend.py
+9
-5
examples/llm/configs/agg.yaml
examples/llm/configs/agg.yaml
+3
-1
examples/llm/configs/agg_router.yaml
examples/llm/configs/agg_router.yaml
+3
-1
examples/llm/configs/disagg.yaml
examples/llm/configs/disagg.yaml
+9
-5
examples/llm/configs/disagg_router.yaml
examples/llm/configs/disagg_router.yaml
+5
-1
No files found.
docs/guides/dynamo_serve.md
View file @
1f6ccc7f
...
@@ -163,7 +163,7 @@ This will print out something like
...
@@ -163,7 +163,7 @@ This will print out something like
Service Configuration:
Service Configuration:
{
{
"Frontend"
:
{
"Frontend"
:
{
"
model
"
:
"deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
,
"
served_model_name
"
:
"deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
,
"endpoint"
:
"dynamo.Processor.chat/completions"
,
"endpoint"
:
"dynamo.Processor.chat/completions"
,
"port"
: 8000
"port"
: 8000
}
,
}
,
...
@@ -189,7 +189,7 @@ Service Configuration:
...
@@ -189,7 +189,7 @@ Service Configuration:
}
}
Environment Variable that would be
set
:
Environment Variable that would be
set
:
DYNAMO_SERVICE_CONFIG
={
"Frontend"
:
{
"
model
"
:
"deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
,
"endpoint"
:
"dynamo.Processor.chat/completions"
,
"port"
: 8000
}
,
"Processor"
:
{
"model"
:
"deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
,
"block-size"
: 64,
DYNAMO_SERVICE_CONFIG
={
"Frontend"
:
{
"
served_model_name
"
:
"deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
,
"endpoint"
:
"dynamo.Processor.chat/completions"
,
"port"
: 8000
}
,
"Processor"
:
{
"model"
:
"deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
,
"block-size"
: 64,
"max-model-len"
: 16384,
"router"
:
"round-robin"
}
,
"VllmWorker"
:
{
"model"
:
"deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
,
"enforce-eager"
:
true
,
"block-size"
: 64,
"max-model-len"
: 16384,
"max-num-batched-tokens"
: 16384,
"enable-prefix-caching"
:
"max-model-len"
: 16384,
"router"
:
"round-robin"
}
,
"VllmWorker"
:
{
"model"
:
"deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
,
"enforce-eager"
:
true
,
"block-size"
: 64,
"max-model-len"
: 16384,
"max-num-batched-tokens"
: 16384,
"enable-prefix-caching"
:
true
,
"router"
:
"random"
,
"tensor-parallel-size"
: 1,
"ServiceArgs"
:
{
"workers"
: 1
}}}
true
,
"router"
:
"random"
,
"tensor-parallel-size"
: 1,
"ServiceArgs"
:
{
"workers"
: 1
}}}
```
```
...
...
examples/llm/components/frontend.py
View file @
1f6ccc7f
...
@@ -17,7 +17,6 @@ import subprocess
...
@@ -17,7 +17,6 @@ import subprocess
from
pathlib
import
Path
from
pathlib
import
Path
from
components.processor
import
Processor
from
components.processor
import
Processor
from
components.routerless.worker
import
VllmWorkerRouterLess
from
components.worker
import
VllmWorker
from
components.worker
import
VllmWorker
from
pydantic
import
BaseModel
from
pydantic
import
BaseModel
...
@@ -37,7 +36,7 @@ def get_http_binary_path():
...
@@ -37,7 +36,7 @@ def get_http_binary_path():
class
FrontendConfig
(
BaseModel
):
class
FrontendConfig
(
BaseModel
):
model
:
str
served_model_name
:
str
endpoint
:
str
endpoint
:
str
port
:
int
=
8080
port
:
int
=
8080
...
@@ -50,7 +49,6 @@ class FrontendConfig(BaseModel):
...
@@ -50,7 +49,6 @@ class FrontendConfig(BaseModel):
# todo this should be called ApiServer
# todo this should be called ApiServer
class
Frontend
:
class
Frontend
:
worker
=
depends
(
VllmWorker
)
worker
=
depends
(
VllmWorker
)
worker_routerless
=
depends
(
VllmWorkerRouterLess
)
processor
=
depends
(
Processor
)
processor
=
depends
(
Processor
)
def
__init__
(
self
):
def
__init__
(
self
):
...
@@ -58,7 +56,13 @@ class Frontend:
...
@@ -58,7 +56,13 @@ class Frontend:
frontend_config
=
FrontendConfig
(
**
config
.
get
(
"Frontend"
,
{}))
frontend_config
=
FrontendConfig
(
**
config
.
get
(
"Frontend"
,
{}))
subprocess
.
run
(
subprocess
.
run
(
[
"llmctl"
,
"http"
,
"remove"
,
"chat-models"
,
frontend_config
.
model
]
[
"llmctl"
,
"http"
,
"remove"
,
"chat-models"
,
frontend_config
.
served_model_name
,
]
)
)
subprocess
.
run
(
subprocess
.
run
(
[
[
...
@@ -66,7 +70,7 @@ class Frontend:
...
@@ -66,7 +70,7 @@ class Frontend:
"http"
,
"http"
,
"add"
,
"add"
,
"chat-models"
,
"chat-models"
,
frontend_config
.
model
,
frontend_config
.
served_model_name
,
frontend_config
.
endpoint
,
frontend_config
.
endpoint
,
]
]
)
)
...
...
examples/llm/configs/agg.yaml
View file @
1f6ccc7f
...
@@ -14,7 +14,7 @@
...
@@ -14,7 +14,7 @@
# limitations under the License.
# limitations under the License.
Frontend
:
Frontend
:
model
:
deepseek-ai/DeepSeek-R1-Distill-Llama-8B
served_model_name
:
deepseek-ai/DeepSeek-R1-Distill-Llama-8B
endpoint
:
dynamo.Processor.chat/completions
endpoint
:
dynamo.Processor.chat/completions
port
:
8000
port
:
8000
...
@@ -35,3 +35,5 @@ VllmWorker:
...
@@ -35,3 +35,5 @@ VllmWorker:
tensor-parallel-size
:
1
tensor-parallel-size
:
1
ServiceArgs
:
ServiceArgs
:
workers
:
1
workers
:
1
resources
:
gpu
:
1
examples/llm/configs/agg_router.yaml
View file @
1f6ccc7f
...
@@ -14,7 +14,7 @@
...
@@ -14,7 +14,7 @@
# limitations under the License.
# limitations under the License.
Frontend
:
Frontend
:
model
:
deepseek-ai/DeepSeek-R1-Distill-Llama-8B
served_model_name
:
deepseek-ai/DeepSeek-R1-Distill-Llama-8B
endpoint
:
dynamo.Processor.chat/completions
endpoint
:
dynamo.Processor.chat/completions
port
:
8000
port
:
8000
...
@@ -40,3 +40,5 @@ VllmWorker:
...
@@ -40,3 +40,5 @@ VllmWorker:
tensor-parallel-size
:
1
tensor-parallel-size
:
1
ServiceArgs
:
ServiceArgs
:
workers
:
1
workers
:
1
resources
:
gpu
:
1
examples/llm/configs/disagg.yaml
View file @
1f6ccc7f
...
@@ -15,7 +15,7 @@
...
@@ -15,7 +15,7 @@
Frontend
:
Frontend
:
model
:
deepseek-ai/DeepSeek-R1-Distill-Llama-8B
served_model_name
:
deepseek-ai/DeepSeek-R1-Distill-Llama-8B
endpoint
:
dynamo.Processor.chat/completions
endpoint
:
dynamo.Processor.chat/completions
port
:
8000
port
:
8000
...
@@ -24,19 +24,23 @@ Processor:
...
@@ -24,19 +24,23 @@ Processor:
router
:
round-robin
router
:
round-robin
VllmWorker
:
VllmWorker
:
# vllm engine args
model
:
deepseek-ai/DeepSeek-R1-Distill-Llama-8B
model
:
deepseek-ai/DeepSeek-R1-Distill-Llama-8B
kv-transfer-config
:
'
{"kv_connector":"DynamoNixlConnector"}'
kv-transfer-config
:
'
{"kv_connector":"DynamoNixlConnector"}'
max-model-len
:
16384
max-model-len
:
16384
# dynamo args
remote-prefill
:
true
remote-prefill
:
true
conditional-disagg
:
true
conditional-disagg
:
true
max-local-prefill-length
:
10
max-local-prefill-length
:
10
ServiceArgs
:
workers
:
1
resources
:
gpu
:
1
PrefillWorker
:
PrefillWorker
:
# vllm engine args
model
:
deepseek-ai/DeepSeek-R1-Distill-Llama-8B
model
:
deepseek-ai/DeepSeek-R1-Distill-Llama-8B
kv-transfer-config
:
'
{"kv_connector":"DynamoNixlConnector"}'
kv-transfer-config
:
'
{"kv_connector":"DynamoNixlConnector"}'
max-model-len
:
16384
max-model-len
:
16384
max-num-batched-tokens
:
16384
max-num-batched-tokens
:
16384
ServiceArgs
:
workers
:
1
resources
:
gpu
:
1
examples/llm/configs/disagg_router.yaml
View file @
1f6ccc7f
...
@@ -14,7 +14,7 @@
...
@@ -14,7 +14,7 @@
# limitations under the License.
# limitations under the License.
Frontend
:
Frontend
:
model
:
deepseek-ai/DeepSeek-R1-Distill-Llama-8B
served_model_name
:
deepseek-ai/DeepSeek-R1-Distill-Llama-8B
endpoint
:
dynamo.Processor.chat/completions
endpoint
:
dynamo.Processor.chat/completions
port
:
8000
port
:
8000
...
@@ -48,3 +48,7 @@ PrefillWorker:
...
@@ -48,3 +48,7 @@ PrefillWorker:
block-size
:
64
block-size
:
64
max-model-len
:
16384
max-model-len
:
16384
max-num-batched-tokens
:
16384
max-num-batched-tokens
:
16384
ServiceArgs
:
workers
:
1
resources
:
gpu
:
1
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment