Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
480b41d1
"vllm/vscode:/vscode.git/clone" did not exist on "ea6102b85da808b23055912391977f43fbe3f227"
Unverified
Commit
480b41d1
authored
Jul 14, 2025
by
Graham King
Committed by
GitHub
Jul 14, 2025
Browse files
feat: Python frontend / ingress node (#1912)
parent
f00d700e
Changes
6
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
78 additions
and
3 deletions
+78
-3
components/ingress/README
components/ingress/README
+9
-0
components/ingress/src/dynamo/ingress/__init__.py
components/ingress/src/dynamo/ingress/__init__.py
+0
-0
components/ingress/src/dynamo/ingress/__main__.py
components/ingress/src/dynamo/ingress/__main__.py
+7
-0
components/ingress/src/dynamo/ingress/main.py
components/ingress/src/dynamo/ingress/main.py
+61
-0
pyproject.toml
pyproject.toml
+1
-1
tests/serve/test_dynamo_serve.py
tests/serve/test_dynamo_serve.py
+0
-2
No files found.
components/ingress/README
0 → 100644
View file @
480b41d1
# Dynamo ingress / frontend node.
Usage: `python -m dynamo.ingress [--http-port <port>]`. Port defaults to 8080.
This runs an OpenAI-compliant HTTP server, a pre-processor, and a router in a single process. Engines / workers are auto-discovered when they call `register_llm`.
Requires `etcd` and `nats-server -js`.
This is the same as `dynamo-run in=http out=dyn`.
components/ingress/src/dynamo/ingress/__init__.py
0 → 100644
View file @
480b41d1
components/ingress/src/dynamo/ingress/__main__.py
0 → 100644
View file @
480b41d1
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from
dynamo.ingress.main
import
main
# Entry point for `python -m dynamo.ingress`: start the frontend only when
# this module is executed directly, not when it is imported.
if __name__ == "__main__":
    main()
components/ingress/src/dynamo/ingress/main.py
0 → 100644
View file @
480b41d1
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
# Usage: `python -m dynamo.ingress [args]`
#
# Start a frontend node. This runs:
# - OpenAI HTTP server.
# - Auto-discovery: Watches etcd for engine/worker registration (via `register_llm`).
# - Pre-processor: Prompt templating and tokenization.
# - Router, defaulting to round-robin (TODO: Add flags to enable KV routing).
import
argparse
import
asyncio
import
uvloop
from
dynamo.llm
import
EngineType
,
EntrypointArgs
,
make_engine
,
run_input
from
dynamo.runtime
import
DistributedRuntime
def _bounded_int(label, lower, upper):
    """Build an argparse ``type=`` callable accepting ints in ``[lower, upper]``."""

    def convert(text):
        try:
            value = int(text)
        except ValueError:
            raise argparse.ArgumentTypeError(
                f"invalid {label} value: {text!r}"
            ) from None
        if not lower <= value <= upper:
            raise argparse.ArgumentTypeError(
                f"{label} must be between {lower} and {upper}, got {value}"
            )
        return value

    return convert


def parse_args(argv=None):
    """Parse the frontend command line into keyword arguments.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default)
            argparse reads ``sys.argv[1:]``, which matches the previous
            behavior of this function.

    Returns:
        A dict that always contains ``"http_port"`` and contains
        ``"kv_cache_block_size"`` only when that flag was given. The caller
        unpacks this dict as keyword arguments.

    Raises:
        SystemExit: Raised by argparse on ``--help`` or on invalid input,
            including values outside the unsigned ranges named in the help
            text (u16 for the port, u32 for the block size).
    """
    parser = argparse.ArgumentParser(
        description="Dynamo Frontend: HTTP+Pre-processor+Router",
        formatter_class=argparse.RawTextHelpFormatter,  # To preserve multi-line help formatting
    )
    # The help text documents these as unsigned 32-bit / 16-bit values, so
    # reject out-of-range input here with a clear argparse error instead of
    # passing it on to the engine configuration.
    parser.add_argument(
        "--kv-cache-block-size",
        type=_bounded_int("--kv-cache-block-size", 0, 2**32 - 1),
        help="KV cache block size (u32).",
    )
    parser.add_argument(
        "--http-port",
        type=_bounded_int("--http-port", 0, 2**16 - 1),
        default=8080,
        help="HTTP port for the engine (u16).",
    )
    flags = parser.parse_args(argv)

    kwargs = {"http_port": flags.http_port}
    # Only forward the block size when it was given explicitly, so the
    # callee's own default applies otherwise.
    if flags.kv_cache_block_size is not None:
        kwargs["kv_cache_block_size"] = flags.kv_cache_block_size
    return kwargs
async def async_main():
    """Start the frontend: build a dynamic engine and serve it over HTTP.

    Parses the command line, constructs a `DistributedRuntime` on the
    running event loop, creates an engine of type `EngineType.Dynamic`
    from the parsed flags, and then awaits `run_input` with the "http"
    input until it returns or the task is cancelled.
    """
    # Parse the command line first so that `--help` and argument errors exit
    # before the distributed runtime is constructed.
    flags = parse_args()

    # NOTE(review): the second positional argument is passed as False; its
    # meaning is not visible in this file - confirm against DistributedRuntime.
    runtime = DistributedRuntime(asyncio.get_running_loop(), False)

    # out=dyn
    entrypoint_args = EntrypointArgs(EngineType.Dynamic, **flags)
    engine = await make_engine(runtime, entrypoint_args)

    # in=http
    try:
        await run_input(runtime, "http", engine)
    except asyncio.exceptions.CancelledError:
        # Deliberately swallowed: cancellation is treated as a normal exit
        # rather than surfacing a traceback (presumably triggered on
        # shutdown - confirm).
        pass
def main():
    """Synchronous entry point: run `async_main` to completion via uvloop."""
    frontend = async_main()
    uvloop.run(frontend)
# Allow running this file directly as a script; importing it has no side
# effects beyond the definitions above.
if __name__ == "__main__":
    main()
pyproject.toml
View file @
480b41d1
...
...
@@ -81,7 +81,7 @@ requires = ["hatchling"]
build-backend
=
"hatchling.build"
[tool.hatch.build.targets.wheel]
packages
=
[
"deploy/sdk/src/dynamo"
,
"components/planner/src/dynamo"
]
packages
=
[
"deploy/sdk/src/dynamo"
,
"components/planner/src/dynamo"
,
"components/ingress/src/dynamo"
]
# This section is for including the binaries in the wheel package
# but doesn't make them executable scripts in the venv bin directory
...
...
tests/serve/test_dynamo_serve.py
View file @
480b41d1
...
...
@@ -285,7 +285,6 @@ class DynamoServeProcess(ManagedProcess):
(
f
"http://localhost:
{
port
}
/v1/models"
,
self
.
_check_model
)
]
health_check_ports
=
[
port
]
env
=
None
self
.
port
=
port
self
.
graph
=
graph
...
...
@@ -305,7 +304,6 @@ class DynamoServeProcess(ManagedProcess):
"from multiprocessing.spawn"
,
],
log_dir
=
request
.
node
.
name
,
env
=
env
,
# Pass the environment variables
)
def
_check_model
(
self
,
response
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment