Unverified Commit 5ca570f9 authored by Graham King's avatar Graham King Committed by GitHub
Browse files

chore: Rename dynamo.ingress to dynamo.frontend (#1944)

parent 7b9182fd
# Dynamo ingress / frontend node.
# Dynamo frontend node.
Usage: `python -m dynamo.ingress [--http-port <port>]`. Port defaults to 8080.
Usage: `python -m dynamo.frontend [--http-port 8080]`.
This runs an OpenAI-compliant HTTP server, a pre-processor, and a router in a single process. Engines / workers are auto-discovered when they call `register_llm`.
......
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from dynamo.ingress.main import main
from dynamo.frontend.main import main
if __name__ == "__main__":
main()
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
# Usage: `python -m dynamo.ingress [args]`
# Usage: `python -m dynamo.frontend [args]`
#
# Start a frontend node. This runs:
# - OpenAI HTTP server.
......@@ -23,19 +23,16 @@ def parse_args():
description="Dynamo Frontend: HTTP+Pre-processor+Router",
formatter_class=argparse.RawTextHelpFormatter, # To preserve multi-line help formatting
)
parser.add_argument(
"-i", "--interactive", action="store_true", help="Interactive text chat"
)
parser.add_argument(
"--kv-cache-block-size", type=int, help="KV cache block size (u32)."
)
parser.add_argument(
"--http-port", type=int, default=8080, help="HTTP port for the engine (u16)."
)
flags = parser.parse_args()
kwargs = {"http_port": flags.http_port}
if flags.kv_cache_block_size is not None:
kwargs["kv_cache_block_size"] = flags.kv_cache_block_size
return kwargs
return parser.parse_args()
async def async_main():
......@@ -43,12 +40,18 @@ async def async_main():
flags = parse_args()
# out=dyn
e = EntrypointArgs(EngineType.Dynamic, **flags)
e = EntrypointArgs(
EngineType.Dynamic,
http_port=flags.http_port,
kv_cache_block_size=flags.kv_cache_block_size,
)
engine = await make_engine(runtime, e)
# in=http
try:
await run_input(runtime, "http", engine)
if flags.interactive:
await run_input(runtime, "text", engine)
else:
await run_input(runtime, "http", engine)
except asyncio.exceptions.CancelledError:
pass
......
......@@ -79,7 +79,7 @@ requires = ["hatchling"]
build-backend = "hatchling.build"
[tool.hatch.build.targets.wheel]
packages = ["deploy/sdk/src/dynamo", "components/planner/src/dynamo", "components/ingress/src/dynamo", "components/backends/llama_cpp/src/dynamo"]
packages = ["deploy/sdk/src/dynamo", "components/planner/src/dynamo", "components/frontend/src/dynamo", "components/backends/llama_cpp/src/dynamo"]
# This section is for including the binaries in the wheel package
# but doesn't make them executable scripts in the venv bin directory
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment