Unverified Commit 643659c6 authored by Alec's avatar Alec Committed by GitHub
Browse files

fix: frontend and http server signal handling (#677)

parent 1c77531a
...@@ -14,9 +14,7 @@ ...@@ -14,9 +14,7 @@
# limitations under the License. # limitations under the License.
import logging import logging
import signal
import subprocess import subprocess
import sys
from pathlib import Path from pathlib import Path
from components.processor import Processor from components.processor import Processor
...@@ -24,7 +22,7 @@ from components.worker import VllmWorker ...@@ -24,7 +22,7 @@ from components.worker import VllmWorker
from pydantic import BaseModel from pydantic import BaseModel
from dynamo import sdk from dynamo import sdk
from dynamo.sdk import depends, service from dynamo.sdk import async_on_shutdown, depends, service
from dynamo.sdk.lib.config import ServiceConfig from dynamo.sdk.lib.config import ServiceConfig
from dynamo.sdk.lib.image import DYNAMO_IMAGE from dynamo.sdk.lib.image import DYNAMO_IMAGE
...@@ -32,6 +30,7 @@ logger = logging.getLogger(__name__) ...@@ -32,6 +30,7 @@ logger = logging.getLogger(__name__)
def get_http_binary_path(): def get_http_binary_path():
"""Find the HTTP binary path in SDK or fallback to 'http' command."""
sdk_path = Path(sdk.__file__) sdk_path = Path(sdk.__file__)
binary_path = sdk_path.parent / "cli/bin/http" binary_path = sdk_path.parent / "cli/bin/http"
if not binary_path.exists(): if not binary_path.exists():
...@@ -41,6 +40,8 @@ def get_http_binary_path(): ...@@ -41,6 +40,8 @@ def get_http_binary_path():
class FrontendConfig(BaseModel): class FrontendConfig(BaseModel):
"""Configuration for the Frontend service including model and HTTP server settings."""
served_model_name: str served_model_name: str
endpoint: str endpoint: str
port: int = 8080 port: int = 8080
...@@ -57,25 +58,17 @@ class Frontend: ...@@ -57,25 +58,17 @@ class Frontend:
processor = depends(Processor) processor = depends(Processor)
def __init__(self): def __init__(self):
"""Initialize Frontend service with HTTP server and model configuration."""
config = ServiceConfig.get_instance() config = ServiceConfig.get_instance()
frontend_config = FrontendConfig(**config.get("Frontend", {})) frontend_config = FrontendConfig(**config.get("Frontend", {}))
self.frontend_config = frontend_config self.frontend_config = frontend_config
self.process = None self.process = None
signal.signal(signal.SIGTERM, self.handle_exit)
signal.signal(signal.SIGINT, self.handle_exit)
# Initial setup
self.setup_model() self.setup_model()
self.start_http_server() self.start_http_server()
try:
if self.process:
self.process.wait()
except KeyboardInterrupt:
self.cleanup()
def setup_model(self): def setup_model(self):
"""Configure the model for HTTP service using llmctl."""
subprocess.run( subprocess.run(
[ [
"llmctl", "llmctl",
...@@ -83,9 +76,9 @@ class Frontend: ...@@ -83,9 +76,9 @@ class Frontend:
"remove", "remove",
"chat-models", "chat-models",
self.frontend_config.served_model_name, self.frontend_config.served_model_name,
] ],
check=False,
) )
# Add the model
subprocess.run( subprocess.run(
[ [
"llmctl", "llmctl",
...@@ -94,20 +87,26 @@ class Frontend: ...@@ -94,20 +87,26 @@ class Frontend:
"chat-models", "chat-models",
self.frontend_config.served_model_name, self.frontend_config.served_model_name,
self.frontend_config.endpoint, self.frontend_config.endpoint,
] ],
check=False,
) )
def start_http_server(self): def start_http_server(self):
"""Start the HTTP server on the configured port."""
logger.info("Starting HTTP server") logger.info("Starting HTTP server")
http_binary = get_http_binary_path() http_binary = get_http_binary_path()
self.process = subprocess.Popen( self.process = subprocess.Popen(
[http_binary, "-p", str(self.frontend_config.port)], [http_binary, "-p", str(self.frontend_config.port)],
stdout=None, stdout=None,
stderr=None, stderr=None,
) )
@async_on_shutdown
def cleanup(self): def cleanup(self):
logger.info("Cleaning up before shutdown...") """Clean up resources before shutdown."""
# circusd manages shutdown of http server process, we just need to remove the model using the on_shutdown hook
subprocess.run( subprocess.run(
[ [
"llmctl", "llmctl",
...@@ -115,14 +114,6 @@ class Frontend: ...@@ -115,14 +114,6 @@ class Frontend:
"remove", "remove",
"chat-models", "chat-models",
self.frontend_config.served_model_name, self.frontend_config.served_model_name,
] ],
check=False,
) )
if self.process:
logger.info("Terminating HTTP process")
self.process.terminate()
self.process.wait(timeout=10)
def handle_exit(self, signum, frame):
logger.debug(f"Received signal {signum}, shutting down...")
self.cleanup()
sys.exit(0)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment