# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
import signal
import subprocess
import sys
from pathlib import Path

from components.processor import Processor
from components.worker import VllmWorker
from pydantic import BaseModel

from dynamo import sdk
from dynamo.sdk import depends, service
from dynamo.sdk.lib.config import ServiceConfig
from dynamo.sdk.lib.image import DYNAMO_IMAGE

logger = logging.getLogger(__name__)
def get_http_binary_path():
    """Return the path of the dynamo `http` server binary.

    Prefers the binary bundled inside the installed SDK package; falls
    back to the bare name "http" (resolved via PATH) when the bundled
    binary is not present.
    """
    bundled = Path(sdk.__file__).parent / "cli/bin/http"
    return str(bundled) if bundled.exists() else "http"


class FrontendConfig(BaseModel):
    """Configuration for the Frontend service.

    Populated from the "Frontend" section of the shared ServiceConfig.
    """

    # Model name registered with (and removed from) llmctl's chat-models.
    served_model_name: str
    # Endpoint string passed to `llmctl http add` when registering the model.
    endpoint: str
    # TCP port the `http` server binary listens on.
    port: int = 8080


# todo this should be called ApiServer
@service(
    resources={"cpu": "10", "memory": "20Gi"},
    workers=1,
    image=DYNAMO_IMAGE,
)
class Frontend:
    """Supervisor for the `http` frontend server process.

    On construction it registers the served model with `llmctl`, spawns the
    `http` binary as a child process, and blocks until that child exits.
    On SIGTERM/SIGINT (or KeyboardInterrupt) it deregisters the model and
    terminates the child before exiting.
    """

    worker = depends(VllmWorker)
    processor = depends(Processor)

    def __init__(self):
        # Read this service's section ("Frontend") from the shared config.
        config = ServiceConfig.get_instance()
        frontend_config = FrontendConfig(**config.get("Frontend", {}))

        self.frontend_config = frontend_config
        self.process = None  # Popen handle of the spawned `http` server

        # Ensure cleanup (model deregistration + child shutdown) on signals.
        signal.signal(signal.SIGTERM, self.handle_exit)
        signal.signal(signal.SIGINT, self.handle_exit)

        # Initial setup
        self.setup_model()
        self.start_http_server()

        try:
            if self.process:
                # Block for the lifetime of the HTTP server child process.
                self.process.wait()
        except KeyboardInterrupt:
            self.cleanup()

    def setup_model(self):
        """(Re-)register the served model with llmctl.

        The leading `remove` clears any stale registration; return codes are
        deliberately not checked — a failed remove (model absent) is fine.
        """
        subprocess.run(
            [
                "llmctl",
                "http",
                "remove",
                "chat-models",
                self.frontend_config.served_model_name,
            ]
        )
        # Add the model
        subprocess.run(
            [
                "llmctl",
                "http",
                "add",
                "chat-models",
                self.frontend_config.served_model_name,
                self.frontend_config.endpoint,
            ]
        )

    def start_http_server(self):
        """Spawn the `http` binary listening on the configured port."""
        logger.info("Starting HTTP server")
        http_binary = get_http_binary_path()
        # stdout/stderr=None inherits our streams so server logs are visible.
        self.process = subprocess.Popen(
            [http_binary, "-p", str(self.frontend_config.port)],
            stdout=None,
            stderr=None,
        )

    def cleanup(self):
        """Deregister the model and stop the HTTP server child process."""
        logger.info("Cleaning up before shutdown...")
        subprocess.run(
            [
                "llmctl",
                "http",
                "remove",
                "chat-models",
                self.frontend_config.served_model_name,
            ]
        )
        if self.process:
            logger.info("Terminating HTTP process")
            self.process.terminate()
            try:
                self.process.wait(timeout=10)
            except subprocess.TimeoutExpired:
                # Fix: an unhandled TimeoutExpired used to propagate out of
                # cleanup() and leave the child running — force-kill instead.
                logger.warning("HTTP process did not exit in 10s; killing it")
                self.process.kill()
                self.process.wait()

    def handle_exit(self, signum, frame):
        """Signal handler for SIGTERM/SIGINT: clean up, then exit 0."""
        logger.debug(f"Received signal {signum}, shutting down...")
        self.cleanup()
        sys.exit(0)