"examples/python_rs/__init__.py" did not exist on "08fcd7e93ba5df3093a8b54fe79e0895fe7a5f15"
Commit bb4e819c, authored by Alec, committed by GitHub
Browse files

fix: frontend now cleans up model from llmctl (#588)

parent a40aae9e
...@@ -14,7 +14,9 @@ ...@@ -14,7 +14,9 @@
# limitations under the License. # limitations under the License.
import logging import logging
import signal
import subprocess import subprocess
import sys
from pathlib import Path from pathlib import Path
from components.processor import Processor from components.processor import Processor
...@@ -44,12 +46,12 @@ class FrontendConfig(BaseModel): ...@@ -44,12 +46,12 @@ class FrontendConfig(BaseModel):
port: int = 8080 port: int = 8080
# todo this should be called ApiServer
@service( @service(
resources={"cpu": "10", "memory": "20Gi"}, resources={"cpu": "10", "memory": "20Gi"},
workers=1, workers=1,
image=DYNAMO_IMAGE, image=DYNAMO_IMAGE,
) )
# todo this should be called ApiServer
class Frontend: class Frontend:
worker = depends(VllmWorker) worker = depends(VllmWorker)
processor = depends(Processor) processor = depends(Processor)
...@@ -57,34 +59,70 @@ class Frontend: ...@@ -57,34 +59,70 @@ class Frontend:
def __init__(self): def __init__(self):
config = ServiceConfig.get_instance() config = ServiceConfig.get_instance()
frontend_config = FrontendConfig(**config.get("Frontend", {})) frontend_config = FrontendConfig(**config.get("Frontend", {}))
self.frontend_config = frontend_config
self.process = None
signal.signal(signal.SIGTERM, self.handle_exit)
signal.signal(signal.SIGINT, self.handle_exit)
# Initial setup
self.setup_model()
self.start_http_server()
try:
if self.process:
self.process.wait()
except KeyboardInterrupt:
self.cleanup()
def setup_model(self):
subprocess.run( subprocess.run(
[ [
"llmctl", "llmctl",
"http", "http",
"remove", "remove",
"chat-models", "chat-models",
frontend_config.served_model_name, self.frontend_config.served_model_name,
] ]
) )
# Add the model
subprocess.run( subprocess.run(
[ [
"llmctl", "llmctl",
"http", "http",
"add", "add",
"chat-models", "chat-models",
frontend_config.served_model_name, self.frontend_config.served_model_name,
frontend_config.endpoint, self.frontend_config.endpoint,
] ]
) )
def start_http_server(self):
logger.info("Starting HTTP server") logger.info("Starting HTTP server")
http_binary = get_http_binary_path() http_binary = get_http_binary_path()
process = subprocess.Popen( self.process = subprocess.Popen(
[http_binary, "-p", str(frontend_config.port)], stdout=None, stderr=None [http_binary, "-p", str(self.frontend_config.port)],
stdout=None,
stderr=None,
) )
try:
process.wait() def cleanup(self):
except KeyboardInterrupt: logger.info("Cleaning up before shutdown...")
process.terminate() subprocess.run(
process.wait() [
"llmctl",
"http",
"remove",
"chat-models",
self.frontend_config.served_model_name,
]
)
if self.process:
logger.info("Terminating HTTP process")
self.process.terminate()
self.process.wait(timeout=10)
def handle_exit(self, signum, frame):
logger.debug(f"Received signal {signum}, shutting down...")
self.cleanup()
sys.exit(0)
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment