Unverified Commit aba3ab03 authored by hhzhang16's avatar hhzhang16 Committed by GitHub
Browse files

fix: add ingress to llm example (#1349)

parent a2ed85a2
......@@ -232,10 +232,10 @@ Once the deployment is complete, you can test it using:
export FRONTEND_POD=$(kubectl get pods -n ${KUBE_NS} | grep "${DEPLOYMENT_NAME}-frontend" | sort -k1 | tail -n1 | awk '{print $1}')
# Forward the pod's port to localhost
kubectl port-forward pod/$FRONTEND_POD 8000:8000 -n ${KUBE_NS}
kubectl port-forward pod/$FRONTEND_POD 3000:3000 -n ${KUBE_NS}
# Test the API endpoint
curl localhost:8000/v1/chat/completions \
curl localhost:3000/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"model": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
......
......@@ -14,6 +14,7 @@
# limitations under the License.
import logging
import os
import subprocess
from pathlib import Path
......@@ -23,12 +24,15 @@ from components.worker import VllmWorker
from pydantic import BaseModel
from dynamo import sdk
from dynamo.sdk import async_on_shutdown, depends, service
from dynamo.sdk import api, async_on_shutdown, depends, service
from dynamo.sdk.lib.config import ServiceConfig
from dynamo.sdk.lib.image import DYNAMO_IMAGE
logger = logging.getLogger(__name__)
# TODO: temporary workaround — pin DYNAMO_PORT to 3999 to avoid a port conflict with the subprocess HTTP server; remove this once ingress is fixed.
os.environ["DYNAMO_PORT"] = "3999"
def get_http_binary_path():
"""Find the HTTP binary path in SDK or fallback to 'http' command."""
......@@ -105,6 +109,17 @@ class Frontend:
stderr=None,
)
@api()
def dummy_api(self) -> None:
    """
    Dummy API that enables the HTTP server for the Dynamo operator.

    This endpoint takes no arguments, returns nothing, and is not used by
    the model.

    NOTE: This is a temporary solution to expose ingress for the LLM
    examples. It will be fixed and removed in the future. Until then, the
    resulting api_endpoints in dynamo.yaml will be incorrect.
    """
@async_on_shutdown
def cleanup(self):
"""Clean up resources before shutdown."""
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment