Unverified Commit aba3ab03 authored by hhzhang16's avatar hhzhang16 Committed by GitHub
Browse files

fix: add ingress to llm example (#1349)

parent a2ed85a2
...@@ -232,10 +232,10 @@ Once the deployment is complete, you can test it using: ...@@ -232,10 +232,10 @@ Once the deployment is complete, you can test it using:
export FRONTEND_POD=$(kubectl get pods -n ${KUBE_NS} | grep "${DEPLOYMENT_NAME}-frontend" | sort -k1 | tail -n1 | awk '{print $1}') export FRONTEND_POD=$(kubectl get pods -n ${KUBE_NS} | grep "${DEPLOYMENT_NAME}-frontend" | sort -k1 | tail -n1 | awk '{print $1}')
# Forward the pod's port to localhost # Forward the pod's port to localhost
kubectl port-forward pod/$FRONTEND_POD 8000:8000 -n ${KUBE_NS} kubectl port-forward pod/$FRONTEND_POD 3000:3000 -n ${KUBE_NS}
# Test the API endpoint # Test the API endpoint
curl localhost:8000/v1/chat/completions \ curl localhost:3000/v1/chat/completions \
-H "Content-Type: application/json" \ -H "Content-Type: application/json" \
-d '{ -d '{
"model": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", "model": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
# limitations under the License. # limitations under the License.
import logging import logging
import os
import subprocess import subprocess
from pathlib import Path from pathlib import Path
...@@ -23,12 +24,15 @@ from components.worker import VllmWorker ...@@ -23,12 +24,15 @@ from components.worker import VllmWorker
from pydantic import BaseModel from pydantic import BaseModel
from dynamo import sdk from dynamo import sdk
from dynamo.sdk import async_on_shutdown, depends, service from dynamo.sdk import api, async_on_shutdown, depends, service
from dynamo.sdk.lib.config import ServiceConfig from dynamo.sdk.lib.config import ServiceConfig
from dynamo.sdk.lib.image import DYNAMO_IMAGE from dynamo.sdk.lib.image import DYNAMO_IMAGE
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# TODO: temporary workaround -- pin DYNAMO_PORT so it does not conflict with
# the port of the subprocess HTTP server; remove this once ingress is fixed.
os.environ.update({"DYNAMO_PORT": "3999"})
def get_http_binary_path(): def get_http_binary_path():
"""Find the HTTP binary path in SDK or fallback to 'http' command.""" """Find the HTTP binary path in SDK or fallback to 'http' command."""
...@@ -105,6 +109,17 @@ class Frontend: ...@@ -105,6 +109,17 @@ class Frontend:
stderr=None, stderr=None,
) )
@api()
def dummy_api(self) -> None:
    """Placeholder endpoint that exists only to enable the HTTP server.

    Registering this API enables the HTTP server for the Dynamo operator.
    The model itself never uses this endpoint.

    NOTE: this is a temporary measure to expose ingress for the LLM
    examples and is expected to be fixed and removed in the future.
    While it is in place, the api_endpoints generated in dynamo.yaml
    will be incorrect.
    """
    return None
@async_on_shutdown @async_on_shutdown
def cleanup(self): def cleanup(self):
"""Clean up resources before shutdown.""" """Clean up resources before shutdown."""
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment