Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
aba3ab03
Unverified
Commit
aba3ab03
authored
Jun 03, 2025
by
hhzhang16
Committed by
GitHub
Jun 03, 2025
Browse files
fix: add ingress to llm example (#1349)
parent
a2ed85a2
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
18 additions
and
3 deletions
+18
-3
examples/llm/README.md
examples/llm/README.md
+2
-2
examples/llm/components/frontend.py
examples/llm/components/frontend.py
+16
-1
No files found.
examples/llm/README.md
View file @
aba3ab03
...
@@ -232,10 +232,10 @@ Once the deployment is complete, you can test it using:
...
@@ -232,10 +232,10 @@ Once the deployment is complete, you can test it using:
export
FRONTEND_POD
=
$(
kubectl get pods
-n
${
KUBE_NS
}
|
grep
"
${
DEPLOYMENT_NAME
}
-frontend"
|
sort
-k1
|
tail
-n1
|
awk
'{print $1}'
)
export
FRONTEND_POD
=
$(
kubectl get pods
-n
${
KUBE_NS
}
|
grep
"
${
DEPLOYMENT_NAME
}
-frontend"
|
sort
-k1
|
tail
-n1
|
awk
'{print $1}'
)
# Forward the pod's port to localhost
# Forward the pod's port to localhost
kubectl port-forward pod/
$FRONTEND_POD
8
000:
8
000
-n
${
KUBE_NS
}
kubectl port-forward pod/
$FRONTEND_POD
3
000:
3
000
-n
${
KUBE_NS
}
# Test the API endpoint
# Test the API endpoint
curl localhost:
8
000/v1/chat/completions
\
curl localhost:
3
000/v1/chat/completions
\
-H
"Content-Type: application/json"
\
-H
"Content-Type: application/json"
\
-d
'{
-d
'{
"model": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
"model": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
...
...
examples/llm/components/frontend.py
View file @
aba3ab03
...
@@ -14,6 +14,7 @@
...
@@ -14,6 +14,7 @@
# limitations under the License.
# limitations under the License.
import
logging
import
logging
import
os
import
subprocess
import
subprocess
from
pathlib
import
Path
from
pathlib
import
Path
...
@@ -23,12 +24,15 @@ from components.worker import VllmWorker
...
@@ -23,12 +24,15 @@ from components.worker import VllmWorker
from
pydantic
import
BaseModel
from
pydantic
import
BaseModel
from
dynamo
import
sdk
from
dynamo
import
sdk
from
dynamo.sdk
import
async_on_shutdown
,
depends
,
service
from
dynamo.sdk
import
api
,
async_on_shutdown
,
depends
,
service
from
dynamo.sdk.lib.config
import
ServiceConfig
from
dynamo.sdk.lib.config
import
ServiceConfig
from
dynamo.sdk.lib.image
import
DYNAMO_IMAGE
from
dynamo.sdk.lib.image
import
DYNAMO_IMAGE
logger
=
logging
.
getLogger
(
__name__
)
logger
=
logging
.
getLogger
(
__name__
)
# TODO: temp workaround to avoid port conflict with subprocess HTTP server; remove this once ingress is fixed
os
.
environ
[
"DYNAMO_PORT"
]
=
"3999"
def
get_http_binary_path
():
def
get_http_binary_path
():
"""Find the HTTP binary path in SDK or fallback to 'http' command."""
"""Find the HTTP binary path in SDK or fallback to 'http' command."""
...
@@ -105,6 +109,17 @@ class Frontend:
...
@@ -105,6 +109,17 @@ class Frontend:
stderr
=
None
,
stderr
=
None
,
)
)
@
api
()
def
dummy_api
(
self
)
->
None
:
"""
Dummy API to enable the HTTP server for the Dynamo operator.
This API is not used by the model.
NOTE: this is a temporary solution to expose ingress
for the LLM examples. Will be fixed and removed in the future.
The resulting api_endpoints in dynamo.yaml will be incorrect.
"""
@
async_on_shutdown
@
async_on_shutdown
def
cleanup
(
self
):
def
cleanup
(
self
):
"""Clean up resources before shutdown."""
"""Clean up resources before shutdown."""
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment