Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
3c2b72b0
"docs/vscode:/vscode.git/clone" did not exist on "56d91ee962d945f2eab29dbc094c5d8f429070cd"
Unverified
Commit
3c2b72b0
authored
Dec 11, 2025
by
Biswa Panda
Committed by
GitHub
Dec 11, 2025
Browse files
fix: [lora] refactor test and clean up examples (#4884)
parent
242a4d5b
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
109 additions
and
121 deletions
+109
-121
examples/backends/vllm/launch/lora/agg_lora.sh
examples/backends/vllm/launch/lora/agg_lora.sh
+1
-11
examples/backends/vllm/launch/lora/agg_lora_router.sh
examples/backends/vllm/launch/lora/agg_lora_router.sh
+1
-9
tests/serve/conftest.py
tests/serve/conftest.py
+2
-2
tests/serve/lora_utils.py
tests/serve/lora_utils.py
+14
-10
tests/serve/test_vllm.py
tests/serve/test_vllm.py
+2
-89
tests/utils/payloads.py
tests/utils/payloads.py
+89
-0
No files found.
examples/backends/vllm/launch/lora/agg_lora.sh
View file @
3c2b72b0
...
...
@@ -4,14 +4,6 @@
set
-e
trap
'echo Cleaning up...; kill 0'
EXIT
# Follow the README.md instructions to setup MinIO or upload the LoRA to s3/minio
# Adjust these values to match your local MinIO or S3 setup
# load math lora to minio
# LORA_NAME=Neural-Hacker/Qwen3-Math-Reasoning-LoRA HF_LORA_REPO=Neural-Hacker/Qwen3-Math-Reasoning-LoRA ./setup_minio.sh
export
AWS_ENDPOINT
=
http://localhost:9000
export
AWS_ACCESS_KEY_ID
=
minioadmin
export
AWS_SECRET_ACCESS_KEY
=
minioadmin
...
...
@@ -21,8 +13,6 @@ export AWS_ALLOW_HTTP=true
# Dynamo LoRA Configuration
export
DYN_LORA_ENABLED
=
true
export
DYN_LORA_PATH
=
/tmp/dynamo_loras_minio
export
DYN_LOG
=
debug
# export DYN_LOG_LEVEL=debug
mkdir
-p
$DYN_LORA_PATH
...
...
@@ -63,7 +53,7 @@ curl -X POST http://localhost:8000/v1/chat/completions \
-H
"Content-Type: application/json"
\
-d
'{
"model": "Qwen/Qwen3-0.6B",
"messages": [{"role": "user", "content": "
Solve (x*x - x + 1 = 0) for x
"}],
"messages": [{"role": "user", "content": "
What is deep learning?
"}],
"max_tokens": 300,
"temperature": 0.0
}'
...
...
examples/backends/vllm/launch/lora/agg_lora_router.sh
View file @
3c2b72b0
...
...
@@ -4,12 +4,6 @@
set
-e
trap
'echo Cleaning up...; kill 0'
EXIT
# Follow the README.md instructions to setup MinIO or upload the LoRA to s3/minio
# Adjust these values to match your local MinIO or S3 setup
# load math lora to minio
# LORA_NAME=Neural-Hacker/Qwen3-Math-Reasoning-LoRA HF_LORA_REPO=Neural-Hacker/Qwen3-Math-Reasoning-LoRA ./setup_minio.sh
export
AWS_ENDPOINT
=
http://localhost:9000
export
AWS_ACCESS_KEY_ID
=
minioadmin
export
AWS_SECRET_ACCESS_KEY
=
minioadmin
...
...
@@ -19,8 +13,6 @@ export AWS_ALLOW_HTTP=true
# Dynamo LoRA Configuration
export
DYN_LORA_ENABLED
=
true
export
DYN_LORA_PATH
=
/tmp/dynamo_loras_minio
export
DYN_LOG
=
debug
# export DYN_LOG_LEVEL=debug
mkdir
-p
$DYN_LORA_PATH
...
...
@@ -118,7 +110,7 @@ curl localhost:8000/v1/chat/completions \
"total_tokens"
: 226,
"prompt_tokens_details"
:
{
"audio_tokens"
: null,
"cached_tokens"
: 192
"cached_tokens"
: 192
# tokens that were cached from the previous request.
}
}
,
"nvext"
:
{
...
...
tests/serve/conftest.py
View file @
3c2b72b0
...
...
@@ -86,8 +86,8 @@ def minio_lora_service():
local_path
=
service
.
download_lora
()
service
.
upload_lora
(
local_path
)
# Clean up downloaded files (keep MinIO
running
)
service
.
cleanup_
temp
()
# Clean up downloaded files (keep MinIO
data intact
)
service
.
cleanup_
download
()
yield
config
...
...
tests/serve/lora_utils.py
View file @
3c2b72b0
...
...
@@ -61,7 +61,7 @@ class MinioService:
def
__init__
(
self
,
config
:
MinioLoraConfig
):
self
.
config
=
config
self
.
_logger
=
logging
.
getLogger
(
self
.
__class__
.
__name__
)
self
.
_temp_dir
:
Optional
[
str
]
=
None
self
.
_temp_
download_
dir
:
Optional
[
str
]
=
None
def
start
(
self
)
->
None
:
"""Start MinIO container"""
...
...
@@ -183,9 +183,9 @@ class MinioService:
def
download_lora
(
self
)
->
str
:
"""Download LoRA from Hugging Face Hub, returns temp directory path"""
self
.
_temp_dir
=
tempfile
.
mkdtemp
(
prefix
=
"lora_download_"
)
self
.
_temp_
download_
dir
=
tempfile
.
mkdtemp
(
prefix
=
"lora_download_"
)
self
.
_logger
.
info
(
f
"Downloading LoRA
{
self
.
config
.
lora_repo
}
to
{
self
.
_temp_dir
}
"
f
"Downloading LoRA
{
self
.
config
.
lora_repo
}
to
{
self
.
_temp_
download_
dir
}
"
)
result
=
subprocess
.
run
(
...
...
@@ -194,7 +194,7 @@ class MinioService:
"download"
,
self
.
config
.
lora_repo
,
"--local-dir"
,
self
.
_temp_dir
,
self
.
_temp_
download_
dir
,
"--local-dir-use-symlinks"
,
"False"
,
],
...
...
@@ -206,11 +206,11 @@ class MinioService:
raise
RuntimeError
(
f
"Failed to download LoRA:
{
result
.
stderr
}
"
)
# Clean up cache directory
cache_dir
=
os
.
path
.
join
(
self
.
_temp_dir
,
".cache"
)
cache_dir
=
os
.
path
.
join
(
self
.
_temp_
download_
dir
,
".cache"
)
if
os
.
path
.
exists
(
cache_dir
):
shutil
.
rmtree
(
cache_dir
)
return
self
.
_temp_dir
return
self
.
_temp_
download_
dir
def
upload_lora
(
self
,
local_path
:
str
)
->
None
:
"""Upload LoRA to MinIO"""
...
...
@@ -246,11 +246,15 @@ class MinioService:
if
result
.
returncode
!=
0
:
raise
RuntimeError
(
f
"Failed to upload LoRA:
{
result
.
stderr
}
"
)
def
cleanup_download
(
self
)
->
None
:
"""Clean up temporary download directory only"""
if
self
.
_temp_download_dir
and
os
.
path
.
exists
(
self
.
_temp_download_dir
):
shutil
.
rmtree
(
self
.
_temp_download_dir
)
self
.
_temp_download_dir
=
None
def
cleanup_temp
(
self
)
->
None
:
"""Clean up temporary directories"""
if
self
.
_temp_dir
and
os
.
path
.
exists
(
self
.
_temp_dir
):
shutil
.
rmtree
(
self
.
_temp_dir
)
self
.
_temp_dir
=
None
"""Clean up all temporary directories including MinIO data dir"""
self
.
cleanup_download
()
if
self
.
config
.
data_dir
and
os
.
path
.
exists
(
self
.
config
.
data_dir
):
shutil
.
rmtree
(
self
.
config
.
data_dir
,
ignore_errors
=
True
)
...
...
tests/serve/test_vllm.py
View file @
3c2b72b0
...
...
@@ -16,7 +16,7 @@ from tests.serve.common import (
run_serve_deployment
,
)
from
tests.serve.conftest
import
MULTIMODAL_IMG_PATH
,
MULTIMODAL_IMG_URL
from
tests.serve.lora_utils
import
MinioLoraConfig
,
load_lora_adapter
from
tests.serve.lora_utils
import
MinioLoraConfig
from
tests.utils.engine_process
import
EngineConfig
from
tests.utils.payload_builder
import
(
chat_payload
,
...
...
@@ -26,7 +26,7 @@ from tests.utils.payload_builder import (
completion_payload_with_logprobs
,
metric_payload_default
,
)
from
tests.utils.payloads
import
ChatPayload
,
ToolCallingChatPayload
from
tests.utils.payloads
import
LoraTest
ChatPayload
,
ToolCallingChatPayload
logger
=
logging
.
getLogger
(
__name__
)
...
...
@@ -614,93 +614,6 @@ def test_multimodal_b64(request, runtime_services, predownload_models):
lora_dir
=
os
.
path
.
join
(
vllm_dir
,
"launch/lora"
)
class
LoraTestChatPayload
(
ChatPayload
):
"""
Chat payload that loads a LoRA adapter before sending inference requests.
This payload first loads the specified LoRA adapter via the system API,
then sends chat completion requests using the LoRA model.
"""
def
__init__
(
self
,
body
:
dict
,
lora_name
:
str
,
s3_uri
:
str
,
system_port
:
int
=
8081
,
repeat_count
:
int
=
1
,
expected_response
:
Optional
[
list
]
=
None
,
expected_log
:
Optional
[
list
]
=
None
,
timeout
:
int
=
60
,
):
super
().
__init__
(
body
=
body
,
repeat_count
=
repeat_count
,
expected_response
=
expected_response
or
[],
expected_log
=
expected_log
or
[],
timeout
=
timeout
,
)
self
.
system_port
=
system_port
self
.
lora_name
=
lora_name
self
.
s3_uri
=
s3_uri
self
.
_lora_loaded
=
False
def
_ensure_lora_loaded
(
self
)
->
None
:
"""Ensure the LoRA adapter is loaded before making inference requests"""
if
not
self
.
_lora_loaded
:
import
time
import
requests
load_lora_adapter
(
system_port
=
self
.
system_port
,
lora_name
=
self
.
lora_name
,
s3_uri
=
self
.
s3_uri
,
timeout
=
self
.
timeout
,
)
# Wait for the LoRA model to appear in /v1/models
models_url
=
f
"http://
{
self
.
host
}
:
{
self
.
port
}
/v1/models"
start_time
=
time
.
time
()
max_wait
=
60
# 1 minute timeout
logger
.
info
(
f
"Waiting for LoRA model '
{
self
.
lora_name
}
' to appear in /v1/models..."
)
while
time
.
time
()
-
start_time
<
max_wait
:
try
:
response
=
requests
.
get
(
models_url
,
timeout
=
5
)
if
response
.
status_code
==
200
:
data
=
response
.
json
()
models
=
data
.
get
(
"data"
,
[])
model_ids
=
[
m
.
get
(
"id"
,
""
)
for
m
in
models
]
if
self
.
lora_name
in
model_ids
:
logger
.
info
(
f
"LoRA model '
{
self
.
lora_name
}
' is now available"
)
self
.
_lora_loaded
=
True
return
logger
.
debug
(
f
"Available models:
{
model_ids
}
, waiting for '
{
self
.
lora_name
}
'..."
)
except
requests
.
RequestException
as
e
:
logger
.
debug
(
f
"Error checking /v1/models:
{
e
}
"
)
time
.
sleep
(
1
)
raise
RuntimeError
(
f
"Timeout: LoRA model '
{
self
.
lora_name
}
' did not appear in /v1/models within
{
max_wait
}
s"
)
def
url
(
self
)
->
str
:
"""Load LoRA before first request, then return URL"""
self
.
_ensure_lora_loaded
()
return
super
().
url
()
def
lora_chat_payload
(
lora_name
:
str
,
s3_uri
:
str
,
...
...
tests/utils/payloads.py
View file @
3c2b72b0
...
...
@@ -21,6 +21,8 @@ from copy import deepcopy
from
dataclasses
import
dataclass
from
typing
import
Any
,
Callable
,
Dict
,
List
,
Optional
import
requests
from
dynamo
import
prometheus_names
# type: ignore[attr-defined]
logger
=
logging
.
getLogger
(
__name__
)
...
...
@@ -240,6 +242,93 @@ class ToolCallingChatPayload(ChatPayload):
logger
.
info
(
f
"Expected tool '
{
self
.
expected_tool_name
}
' was called"
)
@
dataclass
class
LoraTestChatPayload
(
ChatPayload
):
"""
Chat payload that loads a LoRA adapter before sending inference requests.
This payload first loads the specified LoRA adapter via the system API,
then sends chat completion requests using the LoRA model.
"""
def
__init__
(
self
,
body
:
dict
,
lora_name
:
str
,
s3_uri
:
str
,
system_port
:
int
=
8081
,
repeat_count
:
int
=
1
,
expected_response
:
Optional
[
list
]
=
None
,
expected_log
:
Optional
[
list
]
=
None
,
timeout
:
int
=
60
,
):
super
().
__init__
(
body
=
body
,
repeat_count
=
repeat_count
,
expected_response
=
expected_response
or
[],
expected_log
=
expected_log
or
[],
timeout
=
timeout
,
)
self
.
system_port
=
system_port
self
.
lora_name
=
lora_name
self
.
s3_uri
=
s3_uri
self
.
_lora_loaded
=
False
def
_ensure_lora_loaded
(
self
)
->
None
:
"""Ensure the LoRA adapter is loaded before making inference requests"""
if
not
self
.
_lora_loaded
:
# Import the load_lora_adapter function
# Note: This import is done here to avoid circular dependencies
from
tests.serve.lora_utils
import
load_lora_adapter
load_lora_adapter
(
system_port
=
self
.
system_port
,
lora_name
=
self
.
lora_name
,
s3_uri
=
self
.
s3_uri
,
timeout
=
self
.
timeout
,
)
# Wait for the LoRA model to appear in /v1/models
models_url
=
f
"http://
{
self
.
host
}
:
{
self
.
port
}
/v1/models"
start_time
=
time
.
time
()
logger
.
info
(
f
"Waiting for LoRA model '
{
self
.
lora_name
}
' to appear in /v1/models..."
)
while
time
.
time
()
-
start_time
<
self
.
timeout
:
try
:
response
=
requests
.
get
(
models_url
,
timeout
=
5
)
if
response
.
status_code
==
200
:
data
=
response
.
json
()
models
=
data
.
get
(
"data"
,
[])
model_ids
=
[
m
.
get
(
"id"
,
""
)
for
m
in
models
]
if
self
.
lora_name
in
model_ids
:
logger
.
info
(
f
"LoRA model '
{
self
.
lora_name
}
' is now available"
)
self
.
_lora_loaded
=
True
return
logger
.
debug
(
f
"Available models:
{
model_ids
}
, waiting for '
{
self
.
lora_name
}
'..."
)
except
requests
.
RequestException
as
e
:
logger
.
debug
(
f
"Error checking /v1/models:
{
e
}
"
)
time
.
sleep
(
1
)
raise
RuntimeError
(
f
"Timeout: LoRA model '
{
self
.
lora_name
}
' did not appear in /v1/models within
{
self
.
timeout
}
s"
)
def
url
(
self
)
->
str
:
"""Load LoRA before first request, then return URL"""
self
.
_ensure_lora_loaded
()
return
super
().
url
()
@
dataclass
class
CompletionPayload
(
BasePayload
):
"""Payload for completions endpoint."""
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment