feat: update examples to use public lora (#4807)

3bad9666 · Biswa Panda · GitHub · d13c017d · 3bad9666 · 3bad9666
Unverified commit 3bad9666, authored Dec 08, 2025 by Biswa Panda; committed by GitHub on Dec 08, 2025.
Showing 2 changed files with 13 additions and 17 deletions

examples/backends/vllm/launch/lora/agg_lora_s3.sh (+8 -13)

examples/backends/vllm/launch/lora/setup_minio.sh (+5 -4)

No files found.
--- a/examples/backends/vllm/launch/lora/agg_lora_s3.sh
+++ b/examples/backends/vllm/launch/lora/agg_lora_s3.sh
@@ -35,7 +35,7 @@ DYN_SYSTEM_ENABLED=true DYN_SYSTEM_PORT=8081 \
    python -m dynamo.vllm --model Qwen/Qwen3-0.6B --enforce-eager  \
    --connector none  \
    --enable-lora  \
-    --max-lora-rank 32
+    --max-lora-rank 64
 ################################## Example Usage ##################################
@@ -43,26 +43,21 @@ DYN_SYSTEM_ENABLED=true DYN_SYSTEM_PORT=8081 \
 curl http://localhost:8000/v1/models | jq .
 # Load LoRA using s3 uri
-curl -X POST http://localhost:8081/v1/loras \
+curl -s  -X POST http://localhost:8081/v1/loras \
       -H "Content-Type: application/json" \
-  -d '{
+       -d '{"lora_name": "codelion/Qwen3-0.6B-accuracy-recovery-lora",
-    "lora_name": "Neural-Hacker/Qwen3-Math-Reasoning-LoRA",
+     "source": {"uri": "s3://my-loras/codelion/Qwen3-0.6B-accuracy-recovery-lora"}}' | jq .
-    "source": {
-      "uri": "s3://my-loras/Neural-Hacker/Qwen3-Math-Reasoning-LoRA"
-    }
-  }'
 # Test LoRA inference
 curl -X POST http://localhost:8000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
-    "model": "Neural-Hacker/Qwen3-Math-Reasoning-LoRA",
+    "model": "codelion/Qwen3-0.6B-accuracy-recovery-lora",
-    "messages": [{"role": "user", "content": "Solve (x*x - x + 1 = 0) for x"}],
+    "messages": [{"role": "user", "content": "What is deep learning?"}],
    "max_tokens": 300,
    "temperature": 0.0
  }'
-# Find the minimum possible value of \( x^2 + y^2 \) given that \( x \) and \( y \) are real numbers satisfying \( xy(x^2 - y^2) = x^2 + y^2 \) and \( x \neq 0 \)
 # Test base model inference (for comparison)
 curl -X POST http://localhost:8000/v1/chat/completions \
  -H "Content-Type: application/json" \
@@ -74,4 +69,4 @@ curl -X POST http://localhost:8000/v1/chat/completions \
  }'
 # Unload LoRA
-curl -X DELETE http://localhost:8081/v1/loras/Neural-Hacker/Qwen3-Math-Reasoning-LoRA
+curl -X DELETE http://localhost:8081/v1/loras/codelion/Qwen3-0.6B-accuracy-recovery-lora
--- a/examples/backends/vllm/launch/lora/setup_minio.sh
+++ b/examples/backends/vllm/launch/lora/setup_minio.sh
@@ -20,8 +20,8 @@ MINIO_SECRET_KEY="minioadmin"
 BUCKET_NAME="my-loras"
 # Default LoRA to download (can be overridden)
-HF_LORA_REPO="${HF_LORA_REPO:-Neural-Hacker/Qwen3-Math-Reasoning-LoRA}"
+HF_LORA_REPO="${HF_LORA_REPO:-codelion/Qwen3-0.6B-accuracy-recovery-lora}"
-LORA_NAME="${LORA_NAME:-Neural-Hacker/Qwen3-Math-Reasoning-LoRA}"
+LORA_NAME="${LORA_NAME:-codelion/Qwen3-0.6B-accuracy-recovery-lora}"
 # TEMP_DIR will be created using mktemp when needed
 TEMP_DIR=""
@@ -63,8 +63,8 @@ show_help() {
    echo "  --help, -h    Show this help message"
    echo ""
    echo "Environment Variables:"
-    echo "  HF_LORA_REPO  Hugging Face repository (default: ${HF_LORA_REPO:-Neural-Hacker/Qwen3-Math-Reasoning-LoRA})"
+    echo "  HF_LORA_REPO  Hugging Face repository (default: ${HF_LORA_REPO:-codelion/Qwen3-0.6B-accuracy-recovery-lora})"
-    echo "  LORA_NAME     Local name for the LoRA (default: ${LORA_NAME:-Neural-Hacker/Qwen3-Math-Reasoning-LoRA})"
+    echo "  LORA_NAME     Local name for the LoRA (default: ${LORA_NAME:-codelion/Qwen3-0.6B-accuracy-recovery-lora})"
    echo ""
    echo "Examples:"
    echo "  $0                                    # Full setup"
@@ -173,6 +173,7 @@ download_lora_from_hf() {
    print_success "LoRA downloaded to ${TEMP_DIR}"
+    rm -rf "${TEMP_DIR}/.cache"
    # List downloaded files
    echo "Downloaded files:"
    ls -lh "${TEMP_DIR}"