Unverified commit 3bad9666, authored by Biswa Panda, committed by GitHub
Browse files

feat: update examples to use public lora (#4807)

parent d13c017d
......@@ -35,7 +35,7 @@ DYN_SYSTEM_ENABLED=true DYN_SYSTEM_PORT=8081 \
python -m dynamo.vllm --model Qwen/Qwen3-0.6B --enforce-eager \
--connector none \
--enable-lora \
--max-lora-rank 32
--max-lora-rank 64
################################## Example Usage ##################################
......@@ -43,26 +43,21 @@ DYN_SYSTEM_ENABLED=true DYN_SYSTEM_PORT=8081 \
curl http://localhost:8000/v1/models | jq .
# Load LoRA using an S3 URI
curl -X POST http://localhost:8081/v1/loras \
-H "Content-Type: application/json" \
-d '{
"lora_name": "Neural-Hacker/Qwen3-Math-Reasoning-LoRA",
"source": {
"uri": "s3://my-loras/Neural-Hacker/Qwen3-Math-Reasoning-LoRA"
}
}'
curl -s -X POST http://localhost:8081/v1/loras \
-H "Content-Type: application/json" \
-d '{"lora_name": "codelion/Qwen3-0.6B-accuracy-recovery-lora",
"source": {"uri": "s3://my-loras/codelion/Qwen3-0.6B-accuracy-recovery-lora"}}' | jq .
# Test LoRA inference
curl -X POST http://localhost:8000/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"model": "Neural-Hacker/Qwen3-Math-Reasoning-LoRA",
"messages": [{"role": "user", "content": "Solve (x*x - x + 1 = 0) for x"}],
"model": "codelion/Qwen3-0.6B-accuracy-recovery-lora",
"messages": [{"role": "user", "content": "What is deep learning?"}],
"max_tokens": 300,
"temperature": 0.0
}'
# Find the minimum possible value of \( x^2 + y^2 \) given that \( x \) and \( y \) are real numbers satisfying \( xy(x^2 - y^2) = x^2 + y^2 \) and \( x \neq 0 \)
# Test base model inference (for comparison)
curl -X POST http://localhost:8000/v1/chat/completions \
-H "Content-Type: application/json" \
......@@ -74,4 +69,4 @@ curl -X POST http://localhost:8000/v1/chat/completions \
}'
# Unload LoRA
curl -X DELETE http://localhost:8081/v1/loras/Neural-Hacker/Qwen3-Math-Reasoning-LoRA
curl -X DELETE http://localhost:8081/v1/loras/codelion/Qwen3-0.6B-accuracy-recovery-lora
......@@ -20,8 +20,8 @@ MINIO_SECRET_KEY="minioadmin"
BUCKET_NAME="my-loras"
# Default LoRA to download (can be overridden)
HF_LORA_REPO="${HF_LORA_REPO:-Neural-Hacker/Qwen3-Math-Reasoning-LoRA}"
LORA_NAME="${LORA_NAME:-Neural-Hacker/Qwen3-Math-Reasoning-LoRA}"
HF_LORA_REPO="${HF_LORA_REPO:-codelion/Qwen3-0.6B-accuracy-recovery-lora}"
LORA_NAME="${LORA_NAME:-codelion/Qwen3-0.6B-accuracy-recovery-lora}"
# TEMP_DIR will be created using mktemp when needed
TEMP_DIR=""
......@@ -63,8 +63,8 @@ show_help() {
echo " --help, -h Show this help message"
echo ""
echo "Environment Variables:"
echo " HF_LORA_REPO Hugging Face repository (default: ${HF_LORA_REPO:-Neural-Hacker/Qwen3-Math-Reasoning-LoRA})"
echo " LORA_NAME Local name for the LoRA (default: ${LORA_NAME:-Neural-Hacker/Qwen3-Math-Reasoning-LoRA})"
echo " HF_LORA_REPO Hugging Face repository (default: ${HF_LORA_REPO:-codelion/Qwen3-0.6B-accuracy-recovery-lora})"
echo " LORA_NAME Local name for the LoRA (default: ${LORA_NAME:-codelion/Qwen3-0.6B-accuracy-recovery-lora})"
echo ""
echo "Examples:"
echo " $0 # Full setup"
......@@ -173,6 +173,7 @@ download_lora_from_hf() {
print_success "LoRA downloaded to ${TEMP_DIR}"
rm -rf "${TEMP_DIR}/.cache"
# List downloaded files
echo "Downloaded files:"
ls -lh "${TEMP_DIR}"
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment