"lib/llm/src/vscode:/vscode.git/clone" did not exist on "1d34af75ed36aacacf99ea99f83b16f5db0a32ed"
Unverified commit 3bad9666, authored by Biswa Panda and committed by GitHub
Browse files

feat: update examples to use public lora (#4807)

parent d13c017d
...@@ -35,7 +35,7 @@ DYN_SYSTEM_ENABLED=true DYN_SYSTEM_PORT=8081 \ ...@@ -35,7 +35,7 @@ DYN_SYSTEM_ENABLED=true DYN_SYSTEM_PORT=8081 \
python -m dynamo.vllm --model Qwen/Qwen3-0.6B --enforce-eager \ python -m dynamo.vllm --model Qwen/Qwen3-0.6B --enforce-eager \
--connector none \ --connector none \
--enable-lora \ --enable-lora \
--max-lora-rank 32 --max-lora-rank 64
################################## Example Usage ################################## ################################## Example Usage ##################################
...@@ -43,26 +43,21 @@ DYN_SYSTEM_ENABLED=true DYN_SYSTEM_PORT=8081 \ ...@@ -43,26 +43,21 @@ DYN_SYSTEM_ENABLED=true DYN_SYSTEM_PORT=8081 \
curl http://localhost:8000/v1/models | jq . curl http://localhost:8000/v1/models | jq .
# Load LoRA using s3 uri # Load LoRA using s3 uri
curl -X POST http://localhost:8081/v1/loras \ curl -s -X POST http://localhost:8081/v1/loras \
-H "Content-Type: application/json" \ -H "Content-Type: application/json" \
-d '{ -d '{"lora_name": "codelion/Qwen3-0.6B-accuracy-recovery-lora",
"lora_name": "Neural-Hacker/Qwen3-Math-Reasoning-LoRA", "source": {"uri": "s3://my-loras/codelion/Qwen3-0.6B-accuracy-recovery-lora"}}' | jq .
"source": {
"uri": "s3://my-loras/Neural-Hacker/Qwen3-Math-Reasoning-LoRA"
}
}'
# Test LoRA inference # Test LoRA inference
curl -X POST http://localhost:8000/v1/chat/completions \ curl -X POST http://localhost:8000/v1/chat/completions \
-H "Content-Type: application/json" \ -H "Content-Type: application/json" \
-d '{ -d '{
"model": "Neural-Hacker/Qwen3-Math-Reasoning-LoRA", "model": "codelion/Qwen3-0.6B-accuracy-recovery-lora",
"messages": [{"role": "user", "content": "Solve (x*x - x + 1 = 0) for x"}], "messages": [{"role": "user", "content": "What is deep learning?"}],
"max_tokens": 300, "max_tokens": 300,
"temperature": 0.0 "temperature": 0.0
}' }'
# Find the minimum possible value of \( x^2 + y^2 \) given that \( x \) and \( y \) are real numbers satisfying \( xy(x^2 - y^2) = x^2 + y^2 \) and \( x \neq 0 \)
# Test base model inference (for comparison) # Test base model inference (for comparison)
curl -X POST http://localhost:8000/v1/chat/completions \ curl -X POST http://localhost:8000/v1/chat/completions \
-H "Content-Type: application/json" \ -H "Content-Type: application/json" \
...@@ -74,4 +69,4 @@ curl -X POST http://localhost:8000/v1/chat/completions \ ...@@ -74,4 +69,4 @@ curl -X POST http://localhost:8000/v1/chat/completions \
}' }'
# Unload LoRA # Unload LoRA
curl -X DELETE http://localhost:8081/v1/loras/Neural-Hacker/Qwen3-Math-Reasoning-LoRA curl -X DELETE http://localhost:8081/v1/loras/codelion/Qwen3-0.6B-accuracy-recovery-lora
...@@ -20,8 +20,8 @@ MINIO_SECRET_KEY="minioadmin" ...@@ -20,8 +20,8 @@ MINIO_SECRET_KEY="minioadmin"
BUCKET_NAME="my-loras" BUCKET_NAME="my-loras"
# Default LoRA to download (can be overridden) # Default LoRA to download (can be overridden)
HF_LORA_REPO="${HF_LORA_REPO:-Neural-Hacker/Qwen3-Math-Reasoning-LoRA}" HF_LORA_REPO="${HF_LORA_REPO:-codelion/Qwen3-0.6B-accuracy-recovery-lora}"
LORA_NAME="${LORA_NAME:-Neural-Hacker/Qwen3-Math-Reasoning-LoRA}" LORA_NAME="${LORA_NAME:-codelion/Qwen3-0.6B-accuracy-recovery-lora}"
# TEMP_DIR will be created using mktemp when needed # TEMP_DIR will be created using mktemp when needed
TEMP_DIR="" TEMP_DIR=""
...@@ -63,8 +63,8 @@ show_help() { ...@@ -63,8 +63,8 @@ show_help() {
echo " --help, -h Show this help message" echo " --help, -h Show this help message"
echo "" echo ""
echo "Environment Variables:" echo "Environment Variables:"
echo " HF_LORA_REPO Hugging Face repository (default: ${HF_LORA_REPO:-Neural-Hacker/Qwen3-Math-Reasoning-LoRA})" echo " HF_LORA_REPO Hugging Face repository (default: ${HF_LORA_REPO:-codelion/Qwen3-0.6B-accuracy-recovery-lora})"
echo " LORA_NAME Local name for the LoRA (default: ${LORA_NAME:-Neural-Hacker/Qwen3-Math-Reasoning-LoRA})" echo " LORA_NAME Local name for the LoRA (default: ${LORA_NAME:-codelion/Qwen3-0.6B-accuracy-recovery-lora})"
echo "" echo ""
echo "Examples:" echo "Examples:"
echo " $0 # Full setup" echo " $0 # Full setup"
...@@ -173,6 +173,7 @@ download_lora_from_hf() { ...@@ -173,6 +173,7 @@ download_lora_from_hf() {
print_success "LoRA downloaded to ${TEMP_DIR}" print_success "LoRA downloaded to ${TEMP_DIR}"
rm -rf "${TEMP_DIR}/.cache"
# List downloaded files # List downloaded files
echo "Downloaded files:" echo "Downloaded files:"
ls -lh "${TEMP_DIR}" ls -lh "${TEMP_DIR}"
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment