#!/bin/bash
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

# LMCache Dynamo One-Click Test Script

# Positional arguments (both optional):
#   $1 - model identifier handed to the deploy scripts and to --model
#   $2 - value handed to the MMLU scripts' --number-of-subjects option
MODEL_URL="${1:-Qwen/Qwen3-0.6B}"
NUM_SUBJECTS="${2:-15}"

# Show the effective settings before any work starts.
printf '%s\n' \
    "🧪 LMCache Dynamo Complete Test" \
    "===============================" \
    "Model: $MODEL_URL" \
    "Number of subjects: $NUM_SUBJECTS" \
    ""

#######################################
# Tear down whatever this script started. Installed as the EXIT trap and
# also called explicitly between test phases, so it must be safe to run
# more than once.
# Globals:   DEPLOY_PID (read, optional) - PID of the backgrounded deploy
# Outputs:   one progress line on stdout
# Returns:   status of the final sleep; teardown failures are ignored
#######################################
cleanup() {
    echo "🧹 Cleaning up running processes..."

    # Stop the background deploy if one was started, so it cannot outlive
    # the script on Ctrl-C or an early exit. Best-effort: the process may
    # already be gone, hence the suppressed error.
    if [ -n "${DEPLOY_PID:-}" ]; then
        kill "$DEPLOY_PID" 2>/dev/null || true
    fi

    # Stop docker services (best-effort: they may not be running)
    docker compose -f ../../deploy/docker-compose.yml down 2>/dev/null || true

    # Wait a moment for cleanup
    sleep 2
}

# Run cleanup on every exit path of the script.
trap cleanup EXIT

# Make sure the MMLU data directories exist; download them otherwise.
if [ -d "data/test" ] && [ -d "data/dev" ]; then
    echo "✅ MMLU dataset already exists"
else
    echo "📚 MMLU dataset not found, starting download..."

    # Install missing dependencies through `python3 -m pip` so they land in
    # the same interpreter that runs download_mmlu.py below.
    if ! python3 -c "import datasets, pandas" 2>/dev/null; then
        echo "📦 Installing Python dependencies..."
        if ! python3 -m pip install datasets pandas; then
            echo "❌ Failed to install Python dependencies, exiting"
            exit 1
        fi
    fi

    if ! python3 download_mmlu.py; then
        echo "❌ Data download failed, exiting"
        exit 1
    fi
fi

echo ""
echo "🔬 Step 1: Baseline Test (LMCache disabled)"
echo "==========================================="

# Launch the baseline deployment in the background; `timeout` ends it after
# 600 seconds at the latest.
# NOTE(review): that 600 s budget also has to cover the readiness wait and
# the MMLU run below — confirm it is long enough for the chosen model.
echo "🚀 Starting baseline dynamo..."
timeout 600 ./deploy-1-dynamo.sh "$MODEL_URL" &
DEPLOY_PID=$!

# Give the deployment a head start before polling.
echo "⏳ Waiting for server to be ready..."
sleep 30

# Poll the models endpoint until it answers successfully.
#   -f           treat HTTP error responses as "not ready yet"
#   --max-time   keep a hung connection from stalling the loop
max_attempts=30
attempt=0
until curl -sf --max-time 5 http://localhost:8000/v1/models > /dev/null 2>&1; do
    attempt=$((attempt + 1))
    if [ "$attempt" -gt "$max_attempts" ]; then
        echo "❌ Server failed to start within timeout"
        kill "$DEPLOY_PID" 2>/dev/null || true
        exit 1
    fi
    echo "⏳ Waiting for server... (attempt $attempt/$max_attempts)"
    sleep 10
done

echo "📊 Running baseline MMLU test..."
if ! python3 1-mmlu-dynamo.py --model "$MODEL_URL" --number-of-subjects "$NUM_SUBJECTS"; then
    echo "❌ Baseline test failed"
    kill "$DEPLOY_PID" 2>/dev/null || true
    exit 1
fi

# Shut the baseline deployment down before the next phase starts.
echo "🛑 Stopping baseline services..."
kill "$DEPLOY_PID" 2>/dev/null || true
cleanup
sleep 5

echo ""
echo "🔬 Step 2: LMCache Test (LMCache enabled)"
echo "========================================="

# Launch the LMCache deployment in the background; `timeout` ends it after
# 600 seconds at the latest.
# NOTE(review): that 600 s budget also has to cover the readiness wait and
# the MMLU run below — confirm it is long enough for the chosen model.
echo "🚀 Starting LMCache dynamo..."
timeout 600 ./deploy-2-dynamo.sh "$MODEL_URL" &
DEPLOY_PID=$!

# Give the deployment a head start before polling.
echo "⏳ Waiting for server to be ready..."
sleep 30

# Poll the models endpoint until it answers successfully.
#   -f           treat HTTP error responses as "not ready yet"
#   --max-time   keep a hung connection from stalling the loop
# The attempt limit is the one set for step 1; 30 is used if it is unset.
max_attempts=${max_attempts:-30}
attempt=0
until curl -sf --max-time 5 http://localhost:8000/v1/models > /dev/null 2>&1; do
    attempt=$((attempt + 1))
    if [ "$attempt" -gt "$max_attempts" ]; then
        echo "❌ Server failed to start within timeout"
        kill "$DEPLOY_PID" 2>/dev/null || true
        exit 1
    fi
    echo "⏳ Waiting for server... (attempt $attempt/$max_attempts)"
    sleep 10
done

echo "📊 Running LMCache MMLU test..."
if ! python3 2-mmlu-dynamo.py --model "$MODEL_URL" --number-of-subjects "$NUM_SUBJECTS"; then
    echo "❌ LMCache test failed"
    kill "$DEPLOY_PID" 2>/dev/null || true
    exit 1
fi

# Shut the LMCache deployment down now that its test run is finished.
echo "🛑 Stopping LMCache services..."
kill "$DEPLOY_PID" 2>/dev/null || true
cleanup

echo ""
echo "📈 Step 3: Result Analysis"
echo "========================="

# Analyze results. A failure is reported but does not stop the script, so
# the result-file check below still runs.
if ! python3 summarize_scores_dynamo.py; then
    echo "⚠️ Result analysis exited with an error" >&2
fi

echo ""
echo "🎉 Test Complete!"
echo "================"

# Check if result files exist. Let the shell expand the patterns instead of
# parsing `ls`; nullglob makes a pattern with no match expand to nothing.
shopt -s nullglob
baseline_matches=(dynamo-baseline-*.jsonl)
lmcache_matches=(dynamo-lmcache-*.jsonl)
shopt -u nullglob

# First match of each pattern, or empty when there is none.
baseline_file=${baseline_matches[0]:-}
lmcache_file=${lmcache_matches[0]:-}

if [ -n "$baseline_file" ] && [ -n "$lmcache_file" ]; then
    echo "✅ Generated result files:"
    echo "   - Baseline test: $baseline_file"
    echo "   - LMCache test: $lmcache_file"
    echo ""
    echo "💡 If accuracy difference < 1%, LMCache functionality is correct"
else
    echo "⚠️ Complete result files not found, please check if there were errors during testing"
fi

echo ""
echo "🔧 To re-run:"
echo "   ./run_test.sh \"$MODEL_URL\" $NUM_SUBJECTS"