#!/bin/bash
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

# LMCache Dynamo One-Click Test Script

# Command-line arguments (both optional):
#   $1 - model identifier passed to the deploy scripts and the MMLU runners
#        (default: Qwen/Qwen3-0.6B)
#   $2 - value forwarded as --number-of-subjects (default: 15)
MODEL_URL=${1:-"Qwen/Qwen3-0.6B"}
NUM_SUBJECTS=${2:-15}

# Succeeds only when $1 is non-empty and consists solely of decimal digits.
is_non_negative_integer() {
    case "$1" in
        '' | *[!0-9]*) return 1 ;;
        *) return 0 ;;
    esac
}

# Fail fast on a malformed subject count, before any deployment is started
if ! is_non_negative_integer "$NUM_SUBJECTS"; then
    echo "❌ Number of subjects must be a non-negative integer, got: '$NUM_SUBJECTS'" >&2
    exit 1
fi

echo "🧪 LMCache Dynamo Complete Test"
echo "==============================="
echo "Model: $MODEL_URL"
echo "Number of subjects: $NUM_SUBJECTS"
echo ""

# Best-effort teardown: signal leftover dynamo processes and bring down the
# docker compose services. Individual failures are deliberately ignored.
cleanup() {
    local process_pattern

    echo "🧹 Cleaning up running processes..."

    # Signal any process whose command line matches one of these patterns
    for process_pattern in "dynamo-run" "components/main.py"; do
        pkill -f "$process_pattern" || :
    done

    # Bring down the services from the metrics compose file, discarding stderr
    docker compose -f ../../deploy/metrics/docker-compose.yml down 2>/dev/null || :

    # Short pause before returning to the caller
    sleep 2
}

# Register cleanup to run whenever the script exits
trap 'cleanup' EXIT

# Make sure the MMLU data is present locally; run the download script when
# either the data/test or the data/dev directory is missing.
if [ -d "data/test" ] && [ -d "data/dev" ]; then
    echo "✅ MMLU dataset already exists"
else
    echo "📚 MMLU dataset not found, starting download..."

    # Install the modules the import probe checks for, if the probe fails
    if ! python3 -c "import datasets, pandas" 2>/dev/null; then
        echo "📦 Installing Python dependencies..."
        # Use "python3 -m pip" so the packages are installed for the same
        # interpreter that runs the download, and stop if installation fails
        if ! python3 -m pip install datasets pandas; then
            echo "❌ Failed to install Python dependencies, exiting"
            exit 1
        fi
    fi

    # Test the command directly instead of inspecting $? afterwards
    if ! python3 download_mmlu.py; then
        echo "❌ Data download failed, exiting"
        exit 1
    fi
fi

printf '%s\n' \
    "" \
    "🔬 Step 1: Baseline Test (LMCache disabled)" \
    "==========================================="

# Launch the baseline deployment in the background under a 600-second timeout
# and remember its PID so it can be stopped later
printf '%s\n' "🚀 Starting baseline dynamo..."
timeout 600 ./deploy-1-dynamo.sh "$MODEL_URL" &
DEPLOY_PID=$!

# Initial 30-second pause before the server is polled
printf '%s\n' "⏳ Waiting for server to be ready..."
sleep 30

# Poll the models endpoint until the server answers successfully. After
# max_attempts failed polls (10 seconds apart) the deployment is signalled
# and the script exits with status 1.
max_attempts=30
attempt=0
# -f makes curl report HTTP error responses as failures ("not ready yet");
# --max-time keeps one poll from hanging on a connection that never answers
until curl -sf --max-time 10 http://localhost:8000/v1/models > /dev/null 2>&1; do
    attempt=$((attempt + 1))
    if [ "$attempt" -gt "$max_attempts" ]; then
        echo "❌ Server failed to start within timeout"
        kill "$DEPLOY_PID" 2>/dev/null || true
        exit 1
    fi
    echo "⏳ Waiting for server... (attempt $attempt/$max_attempts)"
    sleep 10
done

echo "📊 Running baseline MMLU test..."
# Test the command directly; on failure, signal the background deployment
# before exiting non-zero. Arguments are quoted to prevent word splitting.
if ! python3 1-mmlu-dynamo.py --model "$MODEL_URL" --number-of-subjects "$NUM_SUBJECTS"; then
    echo "❌ Baseline test failed"
    kill "$DEPLOY_PID" 2>/dev/null || true
    exit 1
fi

echo "🛑 Stopping baseline services..."
kill "$DEPLOY_PID" 2>/dev/null || true
cleanup
# Extra pause after cleanup before the script continues
sleep 5

printf '%s\n' \
    "" \
    "🔬 Step 2: LMCache Test (LMCache enabled)" \
    "========================================="

# Launch the LMCache deployment in the background under a 600-second timeout
# and remember its PID so it can be stopped later
printf '%s\n' "🚀 Starting LMCache dynamo..."
timeout 600 ./deploy-2-dynamo.sh "$MODEL_URL" &
DEPLOY_PID=$!

# Initial 30-second pause before the server is polled
printf '%s\n' "⏳ Waiting for server to be ready..."
sleep 30

# Poll the models endpoint until the server answers successfully. The attempt
# counter is reset here; max_attempts keeps the value assigned earlier in the
# script. After that many failed polls (10 seconds apart) the deployment is
# signalled and the script exits with status 1.
attempt=0
# -f makes curl report HTTP error responses as failures ("not ready yet");
# --max-time keeps one poll from hanging on a connection that never answers
until curl -sf --max-time 10 http://localhost:8000/v1/models > /dev/null 2>&1; do
    attempt=$((attempt + 1))
    if [ "$attempt" -gt "$max_attempts" ]; then
        echo "❌ Server failed to start within timeout"
        kill "$DEPLOY_PID" 2>/dev/null || true
        exit 1
    fi
    echo "⏳ Waiting for server... (attempt $attempt/$max_attempts)"
    sleep 10
done

echo "📊 Running LMCache MMLU test..."
# Test the command directly; on failure, signal the background deployment
# before exiting non-zero. Arguments are quoted to prevent word splitting.
if ! python3 2-mmlu-dynamo.py --model "$MODEL_URL" --number-of-subjects "$NUM_SUBJECTS"; then
    echo "❌ LMCache test failed"
    kill "$DEPLOY_PID" 2>/dev/null || true
    exit 1
fi

echo "🛑 Stopping LMCache services..."
kill "$DEPLOY_PID" 2>/dev/null || true
cleanup

printf '%s\n' \
    "" \
    "📈 Step 3: Result Analysis" \
    "========================="

# Run the score summary script; its exit status is not checked
python3 summarize_scores_dynamo.py

printf '%s\n' \
    "" \
    "🎉 Test Complete!" \
    "================"

# Print the first argument that names an existing path, or nothing when none
# does (an unmatched glob arrives here as the literal pattern and is skipped).
first_existing() {
    local candidate
    for candidate in "$@"; do
        if [ -e "$candidate" ]; then
            printf '%s\n' "$candidate"
            return 0
        fi
    done
    return 0
}

# Pick one result file per run by letting the shell expand the globs instead
# of parsing ls output
baseline_file=$(first_existing dynamo-baseline-*.jsonl)
lmcache_file=$(first_existing dynamo-lmcache-*.jsonl)

if [ -n "$baseline_file" ] && [ -n "$lmcache_file" ]; then
    echo "✅ Generated result files:"
    echo "   - Baseline test: $baseline_file"
    echo "   - LMCache test: $lmcache_file"
    echo ""
    echo "💡 If accuracy difference < 1%, LMCache functionality is correct"
else
    echo "⚠️ Complete result files not found, please check if there were errors during testing"
fi

# Show the command line that repeats this run with the same arguments
echo ""
echo "🔧 To re-run:"
echo "   ./run_test.sh \"$MODEL_URL\" $NUM_SUBJECTS"