#!/bin/bash

# This script builds the Neuron docker image and runs the offline inference
# example and the Neuron tests inside the container.
# It serves as a sanity check for compilation and basic model usage.

# -e: abort on the first failing command.
# -v: echo each script line to stderr as it is read.
set -e
set -v

image_name="neuron/vllm-ci"
# Container name with a random 10-character alphanumeric suffix.
container_name="neuron_$(tr -dc A-Za-z0-9 < /dev/urandom | head -c 10; echo)"

# Host directory used as the Hugging Face cache, and the path it is mounted at
# inside the container.
HF_CACHE="$(realpath ~)/huggingface"
mkdir -p "${HF_CACHE}"
HF_MOUNT="/root/.cache/huggingface"
# Hugging Face token, read from the VLLM_NEURON_CI_HF_TOKEN field of the
# "ci/vllm-neuron/hf-token" secret in AWS Secrets Manager.
HF_TOKEN=$(aws secretsmanager get-secret-value \
    --secret-id "ci/vllm-neuron/hf-token" \
    --region us-west-2 \
    --query 'SecretString' \
    --output text | jq -r .VLLM_NEURON_CI_HF_TOKEN)

# Host directory used as the Neuron compile cache, and the path it is mounted
# at inside the container.
NEURON_COMPILE_CACHE_URL="$(realpath ~)/neuron_compile_cache"
mkdir -p "${NEURON_COMPILE_CACHE_URL}"
NEURON_COMPILE_CACHE_MOUNT="/root/.cache/neuron_compile_cache"

# Try building the docker image
# First log docker in to public.ecr.aws; the password is passed on stdin
# rather than on the command line.
aws ecr-public get-login-password --region us-east-1 \
    | docker login --username AWS --password-stdin public.ecr.aws

# Prune old images, containers and volumes to save disk space, at most once a
# day. The time of the last prune is kept as epoch seconds in a timestamp file.
#
# Arguments: $1 - timestamp file (default: /tmp/neuron-docker-build-timestamp)
prune_docker_once_a_day() {
    local timestamp_file="${1:-/tmp/neuron-docker-build-timestamp}"
    local current_time last_build
    current_time=$(date +%s)

    if [ ! -f "${timestamp_file}" ]; then
        # No timestamp yet: just start the clock, nothing is pruned.
        echo "${current_time}" > "${timestamp_file}"
        return 0
    fi

    last_build=$(cat "${timestamp_file}")
    # An empty or corrupt timestamp file would make the arithmetic below fail
    # and abort the script on every run until the file is removed by hand.
    # Treat such a file as stale so it gets pruned and rewritten instead.
    case "${last_build}" in
        '' | *[!0-9]*) last_build=0 ;;
    esac

    # 10# forces base 10 so a value with leading zeros is not read as octal.
    if [ $((current_time - 10#${last_build})) -gt 86400 ]; then
        # Remove dangling images (those that are not tagged and not used by any container)
        docker image prune -f
        # Remove unused volumes / force the system prune for old images as well.
        docker volume prune -f && docker system prune -f
        echo "${current_time}" > "${timestamp_file}"
    fi
}

prune_docker_once_a_day

# Build the image from docker/Dockerfile.neuron with the current directory as
# the build context, and tag it as ${image_name}.
docker build -t "${image_name}" -f docker/Dockerfile.neuron .

# Setup cleanup
# Force-removes the image ${image_name}; a failure of `docker image rm` is
# deliberately ignored so that cleanup never changes the script's outcome.
# NOTE(review): despite its name, this function removes the image, not a
# container; the container itself is started with `docker run --rm`.
remove_docker_container() {
    docker image rm -f "${image_name}" || true
}
# Run the cleanup on every exit path of the script.
trap remove_docker_container EXIT

# Run the image
# Runs the offline inference example and the Neuron tests inside the container.
# The Hugging Face cache and the Neuron compile cache are bind-mounted from the
# host, and their in-container paths are passed via HF_HOME and
# NEURON_COMPILE_CACHE_URL.
#
# Notes on the inner script (it is a double-quoted string, so the outer shell
# expands anything that is not escaped):
#   - `set -e` stops at the first failing step, so a failure in the example or
#     in any test run becomes the exit status of `docker run` instead of being
#     masked by the steps that follow it.
#   - `\$f` is escaped everywhere so that it is expanded by the shell inside
#     the container (the loop variable), not by this script, where it is unset.
docker run --rm -it --device=/dev/neuron0 --network bridge \
       -v "${HF_CACHE}:${HF_MOUNT}" \
       -e "HF_HOME=${HF_MOUNT}" \
       -e "HF_TOKEN=${HF_TOKEN}" \
       -v "${NEURON_COMPILE_CACHE_URL}:${NEURON_COMPILE_CACHE_MOUNT}" \
       -e "NEURON_COMPILE_CACHE_URL=${NEURON_COMPILE_CACHE_MOUNT}" \
       --name "${container_name}" \
       "${image_name}" \
       /bin/bash -c "
            set -e;
            python3 /workspace/vllm/examples/offline_inference/neuron.py;
            python3 -m pytest /workspace/vllm/tests/neuron/1_core/ -v --capture=tee-sys;
            for f in /workspace/vllm/tests/neuron/2_core/*.py; do
                echo \"Running test file: \$f\";
                python3 -m pytest \"\$f\" -v --capture=tee-sys;
            done
       "