test_ci.sh 488 Bytes
Newer Older
1
#!/bin/bash
# CI driver: installs the Python requirements, then runs finetune.py under
# torchrun once for each plugin name listed in the outer loop below.
# A failing run is retried; the script exits with status 1 as soon as one
# plugin has failed FAIL_LIMIT times in a row.

# Trace every command so the log shows exactly what was executed.
set -x

pip install -r requirements.txt

# Maximum number of attempts per plugin before the whole script gives up.
readonly FAIL_LIMIT=3

for plugin in "torch_ddp" "torch_ddp_fp16" "gemini" "low_level_zero" "hybrid_parallel"; do
    # Builtin arithmetic loop: no external `seq` process, no word-splitting.
    for ((i = 1; i <= FAIL_LIMIT; i++)); do
        # A successful run ends the retry loop for this plugin.
        # NOTE(review): --target_f1 is presumably a pass/fail threshold
        # enforced inside finetune.py — confirm there.
        torchrun --standalone --nproc_per_node 4 finetune.py --target_f1 0.86 --plugin "$plugin" --model_type "bert" && break
        echo "Failed $i times"
        # Out of attempts for this plugin: abort the remaining plugins too.
        if [[ "$i" -eq "$FAIL_LIMIT" ]]; then
            echo "Failed $FAIL_LIMIT times, exiting"
            exit 1
        fi
    done
done