Commit 846c3f70 authored by panning's avatar panning
Browse files

添加单机四卡训练(混合精度)脚本

parent ce4fe02b
#!/bin/bash
#
# Per-rank launcher for single-node, four-card mixed-precision training.
#
# Meant to be started once per process by an MPI launcher that exports the
# OMPI_COMM_WORLD_* variables read below. Each process binds itself to the
# NUMA node whose number equals its local rank and then runs ../train.py.
#
# Usage:
#   <this script> <master_addr> <arch> <batch_size>
#
# Arguments:
#   $1 - host used in the rendezvous URL (tcp://$1:34567)
#   $2 - value passed to train.py as --arch
#   $3 - value passed to train.py as --batch-size
#
# Environment (read):
#   OMPI_COMM_WORLD_LOCAL_RANK - selects the NUMA node (supported: 0-3)
#   OMPI_COMM_WORLD_RANK       - passed to train.py as --rank
#   OMPI_COMM_WORLD_SIZE       - passed to train.py as --world-size
#
# Environment (exported):
#   HSA_FORCE_FINE_GRAIN_PCIE=1
#   HIP_VISIBLE_DEVICES=0,1,2,3
#
# Exit status: that of the numactl/train.py command; 2 on bad usage;
# non-zero if a required variable is missing or the local rank is
# unsupported.

# Fail fast on a wrong invocation instead of handing train.py empty values.
if (( $# < 3 )); then
  printf 'Usage: %s <master_addr> <arch> <batch_size>\n' "${0##*/}" >&2
  exit 2
fi

export HSA_FORCE_FINE_GRAIN_PCIE=1

# ${var:?} aborts with a message when the variable is unset or empty.
lrank=${OMPI_COMM_WORLD_LOCAL_RANK:?must be set by the MPI launcher}
comm_rank=${OMPI_COMM_WORLD_RANK:?must be set by the MPI launcher}
comm_size=${OMPI_COMM_WORLD_SIZE:?must be set by the MPI launcher}

# The command is kept as an array so every argument stays one word,
# whatever characters the caller passes in.
# NOTE(review): the two /path/to/{...} entries look like placeholders that
# must be edited to real directories before use - confirm.
app=(
  python3 ../train.py
  "--batch-size=${3}"
  "--arch=${2}"
  -j 6
  --epochs=90
  --amp
  --opt-level O1
  --loss-scale=dynamic
  --dist-url "tcp://${1}:34567"
  --dist-backend nccl
  "--world-size=${comm_size}"
  "--rank=${comm_rank}"
  "--save-path=/path/to/{save_model_dir}"
  "/path/to/{ImageNet_pytorch_data_dir}/"
)

case "${lrank}" in
  [0-3])
    # Every supported rank sees all four devices; only the NUMA binding
    # differs, and the node number is the local rank itself.
    export HIP_VISIBLE_DEVICES=0,1,2,3
    cmd=(numactl "--cpunodebind=${lrank}" "--membind=${lrank}" "${app[@]}")
    # Log the exact command before running it.
    echo "${cmd[@]}"
    "${cmd[@]}"
    ;;
  *)
    # Exiting quietly here would leave the remaining ranks waiting for a
    # peer that never starts, so report the problem and fail.
    printf '%s: unsupported local rank "%s" (expected 0-3)\n' \
      "${0##*/}" "${lrank}" >&2
    exit 1
    ;;
esac
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment