# train.sh

# Pull in the environment script shipped under /opt/dtk-24.04.1 before any
# of the runtime settings below are exported.
source /opt/dtk-24.04.1/env.sh

# Device/runtime settings. Device indices 0-7 are listed here; the launch
# command further down passes the same list to --gpus.
# NOTE(review): the precise effect of each variable is defined by the
# HIP/HSA and Paddle runtimes, not by this script -- confirm against their docs.
export \
  HIP_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
  HSA_FORCE_FINE_GRAIN_PCIE=1 \
  FLAGS_cudnn_batchnorm_spatial_persistent=1 \
  GPU_MAX_HW_QUEUES=16

# NCCL settings: minimum and maximum channel counts are both set to 20 and
# the P2P level is set to SYS.
export NCCL_MIN_NCHANNELS=20 NCCL_MAX_NCHANNELS=20 NCCL_P2P_LEVEL=SYS

# Record the start timestamp (seconds since the epoch, with a nanosecond
# fraction) so the total run time can be computed after training.
start=$(date +%s.%N)

# recommended paddle.__version__ == 2.0.0
# Optional pretrained-weights download, left disabled:
#wget -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/pretrained/MobileNetV3_large_x0_5_pretrained.pdparams

# Launch tools/train.py through paddle.distributed.launch on GPUs 0-7, with
# numactl binding CPU and memory to node 0; the launcher log directory is
# ./debug/. Config overrides follow -o.
# The eval_batch_step override is quoted on purpose: unquoted, [0,60] is a
# shell bracket pattern, so the word could be replaced by a matching file
# name in the working directory (or abort the command under failglob).
numactl --cpunodebind=0 --membind=0 \
  python3 -m paddle.distributed.launch --log_dir=./debug/ --gpus '0,1,2,3,4,5,6,7' \
  tools/train.py -c configs/det/det_mv3_db.yml \
  -o Global.epoch_num=1500 \
  'Global.eval_batch_step=[0,60]' \
  Train.loader.batch_size_per_card=48 \
  Train.loader.num_workers=8 \
  Eval.loader.num_workers=0
# Keep the launch status; the commands below would otherwise overwrite it.
train_status=$?

# Block until any background jobs of this shell have finished (the launch
# above runs in the foreground, so normally there is nothing to wait for).
wait

# Record the end timestamp and compute the elapsed time in seconds.
end=$(date +%s.%N)
runtime=$(echo "$end - $start" | bc)
# NOTE(review): "ttal_time.log" looks like a typo of "total"; the name is kept
# unchanged because other tooling may read this file -- confirm before renaming.
echo "Total Time: $runtime" >> ttal_time.log

# Surface a failed launch instead of silently reporting success: the timing
# line is still written above, but the failure is reported on stderr.
if [ "$train_status" -ne 0 ]; then
  echo "training command exited with status $train_status" >&2
fi
# Make the training status the status of the last command without calling
# `exit`, so this stays safe if the file is sourced or extended later.
(exit "$train_status")