#!/usr/bin/env bash
#
# Launch train.py through torchrun with 8 worker processes on this node.
#
# train.py is referenced by relative path, so run this script from the
# directory that contains it. The exit status of the script is the exit
# status of torchrun (it is the last command executed).
#
# NOTE(review): the original file had every statement on one physical line,
# which made everything after the first '#' a comment, so torchrun was never
# invoked. The statements are restored to one per line here.

# Load the run configuration. Abort if it cannot be loaded instead of
# silently continuing without it. `return` covers the case where this file
# is itself sourced; `exit` covers normal execution.
source config_DGXA100_001x08x032.sh || {
  printf 'error: failed to source config_DGXA100_001x08x032.sh\n' >&2
  return 1 2>/dev/null || exit 1
}

# Make device ids 0-7 visible to the HIP runtime: eight ids, matching
# --nproc_per_node=8 below.
export HIP_VISIBLE_DEVICES=0,1,2,3,4,5,6,7

# Disabled environment overrides, kept for reference.
# NOTE(review): presumably interconnect / kernel-search tuning knobs that were
# tried during bring-up - confirm before re-enabling.
#export HSA_FORCE_FINE_GRAIN_PCIE=1
#export MIOPEN_FIND_MODE=5
#export NCCL_NET_GDR_LEVEL=5
#export NCCL_P2P_LEVEL=5

# Arguments forwarded verbatim to train.py. Held in an array so each
# flag/value stays a separate word without backslash continuations.
train_args=(
  --lr 0.000085
  --batch-size 18
  --eval-batch-size 32
  --epochs 1
  --print-freq 20
  --dataset-path /public/home/liangjj/2023/training_results_v2.1-main/NVIDIA/benchmarks/ssd/implementations/pytorch-22.09/public-scripts/datasets/open-images-v6
  --warmup-epochs 0
  --frozen-bn-opt
  --frozen-bn-fp16
  --apex-adam
  --disable-ddp-broadcast-buffers
  --fp16-allreduce
  --skip-metric-loss
  --async-coco
)

torchrun --nproc_per_node=8 train.py "${train_args[@]}"

# Alternative launcher (disabled): run the ./dcu_run.sh wrapper script under
# torchrun instead of invoking train.py directly.
#torchrun --standalone --nproc_per_node=8 --no_python ./dcu_run.sh