#!/bin/bash

# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Per-rank launcher. Exports the dataset/checkpoint locations and the
# PADDLE_* rank variables, then starts ${APP} under numactl with
# FLAGS_selected_gpus and the NUMA node both set to (local rank mod 8).
#
# Environment read:
#   BASE_DATA_DIR               dataset root (default /data/mlperf/bert)
#   OMPI_COMM_WORLD_RANK        global rank (default 0)
#   OMPI_COMM_WORLD_LOCAL_RANK  node-local rank (default 0)
#   PADDLE_TRAINERS_NUM         default 1
#   PADDLE_TRAINER_ENDPOINTS    default empty

set -ex

# Print a message on stderr and abort the launcher.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

#export ROCBLAS_LAYER=3
export FLAGS_rocm_dir=/opt/dtk-21.04/
export FLAGS_max_inplace_grad_add=2
export NCCL_P2P_LEVEL=5

# --- Dataset selection -------------------------------------------------------
export USE_NV_INPUT=1
USE_UNCOMPRESSED_DATASET=1
BASE_DATA_DIR=${BASE_DATA_DIR:-"/data/mlperf/bert"}

UNCOMPRESSED_DATA_DIR="${BASE_DATA_DIR}/hdf5/training-4320/hdf5_4320_shards_uncompressed"
VARLENGTH_DATA_DIR="${BASE_DATA_DIR}/hdf5/training-4320/hdf5_4320_shards_varlength"

export DATA_DIR="${UNCOMPRESSED_DATA_DIR}"
export EVAL_DIR="${BASE_DATA_DIR}/hdf5/eval"

# NOTE(review): USE_UNCOMPRESSED_DATASET is hard-coded to 1 just above, so the
# variable-length branch below is never taken as written. Left as-is.
if [[ "${USE_NV_INPUT}" == "1" && "${USE_UNCOMPRESSED_DATASET}" == "0" ]]; then
  export DATA_DIR="${VARLENGTH_DATA_DIR}"
  export EVAL_DIR="${BASE_DATA_DIR}/hdf5/eval"
else
  export USE_UNCOMPRESSED_DATASET=1
fi
export USE_UNCOMPRESSED_DATASET

export TF_CKPT_PATH="${BASE_DATA_DIR}/phase1/model.ckpt-28252.tf_pickled"
export BERT_CONFIG_PATH="${BASE_DATA_DIR}/phase1/bert_config.json"
export PYTHON=python3

# --- Rank discovery ----------------------------------------------------------
# Apply the rank default BEFORE deriving PADDLE_TRAINER_ID from it; otherwise
# the trainer id is exported empty whenever the script runs outside mpirun.
OMPI_COMM_WORLD_RANK=${OMPI_COMM_WORLD_RANK:-"0"}
export PADDLE_TRAINER_ID=${OMPI_COMM_WORLD_RANK}
export PADDLE_TRAINERS_NUM=${PADDLE_TRAINERS_NUM:-"1"}
export PADDLE_TRAINER_ENDPOINTS=${PADDLE_TRAINER_ENDPOINTS:-""}

# Default the local rank the same way as the global rank. An empty value used
# to make the device dispatch match nothing, so the script ended without
# launching anything.
lrank=${OMPI_COMM_WORLD_LOCAL_RANK:-0}
[[ "${lrank}" =~ ^[0-9]+$ ]] \
  || die "OMPI_COMM_WORLD_LOCAL_RANK must be a non-negative integer, got '${lrank}'"

# NOTE(review): this copy of the script is damaged at this point. The
# here-document that formed the body of get_device_id, and every statement
# between it and the tail of the BERT_CMD assignment (including the definitions
# of LOG_FILE and BERT_CMD), are missing. Only these fragments survive:
#     function get_device_id() { $PYTHON <
#     &1 | tee $LOG_FILE"
# Restore the missing text from version control and replace the stub below.
function get_device_id() {
  echo "get_device_id: body missing from this copy of the script" >&2
  return 1
}

# Stop here, with a clear message, rather than starting "python3 -u" with no
# arguments and redirecting to an empty file name.
: "${LOG_FILE:?LOG_FILE is not defined (its definition is missing from this copy of the script)}"
: "${BERT_CMD:?BERT_CMD is not defined (its definition is missing from this copy of the script)}"

APP="python3 -u $BERT_CMD"

# --- Launch ------------------------------------------------------------------
# One index drives both the selected device and the NUMA node. 10# forces
# base 10 so a zero-padded rank is not parsed as octal.
local_dev=$(( 10#${lrank} % 8 ))

# NOTE(review): the message reports (local rank mod 4) while the device chosen
# below is (local rank mod 8); kept byte-for-byte, confirm which is intended.
echo "work ${lrank} less than ${PADDLE_TRAINERS_NUM} on DCU $(( 10#${lrank} % 4 ))"

export HIP_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
export FLAGS_selected_gpus=${local_dev}

# Disabled InfiniBand settings carried over from the original per-rank arms:
#   export UCX_NET_DEVICES=mlx5_0:1
#   export UCX_IB_PCI_BW=mlx5_$(( local_dev % 4 )):50Gbs

# APP is a flat command string, so it is deliberately left unquoted to be split
# into words. stdout and stderr both go to LOG_FILE.
# shellcheck disable=SC2086
numactl --cpunodebind="${local_dev}" --membind="${local_dev}" ${APP} >"${LOG_FILE}" 2>&1