#!/bin/bash

# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Per-process launcher: exports the ROCm / dataset / Paddle trainer
# environment, then runs ${APP} under numactl, bound to the DCU, mlx5 NIC and
# NUMA node selected by the process's local rank.
#
# Environment read:
#   BASE_DATA_DIR               dataset root (optional, has a default)
#   PADDLE_TRAINERS_NUM         optional, defaults to 1
#   PADDLE_TRAINER_ENDPOINTS    optional, defaults to empty
#   OMPI_COMM_WORLD_RANK        optional, defaults to 0
#   OMPI_COMM_WORLD_LOCAL_RANK  selects the device; nothing runs unless 0-3
#   APP, LOG_FILE               command line to run and the file receiving its
#                               stdout+stderr (see NOTE(review) below)

set -ex

# --- ROCm / communication settings -----------------------------------------
export FLAGS_rocm_dir=/public/software/compiler/rocm/dtk-21.04/
export FLAGS_max_inplace_grad_add=2
export HSA_FORCE_FINE_GRAIN_PCIE=1
export NCCL_P2P_LEVEL=5

# --- Dataset selection ------------------------------------------------------
# Both switches are fixed here (not taken from the caller's environment), so
# with the values below the uncompressed shards are always used; flip
# USE_UNCOMPRESSED_DATASET to 0 to pick the varlength shards instead.
export USE_NV_INPUT=1
USE_UNCOMPRESSED_DATASET=1

BASE_DATA_DIR=${BASE_DATA_DIR:-"/public/software/apps/DeepLearning/Data/mlperf/bert"}
UNCOMPRESSED_DATA_DIR=${BASE_DATA_DIR}/hdf5/training-4320/hdf5_4320_shards_uncompressed
VARLENGTH_DATA_DIR=${BASE_DATA_DIR}/hdf5/training-4320/hdf5_4320_shards_varlength

export DATA_DIR=${UNCOMPRESSED_DATA_DIR}
export EVAL_DIR=${BASE_DATA_DIR}/hdf5/eval
if [[ "${USE_NV_INPUT}" == "1" && "${USE_UNCOMPRESSED_DATASET}" == "0" ]]; then
  export DATA_DIR=${VARLENGTH_DATA_DIR}
else
  USE_UNCOMPRESSED_DATASET=1
fi
export USE_UNCOMPRESSED_DATASET

export TF_CKPT_PATH=${BASE_DATA_DIR}/phase1/model.ckpt-28252.tf_pickled
export BERT_CONFIG_PATH=${BASE_DATA_DIR}/phase1/bert_config.json
export PYTHON=python3

# --- Trainer identity -------------------------------------------------------
# Default the global rank *before* deriving PADDLE_TRAINER_ID from it, so the
# trainer id is "0" rather than empty when OMPI_COMM_WORLD_RANK is not set.
OMPI_COMM_WORLD_RANK=${OMPI_COMM_WORLD_RANK:-"0"}
export PADDLE_TRAINER_ID=${OMPI_COMM_WORLD_RANK}
export PADDLE_TRAINERS_NUM=${PADDLE_TRAINERS_NUM:-"1"}
export PADDLE_TRAINER_ENDPOINTS=${PADDLE_TRAINER_ENDPOINTS:-""}

lrank=${OMPI_COMM_WORLD_LOCAL_RANK}

# NOTE(review): the text that used to sit between this point and the per-rank
# launch is truncated in the copy this file was restored from. All that
# survives of it is:
#     function get_device_id() { $PYTHON <& $LOG_FILE ;;
# The body of get_device_id (apparently a $PYTHON here-document), whatever
# assigned APP and LOG_FILE, the `case` header and its [0] arm are missing and
# cannot be reconstructed from here -- restore them from version control.
# Until then APP and LOG_FILE have to be supplied by the caller's environment.

#######################################
# Bind the current process to one DCU / NIC / NUMA node and run ${APP} there.
# Globals:
#   APP, LOG_FILE              (read; both must be non-empty)
#   lrank, PADDLE_TRAINERS_NUM (read, for the progress message)
#   HIP_VISIBLE_DEVICES, FLAGS_selected_gpus,
#   UCX_NET_DEVICES, UCX_IB_PCI_BW (exported)
# Arguments:
#   $1 - device index; also used as the mlx5 NIC suffix and NUMA node number
# Outputs:
#   One progress line on stdout; ${APP}'s stdout and stderr go to ${LOG_FILE}.
# Returns:
#   Exit status of numactl; the shell exits if APP or LOG_FILE is unset/empty.
#######################################
launch_on_dcu() {
  local -r idx=$1

  : "${APP:?APP (the command line to launch) must be set}"
  : "${LOG_FILE:?LOG_FILE (where the output is written) must be set}"

  echo "work ${lrank} less than ${PADDLE_TRAINERS_NUM} on DCU $((lrank % 4))"

  export HIP_VISIBLE_DEVICES=0,1,2,3
  export FLAGS_selected_gpus=${idx}
  export UCX_NET_DEVICES=mlx5_${idx}:1
  export UCX_IB_PCI_BW=mlx5_${idx}:50Gbs

  # APP holds a whole command line, so it is word-split on purpose.
  # shellcheck disable=SC2086
  numactl --cpunodebind="${idx}" --membind="${idx}" ${APP} >"${LOG_FILE}" 2>&1
}

# NOTE(review): the surviving arms are [1], [2] and [3]; selecting on
# ${lrank} and treating 0 the same way as 1-3 is assumed from their pattern --
# confirm against the original file.
case "${lrank}" in
  [0-3])
    launch_on_dcu "${lrank}"
    ;;
  *)
    # No arm matched in the original either; say so instead of exiting mutely.
    echo "local rank '${lrank}' is outside 0-3; nothing launched" >&2
    ;;
esac