#!/bin/bash

# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Environment setup for a BERT run on ROCm: exports runtime flags, dataset
# locations, checkpoint/config paths and the Paddle trainer topology.
#
# Optional environment overrides read by this script:
#   BASE_DATA_DIR               root of the dataset tree
#                               (default: /data/mlperf/bert)
#   PADDLE_TRAINERS_NUM         default: 1
#   PADDLE_TRAINER_ENDPOINTS    default: localhost:60045
#   OMPI_COMM_WORLD_RANK        default: 0
#   OMPI_COMM_WORLD_LOCAL_RANK  copied into lrank (no default)
#
# NOTE(review): this file had been collapsed onto the shebang line, which made
# Bash treat the whole script as one comment. Statement boundaries below were
# reconstructed; it is assumed that USE_UNCOMPRESSED_DATASET, BASE_DATA_DIR and
# the *_DATA_DIR helpers were plain assignments rather than extra arguments to
# the preceding `export` -- confirm against version control.

set -ex

# Runtime flags, exported for child processes.
export FLAGS_rocm_dir=/opt/dtk-21.04
export FLAGS_max_inplace_grad_add=2
export HSA_FORCE_FINE_GRAIN_PCIE=1
export NCCL_P2P_LEVEL=5

# Input-pipeline switches.
export USE_NV_INPUT=1
USE_UNCOMPRESSED_DATASET=1

# Dataset layout under BASE_DATA_DIR.
BASE_DATA_DIR="${BASE_DATA_DIR:-/data/mlperf/bert}"
export USE_NV_INPUT
UNCOMPRESSED_DATA_DIR="${BASE_DATA_DIR}/hdf5/training-4320/hdf5_4320_shards_uncompressed"
VARLENGTH_DATA_DIR="${BASE_DATA_DIR}/hdf5/training-4320/hdf5_4320_shards_varlength"
export DATA_DIR="${UNCOMPRESSED_DATA_DIR}"
export EVAL_DIR="${BASE_DATA_DIR}/hdf5/eval"

# Switch to the variable-length shards only when the uncompressed dataset is
# explicitly disabled. USE_UNCOMPRESSED_DATASET is hard-coded to 1 above, so
# this branch is currently never taken.
if [[ "${USE_NV_INPUT}" == "1" && "${USE_UNCOMPRESSED_DATASET}" == "0" ]]; then
  export DATA_DIR="${VARLENGTH_DATA_DIR}"
  export EVAL_DIR="${BASE_DATA_DIR}/hdf5/eval"
else
  export USE_UNCOMPRESSED_DATASET=1
fi
export USE_UNCOMPRESSED_DATASET

# Checkpoint and model configuration.
export TF_CKPT_PATH="${BASE_DATA_DIR}/phase1/model.ckpt-28252.tf_pickled"
export BERT_CONFIG_PATH="${BASE_DATA_DIR}/phase1/bert_config.json"
export PYTHON=python3

# Trainer topology. The rank default is applied *before* PADDLE_TRAINER_ID is
# derived from it, so the trainer id is "0" (not empty) when the script is
# started without an MPI launcher.
OMPI_COMM_WORLD_RANK="${OMPI_COMM_WORLD_RANK:-0}"
export PADDLE_TRAINER_ID="${OMPI_COMM_WORLD_RANK}"
export PADDLE_TRAINERS_NUM="${PADDLE_TRAINERS_NUM:-1}"
export PADDLE_TRAINER_ENDPOINTS="${PADDLE_TRAINER_ENDPOINTS:-localhost:60045}"
lrank="${OMPI_COMM_WORLD_LOCAL_RANK}"

# NOTE(review): the remainder of the script did not survive intact. The body of
# get_device_id (apparently a here-document fed to $PYTHON), the definitions of
# LOG_FILE and BERT_CMD, and the final launch command are missing. The
# surviving fragment is preserved verbatim below and must be restored from
# version control before this script can start a run:
#
#   function get_device_id() { $PYTHON <& $LOG_FILE python3 -u $BERT_CMD