# Launches run_squad_xuan.py (BERT SQuAD v1.1, --mode=train_and_eval) from the
# current working directory, after switching the ROCm/MIOpen environment
# modules and putting a site Python environment first on PATH.
#
# Inputs read, relative to the directory the script is started from:
#   pre_tf2x/       vocab.txt, bert_config.json, bert_model.ckpt
#   SQuAD_v1.1_tf/  squad_v1.1_meta_data, squad_v1.1_train.tf_record,
#                   dev-v1.1.json
# The model directory passed to the trainer is model_squad_v2/.

# --- Toolchain --------------------------------------------------------------
# Swap the ROCm 2.9 compiler module for 3.9.1 and load the matching MIOpen.
module rm compiler/rocm/2.9
module load compiler/rocm/3.9.1
module load mathlib/miopen/2.10_rocm3.9/gcc

HOME_PATH=/public/home/xuanbaby/
# ${HOME_PATH%/} drops the trailing slash so the PATH entry contains no "//".
export PATH="${HOME_PATH%/}/rocm3.9-python3.6.8-tf1.15/bin:${PATH}"
# Show which python3 will be picked up (command -v is the portable `which`).
command -v python3 || echo "warning: python3 not found on PATH" >&2

# --- Paths ------------------------------------------------------------------
# NOTE(review): USER_HOME, SQUAD_VERSION and OUTPUT_DIR are only referenced by
# the commented-out data-preparation command below; kept for that purpose.
USER_HOME=/public/home/xuanbaby
WORK_HOME="$(pwd)"
BERT_DIR="${WORK_HOME}/pre_tf2x"
SQUAD_VERSION=v1.1
OUTPUT_DIR="${WORK_HOME}/SQuAD_v1.1_tf_v1.2"
# Directory holding the prepared records actually consumed by the run below.
# NOTE(review): this differs from OUTPUT_DIR (SQuAD_v1.1_tf_v1.2) -- confirm
# which data set is intended.
DATA_DIR="${WORK_HOME}/SQuAD_v1.1_tf"

# --- Optional data preparation (disabled) -----------------------------------
# NOTE(review): SQUAD_DIR is not defined anywhere in this script; set it before
# re-enabling this command.
#python3 ../data/create_finetuning_data.py \
# --squad_data_file=${SQUAD_DIR}/train-${SQUAD_VERSION}.json \
# --vocab_file=${BERT_DIR}/vocab.txt \
## --train_data_output_path=${OUTPUT_DIR}/squad_${SQUAD_VERSION}_train.tf_record \
# --meta_data_file_path=${OUTPUT_DIR}/squad_${SQUAD_VERSION}_meta_data \
# --eval_data_output_path=${OUTPUT_DIR}/squad_${SQUAD_VERSION}_eval.tf_record \
# --fine_tuning_task_type=squad --max_seq_length=384

# --- Training / evaluation --------------------------------------------------
# NOTE(review): eight devices are made visible while --num_gpus=4 is passed
# and the process is bound to NUMA nodes 0-3 -- confirm this is intentional.
export HIP_VISIBLE_DEVICES=0,1,2,3,4,5,6,7

# Alternative launch line kept from the original (NUMA nodes 4-7):
#HSA_FORCE_FINE_GRAIN_PCIE=1 numactl --cpunodebind=4,5,6,7 --membind=4,5,6,7 python3 run_squad_xuan.py \

# All path arguments are quoted so a working directory containing spaces or
# glob characters does not split or expand them. The last flag carries no
# trailing backslash, so a line appended after it cannot be swallowed into
# this command.
numactl --cpunodebind=0,1,2,3 --membind=0,1,2,3 python3 run_squad_xuan.py \
  --vocab_file="${BERT_DIR}/vocab.txt" \
  --bert_config_file="${BERT_DIR}/bert_config.json" \
  --mode='train_and_eval' \
  --input_meta_data_path="${DATA_DIR}/squad_v1.1_meta_data" \
  --train_data_path="${DATA_DIR}/squad_v1.1_train.tf_record" \
  --train_batch_size=128 \
  --predict_batch_size=4 \
  --learning_rate=2e-5 \
  --log_steps=1 \
  --init_checkpoint="${BERT_DIR}/bert_model.ckpt" \
  --num_gpus=4 \
  --distribution_strategy=mirrored \
  --model_dir="${WORK_HOME}/model_squad_v2" \
  --run_eagerly=False \
  --predict_file="${DATA_DIR}/dev-v1.1.json"