# run_squad.sh (1.58 KB) - committed by hepj987
module rm compiler/rocm/2.9
module load compiler/rocm/3.9.1
module load mathlib/miopen/2.10_rocm3.9/gcc
HOME_PATH=/public/home/xuanbaby/
export PATH=$HOME_PATH/rocm3.9-python3.6.8-tf1.15/bin:$PATH
which python3
USER_HOME=/public/home/xuanbaby
WORK_HOME=`pwd`
BERT_DIR=${WORK_HOME}/pre_tf2x
SQUAD_VERSION=v1.1
OUTPUT_DIR=${WORK_HOME}/SQuAD_v1.1_tf_v1.2
#python3 ../data/create_finetuning_data.py \
# --squad_data_file=${SQUAD_DIR}/train-${SQUAD_VERSION}.json \
# --vocab_file=${BERT_DIR}/vocab.txt \
## --train_data_output_path=${OUTPUT_DIR}/squad_${SQUAD_VERSION}_train.tf_record \
# --meta_data_file_path=${OUTPUT_DIR}/squad_${SQUAD_VERSION}_meta_data \
# --eval_data_output_path=${OUTPUT_DIR}/squad_${SQUAD_VERSION}_eval.tf_record \
# --fine_tuning_task_type=squad --max_seq_length=384

# --- Launch ------------------------------------------------------------------
# Make devices 0-7 visible to the HIP runtime.
# NOTE(review): eight devices are exposed here but the command below passes
# --num_gpus=4; confirm which of the two is intended.
export HIP_VISIBLE_DEVICES=0,1,2,3,4,5,6,7

# Alternative launch pinned to NUMA nodes 4-7 (disabled):
#HSA_FORCE_FINE_GRAIN_PCIE=1 numactl --cpunodebind=4,5,6,7 --membind=4,5,6,7 python3 run_squad_xuan.py \
#
# Run run_squad_xuan.py in 'train_and_eval' mode with CPU and memory bound to
# NUMA nodes 0-3. Inputs are read from ${WORK_HOME}/SQuAD_v1.1_tf and
# ${BERT_DIR}; checkpoints go to ${WORK_HOME}/model_squad_v2.
# Path arguments are quoted so a working directory containing whitespace or
# glob characters is passed through as a single argument.
# The final argument keeps its trailing backslash: any line added directly
# after it is treated as a continuation of this command.
numactl --cpunodebind=0,1,2,3 --membind=0,1,2,3 python3 run_squad_xuan.py \
  --vocab_file="${BERT_DIR}/vocab.txt" \
  --bert_config_file="${BERT_DIR}/bert_config.json" \
  --mode='train_and_eval' \
  --input_meta_data_path="${WORK_HOME}/SQuAD_v1.1_tf/squad_v1.1_meta_data" \
  --train_data_path="${WORK_HOME}/SQuAD_v1.1_tf/squad_v1.1_train.tf_record" \
  --train_batch_size=128 \
  --predict_batch_size=4 \
  --learning_rate=2e-5 \
  --log_steps=1 \
  --init_checkpoint="${BERT_DIR}/bert_model.ckpt" \
  --num_gpus=4 \
  --distribution_strategy=mirrored \
  --model_dir="${WORK_HOME}/model_squad_v2" \
  --run_eagerly=False \
  --predict_file="${WORK_HOME}/SQuAD_v1.1_tf/dev-v1.1.json" \