#!/bin/bash
#
# Single-node timing wrapper around train.py.
#
# Loads config_DGX1.sh from the current working directory, launches train.py
# through `python3 -m bind_launch`, and prints start/end timestamps plus a
# final "RESULT,..." line containing the elapsed wall-clock seconds.
#
# Variables read but not set here (presumably provided by config_DGX1.sh or
# the caller's environment -- verify against the config file):
#   DGXNSOCKET, DGXSOCKETCORES, DGXNGPU, MAX_SEQ_LEN, TARGET, NUMEPOCHS, MATH,
#   TRAIN_BATCH_SIZE, TEST_BATCH_SIZE, LR, WARMUP_STEPS, REMAIN_STEPS,
#   DECAY_INTERVAL
# Optional overrides: NCCL_DEBUG (default "WARN"), DIST_OPTS, EXTRA_OPTS.
#
# Exit status: 0 on success, otherwise the exit status of the training command.

# Single-node configuration. Sourced before `set -e`, so a non-zero status
# while loading the config does not abort the script. The path is quoted so a
# working directory containing whitespace still resolves correctly.
source "$(pwd)/config_DGX1.sh"

set -e

# start timing
start=$(date +%s)
start_fmt=$(date '+%Y-%m-%d %r')
echo "STARTING TIMING RUN AT $start_fmt"

export NCCL_DEBUG=${NCCL_DEBUG:-"WARN"}

# run benchmark
set -x
DATASET_DIR='../wmt16_de_en/'
PREPROC_DATADIR='./preproc_data'
RESULTS_DIR='gnmt_wmt16'

DIST_OPTS=${DIST_OPTS:-""}
EXTRA_OPTS=${EXTRA_OPTS:-""}

# Launcher prefix; train.py and its options are appended at the call site.
declare -a CMD
CMD=(
  'python3' '-u' '-m' 'bind_launch'
  "--nsockets_per_node=${DGXNSOCKET}"
  "--ncores_per_socket=${DGXSOCKETCORES}"
  "--nproc_per_node=${DGXNGPU}"
  "--no_hyperthreads"
)

echo "running benchmark"

# Single-node training run (the original note says fp32; the precision that is
# actually used is whatever ${MATH} selects).
#
# The exit status is captured with `|| ret_code=$?` because `set -e` is
# active: without it a failing run would terminate the script right here and
# the `set +x` / `sleep` / explicit exit below would never execute.
#
# DIST_OPTS and EXTRA_OPTS are intentionally left unquoted so that each may
# carry several whitespace-separated flags.
ret_code=0
# shellcheck disable=SC2086
"${CMD[@]}" train.py \
  --save "${RESULTS_DIR}" \
  --dataset-dir "${DATASET_DIR}" \
  --preproc-data-dir "${PREPROC_DATADIR}/${MAX_SEQ_LEN}" \
  --target-bleu "${TARGET}" \
  --epochs "${NUMEPOCHS}" \
  --math "${MATH}" \
  --max-length-train "${MAX_SEQ_LEN}" \
  --print-freq 10 \
  --train-batch-size "${TRAIN_BATCH_SIZE}" \
  --test-batch-size "${TEST_BATCH_SIZE}" \
  --optimizer Adam \
  --lr "${LR}" \
  --warmup-steps "${WARMUP_STEPS}" \
  --remain-steps "${REMAIN_STEPS}" \
  --decay-interval "${DECAY_INTERVAL}" \
  $DIST_OPTS \
  $EXTRA_OPTS || ret_code=$?

set +x

sleep 3
# Propagate a training failure to the caller; no RESULT line is printed then.
if (( ret_code != 0 )); then
  exit "$ret_code"
fi

# end timing
end=$(date +%s)
end_fmt=$(date '+%Y-%m-%d %r')
echo "ENDING TIMING RUN AT $end_fmt"

# report result
result=$(( end - start ))
result_name="RNN_TRANSLATOR"

echo "RESULT,$result_name,,$result,nvidia,$start_fmt"