## System run params
export DGXNNODES=2
# DGXSYSTEM is derived from this file's own name: the basename of the
# symlink-resolved path, with a leading "config_" and a trailing ".sh" removed.
# Assignment and export are separate statements so a failure inside the
# command substitution is not masked by the exit status of `export`.
DGXSYSTEM=$(basename "$(readlink -f "${BASH_SOURCE[0]}")" \
  | sed -e 's/^config_//' -e 's/\.sh$//')
export DGXSYSTEM
# A WALLTIME already set (and non-empty) in the environment takes precedence.
export WALLTIME=${WALLTIME:-"00:30:00"}

## DL params
# The settings below are disabled; they are kept for reference.
#export LR=${LR:-"2.0e-3"}
#export TRAIN_BATCH_SIZE=${TRAIN_BATCH_SIZE:-64}
#export TEST_BATCH_SIZE=${TEST_BATCH_SIZE:-64}
#export WARMUP_STEPS=${WARMUP_STEPS:-200}
#export REMAIN_STEPS=${REMAIN_STEPS:-6453}
#export DECAY_INTERVAL=${DECAY_INTERVAL:-809}
#export TARGET=${TARGET:-24.0}
#export MAX_SEQ_LEN=${MAX_SEQ_LEN:-75}
#export NUMEPOCHS=${NUMEPOCHS:-15}
#export MATH=${MATH:-fp32}
#export DIST_OPTS=${DIST_OPTS-"\
#   --distributed-weight-update 2 \
#   --dwu-num-blocks 1 \
#   --dwu-num-chunks 2 \
#   --dwu-num-rs-pg 2 \
#   --dwu-num-ar-pg 2 \
#   --dwu-num-ag-pg 0 \
#   --dwu-grad-norm \
#   "}
#export EXTRA_OPTS=${EXTRA_OPTS-"\
#   --fused-attention \
#   --fused-xentropy \
#   --no-log-all-ranks \
#   "}

## System config params
export DGXNGPU=4
export DGXSOCKETCORES=8
export DGXHT=2 # HT on is 2, HT off is 1
export DGXNSOCKET=4
# Disabled alternative system configuration, kept for reference.
#export DGXNGPU=1
#export DGXSOCKETCORES=8
#export DGXHT=1 # HT on is 2, HT off is 1
#export DGXNSOCKET=1