#!/bin/bash
# Launcher for Baichuan2 Megatron pretraining on DCU.
#
# BUG FIX: the original form was `ENV=dsw ... sh script $ENV ...` on one line.
# In a simple command, argument words are expanded BEFORE the leading
# VAR=value assignments take effect, so every $VAR expanded to empty; being
# unquoted, the empty words were then removed entirely and the script was
# invoked with NO positional arguments. Assign first, then invoke with each
# argument quoted so the script receives all 25 parameters in order.
set -euo pipefail

# --- Environment / code paths -----------------------------------------------
ENV=dsw
MEGATRON_PATH=/public/new-pai/Pai-Megatron-Patch/
MEGATRON_PATCH_PATH=/public/new-pai/Pai-Megatron-Patch/Megatron-LM-231007/

# --- Model & training hyperparameters ---------------------------------------
MODEL_SIZE=7B
BATCH_SIZE=1            # micro batch size per GPU
GLOBAL_BATCH_SIZE=64
LR=1e-5
MIN_LR=1e-6
SEQ_LEN=128
PAD_LEN=128
EXTRA_VOCAB_SIZE=0
PR=fp16                 # precision
TP=1                    # tensor parallel size
PP=1                    # pipeline parallel size
AC=sel                  # activation checkpointing mode
DO=false                # distributed optimizer
FL=true                 # flash attention
SP=true                 # sequence parallel
TE=false                # transformer engine
SAVE_INTERVAL=100000

# --- Data / checkpoint / output locations -----------------------------------
DATASET_PATH="/public/new-pai/data/baichuan2-datasets/alpaca_data_51k.json"
PRETRAIN_CHECKPOINT_PATH=/public/Pai-Megatron-Patch/examples/all_tokenizer/baichuan_tokenizer
TRAIN_TOKENS=100000000
WARMUP_TOKENS=10000
OUTPUT_BASEPATH=./output

# Positional argument order must match run_pretrain_megatron_baichuan-dcu.sh.
sh run_pretrain_megatron_baichuan-dcu.sh \
  "$ENV" \
  "$MEGATRON_PATH" \
  "$MEGATRON_PATCH_PATH" \
  "$MODEL_SIZE" \
  "$BATCH_SIZE" \
  "$GLOBAL_BATCH_SIZE" \
  "$LR" \
  "$MIN_LR" \
  "$SEQ_LEN" \
  "$PAD_LEN" \
  "$EXTRA_VOCAB_SIZE" \
  "$PR" \
  "$TP" \
  "$PP" \
  "$AC" \
  "$DO" \
  "$FL" \
  "$SP" \
  "$TE" \
  "$SAVE_INTERVAL" \
  "$DATASET_PATH" \
  "$PRETRAIN_CHECKPOINT_PATH" \
  "$TRAIN_TOKENS" \
  "$WARMUP_TOKENS" \
  "$OUTPUT_BASEPATH"