ENV=dsw
MEGATRON_PATCH_PATH=/public/new-pai/Pai-Megatron-Patch
MODEL_SIZE=7B
BATCH_SIZE=1
GLOBAL_BATCH_SIZE=8
LR=1e-5
MIN_LR=1e-6
SEQ_LEN=2048
PAD_LEN=2048
EXTRA_VOCAB_SIZE=0
PR=fp16
TP=2
PP=1
AC=sel
DO=true
FL=true
SP=true
TE=true
SAVE_INTERVAL=10000
DATASET_PATH=/public/new-pai/data/baichuan2-datasets/alpaca_data_51k.json
PRETRAIN_CHECKPOINT_PATH=/public/new-pai/Pai-Megatron-Patch/examples/all_tokenizer/baichuan2_tokenizer
TRAIN_TOKENS=100000000
WARMUP_TOKENS=10000
OUTPUT_BASEPATH=./output

sh run_pretrain_megatron_baichuan-dcu.sh \
  $ENV \
  $MEGATRON_PATCH_PATH \
  $MODEL_SIZE \
  $BATCH_SIZE \
  $GLOBAL_BATCH_SIZE \
  $LR \
  $MIN_LR \
  $SEQ_LEN \
  $PAD_LEN \
  $EXTRA_VOCAB_SIZE \
  $PR \
  $TP \
  $PP \
  $AC \
  $DO \
  $FL \
  $SP \
  $TE \
  $SAVE_INTERVAL \
  $DATASET_PATH \
  $PRETRAIN_CHECKPOINT_PATH \
  $TRAIN_TOKENS \
  $WARMUP_TOKENS \
  $OUTPUT_BASEPATH
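
The launcher takes these values as positional arguments in the order listed above. As a minimal sketch of how run_pretrain_megatron_baichuan-dcu.sh presumably consumes them, following the usual Pai-Megatron-Patch launcher convention (the exact variable names and annotations inside the script are an assumption, not taken from the script itself):

# Hypothetical positional-argument mapping at the top of the launcher script
ENV=$1                          # runtime environment, e.g. dsw or dlc
MEGATRON_PATCH_PATH=$2          # path to the Pai-Megatron-Patch checkout
MODEL_SIZE=$3                   # model size, here 7B
BATCH_SIZE=$4                   # micro batch size per GPU/DCU
GLOBAL_BATCH_SIZE=$5            # global batch size across all ranks
LR=$6                           # peak learning rate
MIN_LR=$7                       # minimum learning rate after decay
SEQ_LEN=$8                      # training sequence length
PAD_LEN=$9                      # padded sequence length
EXTRA_VOCAB_SIZE=${10}          # extra tokens added to the vocabulary
PR=${11}                        # precision, e.g. fp16 or bf16
TP=${12}                        # tensor model parallel size
PP=${13}                        # pipeline model parallel size
AC=${14}                        # activation checkpointing mode, e.g. sel or full
DO=${15}                        # enable distributed optimizer (true/false)
FL=${16}                        # enable flash attention (true/false)
SP=${17}                        # enable sequence parallelism (true/false)
TE=${18}                        # enable Transformer Engine (true/false)
SAVE_INTERVAL=${19}             # checkpoint save interval in iterations
DATASET_PATH=${20}              # path to the training dataset
PRETRAIN_CHECKPOINT_PATH=${21}  # tokenizer / pretrained checkpoint path
TRAIN_TOKENS=${22}              # total number of training tokens
WARMUP_TOKENS=${23}             # number of warmup tokens
OUTPUT_BASEPATH=${24}           # base directory for logs and checkpoints

If the argument order is changed, keep the command line above in sync, since the script distinguishes the arguments only by position.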