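# shibing624/AdvertiseGen
#
# Example invocation of run_pretrain_megatron_chatglm-dcu.sh.
# Define each variable on its own line before calling the script. If they are
# written as one-line `VAR=value ... cmd $VAR` prefixes, the shell expands the
# $VAR arguments before the assignments take effect, so the script would
# receive empty (or stale) values.
#
# ENV=dsw
# MEGATRON_PATH=/public/new-pai/Pai-Megatron-Patch/
# MODEL_SIZE=6B
# BATCH_SIZE=1
# GLOBAL_BATCH_SIZE=2
# LR=1e-4
# MIN_LR=1e-6
# S_T_SEQ_LEN=128
# PAD_LEN=80
# EXTRA_VOCAB_SIZE=1
# PR=fp16
# TP=1
# PP=1
# AC=sel
# DO=true
# FL=true
# SP=true
# SAVE_INTERVAL=100000
# TRAIN_DATASET_PATH=/public/new-pai/data/chatglm-datasets/dev.json
# VALID_DATASET_PATH=/public/new-pai/data/chatglm-datasets/dev.json
# PRETRAIN_CHECKPOINT_PATH=/public/new-pai/Pai-Megatron-Patch/examples/all_tokenizer/chatglm_tokenizer
# TRAIN_TOKENS=1000000000
# WARMUP_TOKENS=10000
# OUTPUT_BASEPATH=./output
#
# sh run_pretrain_megatron_chatglm-dcu.sh \
#   "$ENV" "$MEGATRON_PATH" "$MODEL_SIZE" "$BATCH_SIZE" "$GLOBAL_BATCH_SIZE" \
#   "$LR" "$MIN_LR" "$S_T_SEQ_LEN" "$PAD_LEN" "$EXTRA_VOCAB_SIZE" \
#   "$PR" "$TP" "$PP" "$AC" "$DO" "$FL" "$SP" "$SAVE_INTERVAL" \
#   "$TRAIN_DATASET_PATH" "$VALID_DATASET_PATH" "$PRETRAIN_CHECKPOINT_PATH" \
#   "$TRAIN_TOKENS" "$WARMUP_TOKENS" "$OUTPUT_BASEPATH"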