#!/bin/bash
set -e

ENV=dsw                                  # runtime environment: dsw (interactive) or dlc (cluster job)
MEGATRON_PATCH_PATH=/public/new-pai/Pai-Megatron-Patch
MODEL_SIZE=8B
BATCH_SIZE=1                             # micro batch size per GPU
GLOBAL_BATCH_SIZE=64
LR=1e-5
MIN_LR=1e-6
SEQ_LEN=2048
PAD_LEN=2048
EXTRA_VOCAB_SIZE=256
PR=fp16                                  # training precision (fp16/bf16)
TP=2                                     # tensor model parallel size
PP=1                                     # pipeline model parallel size
AC=sel                                   # activation checkpointing: full, sel (selective), or none
DO=true                                  # use the Megatron distributed optimizer
FL=true                                  # enable Flash Attention
SP=true                                  # enable sequence parallelism
TE=true                                  # enable Transformer Engine
SAVE_INTERVAL=10000
DATASET_PATH=/public/new-pai/data/llama3-datasets//text_data_llama3_text_document
# Bug fix: the original script passed $VALID_DATASET_PATH without ever defining
# it, so the empty, unquoted expansion silently disappeared and shifted every
# later positional argument by one. Assumption: validate on the training
# dataset; point this at a held-out split if one is available.
VALID_DATASET_PATH=${DATASET_PATH}
PRETRAIN_CHECKPOINT_PATH=none            # none: pretrain from scratch
TRAIN_TOKENS=20000000
WARMUP_TOKENS=100
OUTPUT_BASEPATH=./log
TOKEN_PATH="/public/new-pai/Pai-Megatron-Patch/examples/all_tokenizer/llama3_tokenizer"

bash run_pretrain_megatron_llama-dcu.sh \
  $ENV \
  $MEGATRON_PATCH_PATH \
  $MODEL_SIZE \
  $BATCH_SIZE \
  $GLOBAL_BATCH_SIZE \
  $LR \
  $MIN_LR \
  $SEQ_LEN \
  $PAD_LEN \
  $EXTRA_VOCAB_SIZE \
  $PR \
  $TP \
  $PP \
  $AC \
  $DO \
  $FL \
  $SP \
  $TE \
  $SAVE_INTERVAL \
  $DATASET_PATH \
  $VALID_DATASET_PATH \
  $PRETRAIN_CHECKPOINT_PATH \
  $TRAIN_TOKENS \
  $WARMUP_TOKENS \
  $OUTPUT_BASEPATH \
  $TOKEN_PATH
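
# Usage sketch (assumptions: this wrapper lives in the same directory as
# run_pretrain_megatron_llama-dcu.sh, and the file name below is hypothetical):
#   bash run_pretrain_llama3_8B_dcu.sh
# Arguments are positional, so their order must match what
# run_pretrain_megatron_llama-dcu.sh expects; that is why the undefined
# VALID_DATASET_PATH above had to be fixed rather than left empty.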