"...lm-evaluation-harness.git" did not exist on "908ac2b241ecba364e3dc1b500971f5a4fb36bb2"
Commit d3dd8642 authored by Rayyyyy's avatar Rayyyyy
Browse files

First add

parents
Pipeline #1259 failed with stages
in 0 seconds
#!/bin/bash
# Runs MMLU evaluation (tasks/MMLU/eval_for_mmlu.py) for the "Yuan-moe" model
# with torchrun on a single node.
#
# Required environment variables:
#   CHECKPOINT_PATH       value passed to --load
#   TOKENIZER_MODEL_PATH  value passed to --tokenizer-model-path
#   MATH_DATA             value passed to --math_datapath
#   OUTPUT_PATH           value passed to --output_path
# Optional environment variables (sampling settings):
#   TEMP (default 0), TOP_P (default 0.0), TOP_K (default 1)
set -euo pipefail

GPUS_PER_NODE=8
MAX_LENGTH=1024
MASTER_PORT=6000
MASTER_ADDR=localhost
NNODES=1
NODE_RANK=0
# Total number of processes; computed for reference, not used further below.
WORLD_SIZE=$((GPUS_PER_NODE * NNODES))

# An unset or empty sampling variable falls back to its default.
TEMP=${TEMP:-0}
TOP_P=${TOP_P:-0.0}
TOP_K=${TOP_K:-1}

# Abort with a clear message when a required path is missing.
CHECKPOINT_PATH=${CHECKPOINT_PATH:?must be set to the checkpoint path}
TOKENIZER_MODEL_PATH=${TOKENIZER_MODEL_PATH:?must be set to the tokenizer model path}
MATH_DATA=${MATH_DATA:?must be set to the evaluation data path}
OUTPUT_PATH=${OUTPUT_PATH:?must be set to the output path}

# Arrays keep every argument a separate word without relying on word splitting.
GPT_ARGS=(
  --micro-batch-size 1
  --tensor-model-parallel-size 1
  --pipeline-model-parallel-size 8
  --num-layers 24
  --hidden-size 2048
  --use-lf-gate
  --rotary-base 40890
  --max-tokens-to-oom 16384
  --lf-conv2d-group 1
  --lf-conv2d-num-pad 0
  --position-embedding-type rope
  --no-embedding-dropout
  --use-flash-attn
  --flash-attn-drop 0.0
  --attention-dropout 0
  --fim-rate 0.0
  --hidden-dropout 0
  --norm-dtype RMSNorm
  --disable-bias-linear
  --reset-position-ids
  --swiglu
  --num-attention-heads 16
  --seq-length 16384
  --max-position-embeddings 16384
  --no-async-tensor-model-parallel-allreduce
  --bf16
  --kv-channels 256
  --num-attention-router-heads 16384
  --rotary-percent 0.5
  --use-attention-router
  --no-masked-softmax-fusion
  --use-fp32-router
  --num-experts 32
  --moe-router-load-balancing-type none
  --moe-router-topk 2
  --moe-grouped-gemm
  --repetition-penalty 1.0
  --temp "$TEMP"
  --top_p "$TOP_P"
  --top_k "$TOP_K"
  # A fresh random seed is drawn on every run.
  --seed "$RANDOM"
)

DISTRIBUTED_ARGS=(
  --nproc_per_node "$GPUS_PER_NODE"
  --nnodes "$NNODES"
  --node_rank "$NODE_RANK"
  --master_addr "$MASTER_ADDR"
  --master_port "$MASTER_PORT"
)

torchrun "${DISTRIBUTED_ARGS[@]}" tasks/MMLU/eval_for_mmlu.py \
  "${GPT_ARGS[@]}" \
  --tokenizer-type "YuanTokenizer" \
  --tokenizer-model-path "$TOKENIZER_MODEL_PATH" \
  --math_datapath "$MATH_DATA" \
  --distributed-backend nccl \
  --num_samples_per_task 1 \
  --max_len "$MAX_LENGTH" \
  --output_path "$OUTPUT_PATH" \
  --load "$CHECKPOINT_PATH"
#!/bin/bash
# Evaluate natural question test data given Wikipedia embeddings and a
# pretrained ICT model or a model finetuned for the Natural Question task.
# Datasets can be downloaded from the following link:
# https://github.com/facebookresearch/DPR/blob/master/data/download_data.py
#
# Required environment variables:
#   EVIDENCE_DATA_DIR  path of the Wikipedia dataset
#   EMBEDDING_PATH     path of the embeddings
#   CHECKPOINT_PATH    path of the pretrained ICT model or finetuned model
#   QA_FILE            path of the natural question dev or test dataset
set -euo pipefail

EVIDENCE_DATA_DIR=${EVIDENCE_DATA_DIR:?must be set to the path of the Wikipedia dataset}
EMBEDDING_PATH=${EMBEDDING_PATH:?must be set to the path of the embeddings}
CHECKPOINT_PATH=${CHECKPOINT_PATH:?must be set to the path of the pretrained ICT or finetuned model}
QA_FILE=${QA_FILE:?must be set to the path of the natural question dev or test dataset}

# Every continuation backslash is preceded by a space so that an argument can
# never be fused with the flag on the following line.
python tasks/main.py \
  --task RETRIEVER-EVAL \
  --tokenizer-type BertWordPieceLowerCase \
  --num-layers 12 \
  --hidden-size 768 \
  --num-attention-heads 12 \
  --tensor-model-parallel-size 1 \
  --micro-batch-size 128 \
  --seq-length 512 \
  --max-position-embeddings 512 \
  --load "${CHECKPOINT_PATH}" \
  --evidence-data-path "${EVIDENCE_DATA_DIR}" \
  --embedding-path "${EMBEDDING_PATH}" \
  --retriever-seq-length 256 \
  --vocab-file bert-vocab.txt \
  --qa-data-test "${QA_FILE}" \
  --faiss-use-gpu \
  --retriever-report-topk-accuracies 1 5 20 100 \
  --fp16 \
  --indexer-log-interval 1000 \
  --indexer-batch-size 128
#!/bin/bash
# Runs the LAMBADA task of tasks/main.py for the checkpoint in
# checkpoints/gpt2_345m, using 8 processes on a single node.
#
# Required environment variable:
#   VALID_DATA  path of the LAMBADA data, passed to --valid-data
set -euo pipefail

WORLD_SIZE=8

DISTRIBUTED_ARGS=(
  --nproc_per_node "$WORLD_SIZE"
  --nnodes 1
  --node_rank 0
  --master_addr localhost
  --master_port 6000
)

TASK="LAMBADA"

VALID_DATA=${VALID_DATA:?must be set to the lambada path}
VOCAB_FILE=gpt2-vocab.json
MERGE_FILE=gpt2-merges.txt
CHECKPOINT=checkpoints/gpt2_345m

python -m torch.distributed.launch "${DISTRIBUTED_ARGS[@]}" ./tasks/main.py \
  --task "$TASK" \
  --valid-data "$VALID_DATA" \
  --tokenizer-type GPT2BPETokenizer \
  --strict-lambada \
  --vocab-file "$VOCAB_FILE" \
  --merge-file "$MERGE_FILE" \
  --load "$CHECKPOINT" \
  --tensor-model-parallel-size 1 \
  --num-layers 24 \
  --hidden-size 1024 \
  --num-attention-heads 16 \
  --batch-size 8 \
  --seq-length 1024 \
  --max-position-embeddings 1024 \
  --log-interval 10 \
  --fp16 \
  --no-load-optim \
  --no-load-rng
#!/bin/bash
# Settings for the FastChat services managed by this script: the controller,
# the model worker and the OpenAI-compatible API server.

# Host and port given to the controller (--host/--port); they are also used to
# build the controller URL handed to the worker and the API server.
CONTROLLER_HOST="0.0.0.0"
CONTROLLER_PORT=8503
# Host and port given to the model worker; also used to build its
# --worker-address URL.
MODEL_WORKER_HOST="0.0.0.0"
MODEL_WORKER_PORT=8504
# Host and port given to the OpenAI API server.
API_SERVER_HOST="0.0.0.0"
API_SERVER_PORT=8505
# Model location passed to the model worker as --model-path.
MODEL_PATH="/mnt/models/Yuan2-2B-Mars-hf/"
#######################################
# Start the FastChat controller in the background.
# Globals:   CONTROLLER_HOST, CONTROLLER_PORT (read)
# Outputs:   a progress line on stdout; the service's stdout and stderr are
#            written to controller.log in the current directory
#######################################
start_controller() {
  echo "Starting controller service..."
  # Expansions are quoted so each value always reaches python3 as one argument.
  python3 -m fastchat.serve.controller \
    --host "${CONTROLLER_HOST}" \
    --port "${CONTROLLER_PORT}" \
    > controller.log 2>&1 &
}
#######################################
# Start the FastChat model worker in the background and register it with the
# controller under the model name "yuan2".
# Globals:   MODEL_PATH, CONTROLLER_HOST, CONTROLLER_PORT,
#            MODEL_WORKER_HOST, MODEL_WORKER_PORT (read)
# Outputs:   a progress line on stdout; the service's stdout and stderr are
#            written to model_worker.log in the current directory
#######################################
start_model_worker() {
  echo "Starting model worker service..."
  # Quoting keeps a model path containing spaces as a single argument.
  python3 -m fastchat.serve.model_worker \
    --model-path "${MODEL_PATH}" \
    --model-names "yuan2" \
    --controller-address "http://${CONTROLLER_HOST}:${CONTROLLER_PORT}" \
    --worker-address "http://${MODEL_WORKER_HOST}:${MODEL_WORKER_PORT}" \
    --host "${MODEL_WORKER_HOST}" \
    --port "${MODEL_WORKER_PORT}" \
    --dtype bfloat16 \
    --debug True \
    > model_worker.log 2>&1 &
}
#######################################
# Start the FastChat OpenAI API server in the background.
# Globals:   API_SERVER_HOST, API_SERVER_PORT,
#            CONTROLLER_HOST, CONTROLLER_PORT (read)
# Outputs:   a progress line on stdout; the service's stdout and stderr are
#            written to server.log in the current directory
#######################################
start_openai_api_server() {
  echo "Starting OpenAI API server..."
  # Expansions are quoted so each value always reaches python3 as one argument.
  python3 -m fastchat.serve.openai_api_server \
    --host "${API_SERVER_HOST}" \
    --port "${API_SERVER_PORT}" \
    --controller-address "http://${CONTROLLER_HOST}:${CONTROLLER_PORT}" \
    > server.log 2>&1 &
}
#######################################
# Stop every process whose command line matches "python3 -m <module>".
# SIGTERM is sent first so the service can shut down cleanly; processes that
# are still alive after a short grace period are killed with SIGKILL.
# Arguments:
#   $1 - Python module name, e.g. fastchat.serve.controller
#   $2 - message printed before anything is stopped
#   $3 - message printed after the processes were signalled
#   $4 - message printed when no matching process exists
# Outputs:   progress messages on stdout
#######################################
_stop_fastchat_service() {
  local module=$1 stopping_msg=$2 stopped_msg=$3 not_running_msg=$4
  local pattern="python3 -m ${module}"
  local pids attempt

  echo "$stopping_msg"
  pids=$(pgrep -f "$pattern")
  if [ -z "$pids" ]; then
    echo "$not_running_msg"
    return 0
  fi

  # $pids is intentionally unquoted: pgrep prints one PID per line and each
  # one must become its own argument to kill.
  # shellcheck disable=SC2086
  kill $pids 2> /dev/null

  # Give the processes up to five seconds to exit on their own.
  for attempt in 1 2 3 4 5; do
    pgrep -f "$pattern" > /dev/null || break
    sleep 1
  done

  # Force-kill whatever ignored SIGTERM.
  pids=$(pgrep -f "$pattern")
  if [ -n "$pids" ]; then
    # shellcheck disable=SC2086
    kill -9 $pids 2> /dev/null
  fi
  echo "$stopped_msg"
}

# Stop the FastChat controller.
stop_controller() {
  _stop_fastchat_service "fastchat.serve.controller" \
    "Stopping controller service..." \
    "Controller service stopped." \
    "Controller service is not running."
}

# Stop the FastChat model worker.
stop_model_worker() {
  _stop_fastchat_service "fastchat.serve.model_worker" \
    "Stopping model worker service..." \
    "Model worker service stopped." \
    "Model worker service is not running."
}

# Stop the FastChat OpenAI API server.
stop_openai_api_server() {
  _stop_fastchat_service "fastchat.serve.openai_api_server" \
    "Stopping OpenAI API server..." \
    "OpenAI API server stopped." \
    "OpenAI API server is not running."
}
# Stop all three services: controller first, then the model worker, then the
# OpenAI API server.
stop_services() {
  local service
  echo "Stopping services..."
  for service in controller model_worker openai_api_server; do
    "stop_${service}"
  done
}
# Report, for each of the three services, whether a process matching
# "python3 -m <module>" currently exists.
check_status() {
  local entry module label
  echo "Checking status..."
  # Each entry is "<python module>:<label used in the messages>".
  for entry in \
    "fastchat.serve.controller:Controller service" \
    "fastchat.serve.model_worker:Model worker service" \
    "fastchat.serve.openai_api_server:OpenAI API server"; do
    module=${entry%%:*}
    label=${entry#*:}
    if pgrep -f "python3 -m ${module}" &> /dev/null; then
      echo "${label} is running."
    else
      echo "${label} is not running."
    fi
  done
}
# Dispatch on the first command-line argument; anything unknown prints the
# usage line and exits with status 1.
case "$1" in
  start_all)
    start_controller
    # Wait for controller to start before starting other services
    sleep 5
    start_model_worker
    # Wait for worker to start before starting other services
    sleep 15
    start_openai_api_server
    ;;
  start_controller) start_controller ;;
  start_worker) start_model_worker ;;
  start_server) start_openai_api_server ;;
  stop_all) stop_services ;;
  stop_controller) stop_controller ;;
  stop_worker) stop_model_worker ;;
  stop_server) stop_openai_api_server ;;
  status) check_status ;;
  *)
    echo "Usage: $0 {start_all|start_controller|start_worker|start_server|stop_all|stop_controller|stop_worker|stop_server|status}"
    exit 1
    ;;
esac

exit 0
#!/bin/bash
# Runs the MNLI task of tasks/main.py starting from checkpoints/bert_345m,
# with 8 processes on a single node; checkpoints are saved to
# checkpoints/bert_345m_mnli.

WORLD_SIZE=8

# Arguments for torch.distributed.launch.
launcher_args=(
  --nproc_per_node "$WORLD_SIZE"
  --nnodes 1
  --node_rank 0
  --master_addr localhost
  --master_port 6000
)

TRAIN_DATA="data/glue_data/MNLI/train.tsv"
# Two validation files; each one is passed as its own argument.
valid_files=(
  data/glue_data/MNLI/dev_matched.tsv
  data/glue_data/MNLI/dev_mismatched.tsv
)
PRETRAINED_CHECKPOINT=checkpoints/bert_345m
VOCAB_FILE=bert-vocab.txt
CHECKPOINT_PATH=checkpoints/bert_345m_mnli

# Arguments for tasks/main.py.
task_args=(
  --task MNLI
  --seed 1234
  --train-data "$TRAIN_DATA"
  --valid-data "${valid_files[@]}"
  --tokenizer-type BertWordPieceLowerCase
  --vocab-file "$VOCAB_FILE"
  --epochs 5
  --pretrained-checkpoint "$PRETRAINED_CHECKPOINT"
  --tensor-model-parallel-size 1
  --num-layers 24
  --hidden-size 1024
  --num-attention-heads 16
  --micro-batch-size 8
  --lr 5.0e-5
  --lr-decay-style linear
  --lr-warmup-fraction 0.065
  --seq-length 512
  --max-position-embeddings 512
  --save-interval 500000
  --save "$CHECKPOINT_PATH"
  --log-interval 10
  --eval-interval 100
  --eval-iters 50
  --weight-decay 1.0e-1
  --fp16
)

python -m torch.distributed.launch "${launcher_args[@]}" ./tasks/main.py "${task_args[@]}"
#!/bin/bash
# Runs the RACE task of tasks/main.py starting from checkpoints/bert_345m,
# with 8 processes on a single node; checkpoints are saved to
# checkpoints/bert_345m_race.

WORLD_SIZE=8

# Arguments for torch.distributed.launch.
launcher_args=(
  --nproc_per_node "$WORLD_SIZE"
  --nnodes 1
  --node_rank 0
  --master_addr localhost
  --master_port 6000
)

TRAIN_DATA="data/RACE/train/middle"
# Two validation directories; each one is passed as its own argument.
valid_dirs=(
  data/RACE/dev/middle
  data/RACE/dev/high
)
VOCAB_FILE=bert-vocab.txt
PRETRAINED_CHECKPOINT=checkpoints/bert_345m
CHECKPOINT_PATH=checkpoints/bert_345m_race

# Arguments for tasks/main.py.
task_args=(
  --task RACE
  --seed 1234
  --train-data "$TRAIN_DATA"
  --valid-data "${valid_dirs[@]}"
  --tokenizer-type BertWordPieceLowerCase
  --vocab-file "$VOCAB_FILE"
  --epochs 3
  --pretrained-checkpoint "$PRETRAINED_CHECKPOINT"
  --tensor-model-parallel-size 1
  --num-layers 24
  --hidden-size 1024
  --num-attention-heads 16
  --micro-batch-size 4
  --lr 1.0e-5
  --lr-decay-style linear
  --lr-warmup-fraction 0.06
  --seq-length 512
  --max-position-embeddings 512
  --save-interval 100000
  --save "$CHECKPOINT_PATH"
  --log-interval 10
  --eval-interval 100
  --eval-iters 50
  --weight-decay 1.0e-1
  --clip-grad 1.0
  --hidden-dropout 0.1
  --attention-dropout 0.1
  --fp16
)

python -m torch.distributed.launch "${launcher_args[@]}" ./tasks/main.py "${task_args[@]}"
#!/bin/bash
# Finetune a BERT or pretrained ICT model using Google natural question data
# Datasets can be downloaded from the following link:
# https://github.com/facebookresearch/DPR/blob/master/data/download_data.py
#
# Required environment variable:
#   CHECKPOINT_PATH        path for the finetuned retriever model
#                          (used for both --save and --load)
# Set at least one of the following (both may be given):
#   BERT_LOAD_PATH         path of the BERT pretrained model (--bert-load)
#   PRETRAINED_CHECKPOINT  path of the pretrained ICT model
#                          (--pretrained-checkpoint)
set -euo pipefail

WORLD_SIZE=8

DISTRIBUTED_ARGS=(
  --nproc_per_node "$WORLD_SIZE"
  --nnodes 1
  --node_rank 0
  --master_addr localhost
  --master_port 6000
)

CHECKPOINT_PATH=${CHECKPOINT_PATH:?must be set to the path for the finetuned retriever model}

# Load either of the below
BERT_LOAD_PATH=${BERT_LOAD_PATH:-}
PRETRAINED_CHECKPOINT=${PRETRAINED_CHECKPOINT:-}

if [[ -z "$BERT_LOAD_PATH" && -z "$PRETRAINED_CHECKPOINT" ]]; then
  echo "error: set BERT_LOAD_PATH or PRETRAINED_CHECKPOINT" >&2
  exit 1
fi

# Only pass the initialisation flags that have a value; an empty value would
# leave the flag without its argument and break argument parsing.
INIT_ARGS=()
if [[ -n "$PRETRAINED_CHECKPOINT" ]]; then
  INIT_ARGS+=(--pretrained-checkpoint "$PRETRAINED_CHECKPOINT")
fi
if [[ -n "$BERT_LOAD_PATH" ]]; then
  INIT_ARGS+=(--bert-load "$BERT_LOAD_PATH")
fi

python -m torch.distributed.launch "${DISTRIBUTED_ARGS[@]}" ./tasks/main.py \
  --task RET-FINETUNE-NQ \
  --train-with-neg \
  --train-hard-neg 1 \
  "${INIT_ARGS[@]}" \
  --num-layers 12 \
  --hidden-size 768 \
  --num-attention-heads 12 \
  --tensor-model-parallel-size 1 \
  --tokenizer-type BertWordPieceLowerCase \
  --train-data nq-train.json \
  --valid-data nq-dev.json \
  --save "${CHECKPOINT_PATH}" \
  --load "${CHECKPOINT_PATH}" \
  --vocab-file bert-vocab.txt \
  --save-interval 5000 \
  --log-interval 10 \
  --eval-interval 20000 \
  --eval-iters 100 \
  --indexer-log-interval 1000 \
  --faiss-use-gpu \
  --DDP-impl torch \
  --fp16 \
  --retriever-report-topk-accuracies 1 5 10 20 100 \
  --seq-length 512 \
  --retriever-seq-length 256 \
  --max-position-embeddings 512 \
  --retriever-score-scaling \
  --epochs 80 \
  --micro-batch-size 8 \
  --eval-micro-batch-size 16 \
  --indexer-batch-size 128 \
  --lr 2e-5 \
  --lr-warmup-fraction 0.01 \
  --weight-decay 1e-1
#!/bin/bash
# Runs tools/merge_mp_partitions.py on the BERT checkpoint in
# checkpoints/bert_345m, which is declared as having a tensor-model-parallel
# size of 2.

TENSOR_MODEL_PARALLEL_SIZE=2

VOCAB_FILE=bert-vocab.txt
CHECKPOINT_PATH=checkpoints/bert_345m

# Arguments for tools/merge_mp_partitions.py.
merge_args=(
  --model-type BERT
  --tensor-model-parallel-size "$TENSOR_MODEL_PARALLEL_SIZE"
  --tokenizer-type BertWordPieceLowerCase
  --vocab-file "$VOCAB_FILE"
  --num-layers 24
  --hidden-size 1024
  --num-attention-heads 16
  --seq-length 512
  --max-position-embeddings 512
  --load "$CHECKPOINT_PATH"
)

# WORLD_SIZE is set in the environment of this one command only.
WORLD_SIZE=$TENSOR_MODEL_PARALLEL_SIZE python tools/merge_mp_partitions.py "${merge_args[@]}"
#!/bin/bash
# Merge a checkpoint along the pipeline dimension with
# tools/merge_pp_partitions.py: pipeline-model-parallel size 16 -> 1,
# tensor-model-parallel size 8 (unchanged), 42 layers.
#
# Required environment variables:
#   LOAD_CHECKPOINT_PATH  the loaded ckpt path (--load)
#   SAVE_CHECKPOINT_PATH  the stored ckpt path (--save); created if missing
#   TOKENIZER_MODEL_PATH  tokenizer model path (--tokenizer-model-path)
set -euo pipefail

LOAD_CHECKPOINT_PATH=${LOAD_CHECKPOINT_PATH:?must be set to the loaded ckpt path}
SAVE_CHECKPOINT_PATH=${SAVE_CHECKPOINT_PATH:?must be set to the stored ckpt path}
TOKENIZER_MODEL_PATH=${TOKENIZER_MODEL_PATH:?must be set to the tokenizer model path}

export CUDA_DEVICE_MAX_CONNECTIONS=1
export PATH=/opt/conda/bin/:$PATH

# -p is a no-op for an existing directory and also creates missing parents.
mkdir -p -- "$SAVE_CHECKPOINT_PATH"

python tools/merge_pp_partitions.py \
  --tokenizer-model-path "$TOKENIZER_MODEL_PATH" \
  --tensor-model-parallel-size 8 \
  --target-tensor-model-parallel-size 8 \
  --pipeline-model-parallel-size 16 \
  --target-pipeline-model-parallel-size 1 \
  --pipeline-model-parallel-method block \
  --pipeline-model-parallel-blocks 2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,2 \
  --target-pipeline-model-parallel-blocks 42 \
  --tensor-generate-layer 0,1,2,3,4,5,6,7 \
  --tokenizer-type YuanTokenizer \
  --num-layers 42 \
  --hidden-size 8192 \
  --num-attention-heads 64 \
  --seq-length 4096 \
  --max-position-embeddings 4096 \
  --use-lf-gate \
  --lf-conv2d-group 1 \
  --lf-conv2d-num-pad 1 \
  --position-embedding-type rope \
  --no-embedding-dropout \
  --flash-attn-drop 0.1 \
  --fim-rate 0.5 \
  --fim-spm-rate 0.5 \
  --attention-dropout 0 \
  --hidden-dropout 0 \
  --norm-dtype RMSNorm \
  --disable-bias-linear \
  --reset-position-ids \
  --use-flash-attn \
  --swiglu \
  --DDP-impl local \
  --adam-beta1 0.9 \
  --adam-beta2 0.95 \
  --bf16 \
  --save-interval 1 \
  --recompute-method block \
  --recompute-granularity full \
  --recompute-num-layers 1 \
  --load "$LOAD_CHECKPOINT_PATH" \
  --save "$SAVE_CHECKPOINT_PATH" \
  --micro-batch-size 1 \
  --global-batch-size 1152 \
  --use-distributed-optimizer \
  --lr 0.00009 \
  --train-iters 63578 \
  --lr-decay-iters 63578 \
  --lr-decay-style cosine \
  --min-lr 0.9e-5 \
  --weight-decay 1e-1 \
  --no-load-optim \
  --use-cpu-initialization \
  --process-checkpoint \
  --data-impl mmap

# Report the size of the merged checkpoint.
du -sh -- "$SAVE_CHECKPOINT_PATH"
#!/bin/bash
# Merge a checkpoint along the pipeline dimension with
# tools/merge_pp_partitions.py: pipeline-model-parallel size 32 -> 1,
# tensor-model-parallel size 8 (unchanged), 84 layers.
#
# Usage: <script> LOAD_CHECKPOINT_PATH SAVE_CHECKPOINT_PATH TOKENIZER_MODEL_PATH
#   $1 - the loaded ckpt path (--load)
#   $2 - the stored ckpt path (--save); created if missing
#   $3 - tokenizer model path (--tokenizer-model-path)
set -euo pipefail

if (( $# < 3 )); then
  echo "Usage: $0 LOAD_CHECKPOINT_PATH SAVE_CHECKPOINT_PATH TOKENIZER_MODEL_PATH" >&2
  exit 1
fi

LOAD_CHECKPOINT_PATH=$1
SAVE_CHECKPOINT_PATH=$2
TOKENIZER_MODEL_PATH=$3

export CUDA_DEVICE_MAX_CONNECTIONS=1
export PATH=/opt/conda/bin/:$PATH

# -p is a no-op for an existing directory and also creates missing parents.
mkdir -p -- "$SAVE_CHECKPOINT_PATH"

# Every continuation backslash is preceded by a space so that an argument can
# never be fused with the flag on the following line.
python tools/merge_pp_partitions.py \
  --tokenizer-model-path "$TOKENIZER_MODEL_PATH" \
  --tensor-model-parallel-size 8 \
  --target-tensor-model-parallel-size 8 \
  --pipeline-model-parallel-size 32 \
  --target-pipeline-model-parallel-size 1 \
  --pipeline-model-parallel-method block \
  --pipeline-model-parallel-blocks 2,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2 \
  --target-pipeline-model-parallel-blocks 84 \
  --tensor-generate-layer 0,1,2,3,4,5,6,7 \
  --tokenizer-type YuanTokenizer \
  --num-layers 84 \
  --hidden-size 8192 \
  --num-attention-heads 64 \
  --seq-length 4096 \
  --max-position-embeddings 4096 \
  --use-lf-gate \
  --lf-conv2d-group 1 \
  --lf-conv2d-num-pad 1 \
  --position-embedding-type rope \
  --flash-attn-drop 0.1 \
  --fim-rate 0.5 \
  --fim-spm-rate 0.5 \
  --attention-dropout 0 \
  --hidden-dropout 0 \
  --norm-dtype RMSNorm \
  --disable-bias-linear \
  --reset-position-ids \
  --use-flash-attn \
  --swiglu \
  --fused-rmsnorm \
  --DDP-impl local \
  --bf16 \
  --save-interval 1 \
  --recompute-method block \
  --recompute-granularity full \
  --recompute-num-layers 2 \
  --load "$LOAD_CHECKPOINT_PATH" \
  --save "$SAVE_CHECKPOINT_PATH" \
  --micro-batch-size 1 \
  --global-batch-size 1152 \
  --no-load-optim \
  --use-distributed-optimizer \
  --lr 0.0001 \
  --train-iters 63578 \
  --lr-decay-iters 63578 \
  --lr-decay-style cosine \
  --min-lr 1.0e-5 \
  --weight-decay 1e-1 \
  --process-checkpoint \
  --use-cpu-initialization \
  --data-impl mmap

# Report the size of the merged checkpoint.
du -sh -- "$SAVE_CHECKPOINT_PATH"
#!/bin/bash
# Merge a checkpoint along the pipeline dimension with
# tools/merge_pp_partitions.py: pipeline-model-parallel size 8 -> 1,
# tensor-model-parallel size 4 (unchanged), 24 layers, 32 experts.
#
# Usage: <script> LOAD_CHECKPOINT_PATH SAVE_CHECKPOINT_PATH TOKENIZER_MODEL_PATH
#   $1 - the loaded ckpt path (--load)
#   $2 - the stored ckpt path (--save); created if missing
#   $3 - tokenizer model path (--tokenizer-model-path)
set -euo pipefail

if (( $# < 3 )); then
  echo "Usage: $0 LOAD_CHECKPOINT_PATH SAVE_CHECKPOINT_PATH TOKENIZER_MODEL_PATH" >&2
  exit 1
fi

LOAD_CHECKPOINT_PATH=$1
SAVE_CHECKPOINT_PATH=$2
TOKENIZER_MODEL_PATH=$3

export CUDA_DEVICE_MAX_CONNECTIONS=1
export PATH=/opt/conda/bin/:$PATH

# -p is a no-op for an existing directory and also creates missing parents.
mkdir -p -- "$SAVE_CHECKPOINT_PATH"

python tools/merge_pp_partitions.py \
  --tokenizer-model-path "$TOKENIZER_MODEL_PATH" \
  --tensor-model-parallel-size 4 \
  --target-tensor-model-parallel-size 4 \
  --pipeline-model-parallel-size 8 \
  --target-pipeline-model-parallel-size 1 \
  --pipeline-model-parallel-method block \
  --pipeline-model-parallel-blocks 3,3,3,3,3,3,3,3 \
  --target-pipeline-model-parallel-blocks 24 \
  --tensor-generate-layer 0,1,2,3 \
  --tokenizer-type YuanTokenizer \
  --num-layers 24 \
  --hidden-size 2048 \
  --num-attention-heads 16 \
  --kv-channels 256 \
  --seq-length 4096 \
  --max-position-embeddings 4096 \
  --use-lf-gate \
  --lf-conv2d-group 1 \
  --lf-conv2d-num-pad 1 \
  --position-embedding-type rope \
  --no-embedding-dropout \
  --flash-attn-drop 0.1 \
  --fim-rate 0.5 \
  --fim-spm-rate 0.5 \
  --attention-dropout 0 \
  --hidden-dropout 0 \
  --norm-dtype RMSNorm \
  --disable-bias-linear \
  --reset-position-ids \
  --use-flash-attn \
  --swiglu \
  --DDP-impl local \
  --adam-beta1 0.9 \
  --adam-beta2 0.95 \
  --bf16 \
  --rotary-percent 0.5 \
  --use-attention-router \
  --num-attention-router-heads 4096 \
  --no-masked-softmax-fusion \
  --use-fp32-router \
  --num-experts 32 \
  --moe-router-load-balancing-type none \
  --moe-router-topk 2 \
  --moe-grouped-gemm \
  --save-interval 1 \
  --recompute-method block \
  --recompute-granularity full \
  --recompute-num-layers 1 \
  --load "$LOAD_CHECKPOINT_PATH" \
  --save "$SAVE_CHECKPOINT_PATH" \
  --micro-batch-size 1 \
  --global-batch-size 1152 \
  --use-distributed-optimizer \
  --lr 0.00009 \
  --train-iters 63578 \
  --lr-decay-iters 63578 \
  --lr-decay-style cosine \
  --min-lr 0.9e-5 \
  --weight-decay 1e-1 \
  --no-load-optim \
  --use-cpu-initialization \
  --process-checkpoint \
  --data-impl mmap

# Report the size of the merged checkpoint.
du -sh -- "$SAVE_CHECKPOINT_PATH"
#!/bin/bash
# Merge a checkpoint along the pipeline dimension with
# tools/merge_pp_partitions.py: pipeline-model-parallel size 16 -> 1,
# tensor-model-parallel size 4 (unchanged), 42 layers.
#
# Usage: <script> LOAD_CHECKPOINT_PATH SAVE_CHECKPOINT_PATH TOKENIZER_MODEL_PATH
#   $1 - the loaded ckpt path (--load)
#   $2 - the stored ckpt path (--save); created if missing
#   $3 - tokenizer model path (--tokenizer-model-path)
set -euo pipefail

if (( $# < 3 )); then
  echo "Usage: $0 LOAD_CHECKPOINT_PATH SAVE_CHECKPOINT_PATH TOKENIZER_MODEL_PATH" >&2
  exit 1
fi

LOAD_CHECKPOINT_PATH=$1
SAVE_CHECKPOINT_PATH=$2
TOKENIZER_MODEL_PATH=$3

export CUDA_DEVICE_MAX_CONNECTIONS=1
export PATH=/opt/conda/bin/:$PATH

# -p is a no-op for an existing directory and also creates missing parents.
mkdir -p -- "$SAVE_CHECKPOINT_PATH"

python tools/merge_pp_partitions.py \
  --tokenizer-model-path "$TOKENIZER_MODEL_PATH" \
  --tensor-model-parallel-size 4 \
  --target-tensor-model-parallel-size 4 \
  --pipeline-model-parallel-size 16 \
  --target-pipeline-model-parallel-size 1 \
  --pipeline-model-parallel-method block \
  --pipeline-model-parallel-blocks 2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,2 \
  --target-pipeline-model-parallel-blocks 42 \
  --tensor-generate-layer 0,1,2,3 \
  --tokenizer-type YuanTokenizer \
  --num-layers 42 \
  --hidden-size 8192 \
  --num-attention-heads 64 \
  --seq-length 4096 \
  --max-position-embeddings 4096 \
  --use-lf-gate \
  --lf-conv2d-group 1 \
  --lf-conv2d-num-pad 1 \
  --position-embedding-type rope \
  --no-embedding-dropout \
  --flash-attn-drop 0.1 \
  --fim-rate 0.5 \
  --fim-spm-rate 0.5 \
  --attention-dropout 0 \
  --hidden-dropout 0 \
  --norm-dtype RMSNorm \
  --disable-bias-linear \
  --reset-position-ids \
  --use-flash-attn \
  --swiglu \
  --DDP-impl local \
  --adam-beta1 0.9 \
  --adam-beta2 0.95 \
  --bf16 \
  --save-interval 1 \
  --recompute-method block \
  --recompute-granularity full \
  --recompute-num-layers 1 \
  --load "$LOAD_CHECKPOINT_PATH" \
  --save "$SAVE_CHECKPOINT_PATH" \
  --micro-batch-size 1 \
  --global-batch-size 1152 \
  --use-distributed-optimizer \
  --lr 0.00009 \
  --train-iters 63578 \
  --lr-decay-iters 63578 \
  --lr-decay-style cosine \
  --min-lr 0.9e-5 \
  --weight-decay 1e-1 \
  --no-load-optim \
  --use-cpu-initialization \
  --process-checkpoint \
  --data-impl mmap

# Report the size of the merged checkpoint.
du -sh -- "$SAVE_CHECKPOINT_PATH"
#!/bin/bash
# Merge a checkpoint along the tensor dimension with
# tools/merge_tp_partitions.py: tensor-model-parallel size 2 -> 1,
# pipeline-model-parallel size 4 (unchanged), 42 layers.
#
# Required environment variables:
#   LOAD_CHECKPOINT_PATH  the loaded ckpt path (--load)
#   SAVE_CHECKPOINT_PATH  the stored ckpt path (--save); created if missing
#   TOKENIZER_MODEL_PATH  tokenizer model path (--tokenizer-model-path)
set -euo pipefail

LOAD_CHECKPOINT_PATH=${LOAD_CHECKPOINT_PATH:?must be set to the loaded ckpt path}
SAVE_CHECKPOINT_PATH=${SAVE_CHECKPOINT_PATH:?must be set to the stored ckpt path}
TOKENIZER_MODEL_PATH=${TOKENIZER_MODEL_PATH:?must be set to the tokenizer model path}

export CUDA_DEVICE_MAX_CONNECTIONS=1

# -p is a no-op for an existing directory and also creates missing parents.
mkdir -p -- "$SAVE_CHECKPOINT_PATH"

python tools/merge_tp_partitions.py \
  --tensor-model-parallel-size 2 \
  --target-tensor-model-parallel-size 1 \
  --pipeline-model-parallel-size 4 \
  --target-pipeline-model-parallel-size 4 \
  --tokenizer-type YuanTokenizer \
  --tokenizer-model-path "$TOKENIZER_MODEL_PATH" \
  --num-layers 42 \
  --hidden-size 8192 \
  --num-attention-heads 64 \
  --seq-length 4096 \
  --max-position-embeddings 4096 \
  --use-lf-gate \
  --lf-conv2d-group 1 \
  --lf-conv2d-num-pad 1 \
  --position-embedding-type rope \
  --no-embedding-dropout \
  --flash-attn-drop 0.1 \
  --fim-rate 0.5 \
  --fim-spm-rate 0.5 \
  --attention-dropout 0 \
  --norm-dtype RMSNorm \
  --hidden-dropout 0 \
  --disable-bias-linear \
  --reset-position-ids \
  --use-flash-attn \
  --swiglu \
  --adam-beta1 0.9 \
  --adam-beta2 0.95 \
  --bf16 \
  --DDP-impl local \
  --use-cpu-initialization \
  --micro-batch-size 1 \
  --save-interval 1 \
  --recompute-method block \
  --recompute-granularity full \
  --recompute-num-layers 1 \
  --load "$LOAD_CHECKPOINT_PATH" \
  --save "$SAVE_CHECKPOINT_PATH" \
  --global-batch-size 1152 \
  --lr 0.00009 \
  --train-iters 63578 \
  --lr-decay-iters 63578 \
  --lr-decay-style cosine \
  --min-lr 1.8e-5 \
  --weight-decay 1e-1 \
  --no-load-optim \
  --process-checkpoint \
  --use-distributed-optimizer

# Report the size of the merged checkpoint.
du -sh -- "$SAVE_CHECKPOINT_PATH"
# Multi-Stage Prompting for Knowledgeable Dialogue Generation
This directory contains all the scripts for multi-stage prompting for knowledgeable dialogue generation, covering data preparation as well as knowledge and response generation. More details are available in the [`knowledgeable task directory`](../../tasks/msdp).
#!/bin/bash
# Data preparation for our framework: preprocessing the WoW and WoI datasets
# The datasets can be downloaded through the following links:
# WoW: https://parl.ai/projects/wizard_of_wikipedia/
# WoI: https://parl.ai/projects/sea/
#
# Required environment variables:
#   WOW_DATA_FOLDER  path of the Wizard of Wikipedia data folder
#   WOI_DATA_FOLDER  path of the Wizard of Internet data folder
#   MODEL_FILE       path of the finetuned DPR model
#
# Later steps read files written by earlier ones, so the script stops at the
# first failing step.
set -euo pipefail

DIR=$(pwd)

# Before running the preprocessing, please download
# the Wizard of Wikipedia and Wizard of Internet datasets
WOW_DATA_FOLDER=${WOW_DATA_FOLDER:?must be set to the path of the Wizard of Wikipedia data folder}
WOI_DATA_FOLDER=${WOI_DATA_FOLDER:?must be set to the path of the Wizard of Internet data folder}

# We provide examples for processing the raw data from Wizard of Wikipedia
# Processing the train dataset (train.json)
python "${DIR}/tasks/msdp/preprocessing.py" \
  --func process_wow_dataset \
  --raw_file "${WOW_DATA_FOLDER}/train.json" \
  --processed_file "${WOW_DATA_FOLDER}/train_processed.txt"

# Processing test seen dataset (test_random_split.json)
python "${DIR}/tasks/msdp/preprocessing.py" \
  --func process_wow_dataset \
  --raw_file "${WOW_DATA_FOLDER}/test_random_split.json" \
  --processed_file "${WOW_DATA_FOLDER}/testseen_processed.txt" \
  --knwl_ref_file "${WOW_DATA_FOLDER}/output_testseen_knowledge_reference.txt" \
  --resp_ref_file "${WOW_DATA_FOLDER}/output_testseen_response_reference.txt"

# Processing test unseen dataset (test_topic_split.json)
python "${DIR}/tasks/msdp/preprocessing.py" \
  --func process_wow_dataset \
  --raw_file "${WOW_DATA_FOLDER}/test_topic_split.json" \
  --processed_file "${WOW_DATA_FOLDER}/testunseen_processed.txt" \
  --knwl_ref_file "${WOW_DATA_FOLDER}/output_testunseen_knowledge_reference.txt" \
  --resp_ref_file "${WOW_DATA_FOLDER}/output_testunseen_response_reference.txt"

# We provide the following script to process the raw data from Wizard of Internet
# Processing the test dataset (test.jsonl)
python "${DIR}/tasks/msdp/preprocessing.py" \
  --func process_woi_dataset \
  --raw_file "${WOI_DATA_FOLDER}/test.jsonl" \
  --processed_file "${WOI_DATA_FOLDER}/test_processed.txt" \
  --knwl_ref_file "${WOI_DATA_FOLDER}/output_test_knowledge_reference.txt" \
  --resp_ref_file "${WOI_DATA_FOLDER}/output_test_response_reference.txt"

# Get the knowledge generation prompts for each test dataset in WoW and WoI
MODEL_FILE=${MODEL_FILE:?must be set to the path of the finetuned DPR model}

# WoW test seen
python "${DIR}/tasks/msdp/preprocessing.py" \
  --func get_knwl_gen_prompts \
  --test_file "${WOW_DATA_FOLDER}/testseen_processed.txt" \
  --train_file "${WOW_DATA_FOLDER}/train_processed.txt" \
  --model_file "${MODEL_FILE}" \
  --processed_file "${WOW_DATA_FOLDER}/output_testseen_knowledge_prompts.json" \
  --data_type wow_seen

# WoW test unseen
python "${DIR}/tasks/msdp/preprocessing.py" \
  --func get_knwl_gen_prompts \
  --test_file "${WOW_DATA_FOLDER}/testunseen_processed.txt" \
  --train_file "${WOW_DATA_FOLDER}/train_processed.txt" \
  --model_file "${MODEL_FILE}" \
  --processed_file "${WOW_DATA_FOLDER}/output_testunseen_knowledge_prompts.json" \
  --data_type wow_unseen

# WoI
python "${DIR}/tasks/msdp/preprocessing.py" \
  --func get_knwl_gen_prompts \
  --test_file "${WOI_DATA_FOLDER}/test_processed.txt" \
  --train_file "${WOW_DATA_FOLDER}/train_processed.txt" \
  --model_file "${MODEL_FILE}" \
  --processed_file "${WOI_DATA_FOLDER}/output_test_knowledge_prompts.json" \
  --data_type woi

# Get the response generation prompts (can be applied for all the test datasets)
python "${DIR}/tasks/msdp/preprocessing.py" \
  --func get_resp_gen_prompts \
  --train_file "${WOW_DATA_FOLDER}/train_processed.txt" \
  --processed_file "${WOW_DATA_FOLDER}/output_response_prompts.txt"
#!/bin/bash
# Evaluates generated knowledge against the ground-truth knowledge: first the
# F1 scores through tasks/msdp/main.py, then BLEU, METEOR and ROUGE-L through
# nlg-eval.
#
# Required environment variables:
#   MODEL_GEN_PATH     path of the knowledge generation
#                      (e.g., /testseen_knowledge_generations.txt)
#   GROUND_TRUTH_PATH  path of the ground truth knowledge
#                      (e.g., /testseen_knowledge_reference.txt)
#
# The two evaluations are independent, so a failure of the first one does not
# prevent the second from running.
set -u

#########################
# Evaluate the F1 scores.
#########################

WORLD_SIZE=1
DISTRIBUTED_ARGS=(
  --nproc_per_node "$WORLD_SIZE"
  --nnodes 1
  --node_rank 0
  --master_addr localhost
  --master_port 6000
)

MODEL_GEN_PATH=${MODEL_GEN_PATH:?must be set to the path of the knowledge generation}
GROUND_TRUTH_PATH=${GROUND_TRUTH_PATH:?must be set to the path of the ground truth knowledge}

python -m torch.distributed.launch "${DISTRIBUTED_ARGS[@]}" ./tasks/msdp/main.py \
  --num-layers 24 \
  --hidden-size 1024 \
  --num-attention-heads 16 \
  --seq-length 2048 \
  --max-position-embeddings 2048 \
  --micro-batch-size 4 \
  --task MSDP-EVAL-F1 \
  --guess-file "${MODEL_GEN_PATH}" \
  --answer-file "${GROUND_TRUTH_PATH}"

############################################
# Evaluate BLEU, METEOR, and ROUGE-L scores.
############################################

# We follow the nlg-eval (https://github.com/Maluuba/nlg-eval) to
# evaluate the BLEU, METEOR, and ROUGE-L scores.

# To evaluate on these metrics, please setup the environments based on
# the nlg-eval github, and run the corresponding evaluation commands.

nlg-eval \
  --hypothesis="${MODEL_GEN_PATH}" \
  --references="${GROUND_TRUTH_PATH}"
#!/bin/bash
# Evaluates generated responses: F1 against the ground-truth responses, KF1
# against the ground-truth knowledge (both through tasks/msdp/main.py), then
# BLEU, METEOR and ROUGE-L against the ground-truth responses through nlg-eval.
#
# Required environment variables:
#   MODEL_GEN_PATH               path of the response generation
#                                (e.g., /testseen_response_generations.txt)
#   GROUND_TRUTH_PATH            path of the ground truth response
#                                (e.g., /testseen_response_reference.txt)
#   GROUND_TRUTH_KNOWLEDGE_PATH  path of the ground truth knowledge
#                                (e.g., /testseen_knowledge_reference.txt)
#
# The evaluations are independent, so a failure of one does not prevent the
# following ones from running.
set -u

WORLD_SIZE=1
DISTRIBUTED_ARGS=(
  --nproc_per_node "$WORLD_SIZE"
  --nnodes 1
  --node_rank 0
  --master_addr localhost
  --master_port 6000
)

MODEL_GEN_PATH=${MODEL_GEN_PATH:?must be set to the path of the response generation}
GROUND_TRUTH_PATH=${GROUND_TRUTH_PATH:?must be set to the path of the ground truth response}
GROUND_TRUTH_KNOWLEDGE_PATH=${GROUND_TRUTH_KNOWLEDGE_PATH:?must be set to the path of the ground truth knowledge}

#########################
# Evaluate the F1 scores.
#########################

python -m torch.distributed.launch "${DISTRIBUTED_ARGS[@]}" ./tasks/msdp/main.py \
  --num-layers 24 \
  --hidden-size 1024 \
  --num-attention-heads 16 \
  --seq-length 2048 \
  --max-position-embeddings 2048 \
  --micro-batch-size 4 \
  --task MSDP-EVAL-F1 \
  --guess-file "${MODEL_GEN_PATH}" \
  --answer-file "${GROUND_TRUTH_PATH}"

##########################
# Evaluate the KF1 scores.
##########################

# Same task as above, but the generated responses are compared with the
# ground-truth knowledge instead of the ground-truth responses.
python -m torch.distributed.launch "${DISTRIBUTED_ARGS[@]}" ./tasks/msdp/main.py \
  --num-layers 24 \
  --hidden-size 1024 \
  --num-attention-heads 16 \
  --seq-length 2048 \
  --max-position-embeddings 2048 \
  --micro-batch-size 4 \
  --task MSDP-EVAL-F1 \
  --guess-file "${MODEL_GEN_PATH}" \
  --answer-file "${GROUND_TRUTH_KNOWLEDGE_PATH}"

############################################
# Evaluate BLEU, METEOR, and ROUGE-L scores.
############################################

# We follow the nlg-eval (https://github.com/Maluuba/nlg-eval) to
# evaluate the BLEU, METEOR, and ROUGE-L scores.

# To evaluate on these metrics, please setup the environments based on
# the nlg-eval github, and run the corresponding evaluation commands.

nlg-eval \
  --hypothesis="${MODEL_GEN_PATH}" \
  --references="${GROUND_TRUTH_PATH}"
#!/bin/bash
# Preparing the input file for the response generation (second-stage prompting)
#
# Required environment variables:
#   TEST_FILE       path of the processed test data
#                   (e.g., /testseen_processed.txt)
#   KNOWLEDGE_FILE  path of the generated knowledge data
#                   (e.g., /testseen_knowledge_generations.txt)
#   PROCESSED_FILE  path of the input file for response generation
#                   (e.g., /testseen_processed_with_generated_knowledge.txt)
set -euo pipefail

DIR=$(pwd)

TEST_FILE=${TEST_FILE:?must be set to the path of the processed test data}
KNOWLEDGE_FILE=${KNOWLEDGE_FILE:?must be set to the path of the generated knowledge data}
PROCESSED_FILE=${PROCESSED_FILE:?must be set to the path of the input file for response generation}

python "${DIR}/tasks/msdp/preprocessing.py" \
  --func prepare_input \
  --test_file "${TEST_FILE}" \
  --knwl_gen_file "${KNOWLEDGE_FILE}" \
  --processed_file "${PROCESSED_FILE}"
#!/bin/bash
# Stage-1: Prompt a pretrained language model to generate the context-relevant knowledge
# The input contains prompts and current dialogue context, the output is the relevant knowledge
# The size of the pretrained language model is 357M
#
# Required environment variables:
#   CHECKPOINT_PATH  path of the language model (e.g., /357m)
#   VOCAB_PATH       path of the vocab file (e.g., /gpt2-vocab.json)
#   MERGE_PATH       path of the merge file (e.g., /gpt2-merges.txt)
#   INPUT_PATH       path of the processed test data file
#                    (e.g., /testseen_processed.txt)
#   PROMPT_PATH      path of the knowledge generation prompts
#                    (e.g., /testseen_knowledge_prompts.json)
#   OUTPUT_PATH      path of the output generation file
#                    (e.g., /testseen_knowledge_generations.txt)
set -euo pipefail

WORLD_SIZE=8

DISTRIBUTED_ARGS=(
  --nproc_per_node "$WORLD_SIZE"
  --nnodes 1
  --node_rank 0
  --master_addr localhost
  --master_port 6000
)

CHECKPOINT_PATH=${CHECKPOINT_PATH:?must be set to the path of the language model}
VOCAB_PATH=${VOCAB_PATH:?must be set to the path of the vocab file}
MERGE_PATH=${MERGE_PATH:?must be set to the path of the merge file}
INPUT_PATH=${INPUT_PATH:?must be set to the path of the processed test data file}
PROMPT_PATH=${PROMPT_PATH:?must be set to the path of the knowledge generation prompts}
OUTPUT_PATH=${OUTPUT_PATH:?must be set to the path of the output generation file}

python -m torch.distributed.launch "${DISTRIBUTED_ARGS[@]}" ./tasks/msdp/main.py \
  --num-layers 24 \
  --hidden-size 1024 \
  --num-attention-heads 16 \
  --seq-length 2048 \
  --max-position-embeddings 2048 \
  --micro-batch-size 1 \
  --vocab-file "${VOCAB_PATH}" \
  --merge-file "${MERGE_PATH}" \
  --load "${CHECKPOINT_PATH}" \
  --fp16 \
  --DDP-impl torch \
  --tokenizer-type GPT2BPETokenizer \
  --sample-input-file "${INPUT_PATH}" \
  --sample-output-file "${OUTPUT_PATH}" \
  --prompt-file "${PROMPT_PATH}" \
  --prompt-type knowledge \
  --num-prompt-examples 10 \
  --task MSDP-PROMPT

# NOTE: If you use api for the model generation, please use
# the "--api-prompt" flag (setting this value as True).
#!/bin/bash
# Stage-2: Prompt a pretrained language model to generate the corresponding response
# The input contains prompts, current dialogue context, and generated knowledge in Stage-1
# The output is the corresponding response.
# The size of the pretrained language model is 357M
#
# Required environment variables:
#   CHECKPOINT_PATH  path of the language model (e.g., /357m)
#   VOCAB_PATH       path of the vocab file (e.g., /gpt2-vocab.json)
#   MERGE_PATH       path of the merge file (e.g., /gpt2-merges.txt)
#   INPUT_PATH       path of the input test data file
#                    (e.g., /testseen_processed.txt)
#   PROMPT_PATH      path of the response generation prompts
#                    (e.g., /response_prompts.txt)
#   OUTPUT_PATH      path of the output generation file
#                    (e.g., /output_testseen_response_generations.txt)
set -euo pipefail

WORLD_SIZE=8

DISTRIBUTED_ARGS=(
  --nproc_per_node "$WORLD_SIZE"
  --nnodes 1
  --node_rank 0
  --master_addr localhost
  --master_port 6000
)

CHECKPOINT_PATH=${CHECKPOINT_PATH:?must be set to the path of the language model}
VOCAB_PATH=${VOCAB_PATH:?must be set to the path of the vocab file}
MERGE_PATH=${MERGE_PATH:?must be set to the path of the merge file}
INPUT_PATH=${INPUT_PATH:?must be set to the path of the input test data file}
PROMPT_PATH=${PROMPT_PATH:?must be set to the path of the response generation prompts}
OUTPUT_PATH=${OUTPUT_PATH:?must be set to the path of the output generation file}

python -m torch.distributed.launch "${DISTRIBUTED_ARGS[@]}" ./tasks/msdp/main.py \
  --num-layers 24 \
  --hidden-size 1024 \
  --num-attention-heads 16 \
  --seq-length 2048 \
  --max-position-embeddings 2048 \
  --micro-batch-size 1 \
  --vocab-file "${VOCAB_PATH}" \
  --merge-file "${MERGE_PATH}" \
  --load "${CHECKPOINT_PATH}" \
  --fp16 \
  --DDP-impl torch \
  --tokenizer-type GPT2BPETokenizer \
  --sample-input-file "${INPUT_PATH}" \
  --sample-output-file "${OUTPUT_PATH}" \
  --prompt-file "${PROMPT_PATH}" \
  --prompt-type response \
  --num-prompt-examples 20 \
  --task MSDP-PROMPT

# NOTE: If you use api for the model generation, please use
# the "--api-prompt" flag (setting this value as True).
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment