Commit 86913514 authored by Rayyyyy

Modify chat.py in 70B

parent 0aea7dd1
......@@ -5,9 +5,6 @@ export HIP_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 # modify as needed to select GPUs
export HSA_FORCE_FINE_GRAIN_PCIE=1
export USE_MIOPEN_BATCHNORM=1
-export MASTER_ADDR=localhost
-export MASTER_PORT=12355
-export RANK=0
echo "Starting ..."
# 8B
......
#!/bin/bash
echo "Export params ..."
-export HIP_VISIBLE_DEVICES=1,2
+export HIP_VISIBLE_DEVICES=1,2 # modify as needed to select GPUs
export HSA_FORCE_FINE_GRAIN_PCIE=1
export USE_MIOPEN_BATCHNORM=1
......
import os
import sys
import fire
import warnings
from typing import List, Optional
from llama import Dialog, Llama
warnings.filterwarnings('ignore', category=UserWarning)
def main(
    ckpt_dir: str,
......@@ -24,11 +27,15 @@ def main(
    try:
        # Continue until the user decides to stop
        while True:
-            user_input = input("You: ")
-            # Allow the user to quit the dialogue
-            if user_input.lower() in ['stop', 'exit']:
-                break
-            dialogs.append({"role": "user", "content": user_input})
+            local_rank = int(os.environ.get("LOCAL_RANK", 0))
+            if local_rank > 0:
+                dialogs.append({"role": "user", "content": "None"})
+            else:
+                user_input = input("You: ")
+                # Allow the user to quit the dialogue
+                if user_input.lower() in ['stop', 'exit']:
+                    break
+                dialogs.append({"role": "user", "content": user_input})
            # Generate response based on the current dialog context
            results = generator.chat_completion(
                [dialogs],
......
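Note on the change above: torchrun starts one Python process per GPU, and input() can only be wired to the terminal on one of them, so rank 0 reads the prompt while every other rank appends a "None" placeholder; this keeps the number of chat_completion calls (which are collective across model-parallel ranks) identical on all processes. An alternative sketch, not part of this commit, uses a hypothetical helper to broadcast the rank-0 prompt so every rank sees the real input and the stop/exit check stays consistent everywhere; it assumes the launcher has already initialized torch.distributed:

import torch.distributed as dist

def gather_user_input(rank: int) -> str:
    # Only rank 0 touches the terminal; the other ranks receive the broadcast.
    payload = [input("You: ")] if rank == 0 else [None]
    dist.broadcast_object_list(payload, src=0)  # ship the rank-0 string to all ranks
    return payload[0]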
ulimit -u 200000
export OMP_NUM_THREADS=1
export NCCL_DEBUG=INFO
export MIOPEN_FIND_MODE=3
export HSA_FORCE_FINE_GRAIN_PCIE=1
export MIOPEN_COMPILE_PARALLEL_LEVEL=1
export NCCL_PLUGIN_P2P=ucx
export NCCL_SOCKET_IFNAME=ib0
export NCCL_P2P_LEVEL=5
export NCCL_NET_PLUGIN=none
echo "START TIME: $(date)"
hostfile=./hostfile
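# One MPI rank per GPU: count the unique hosts in the hostfile, then multiply by 8 GPUs per node.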
np=$(cat $hostfile | sort | uniq | wc -l)
np=$(($np*8))
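# Take the first host in the hostfile; its address is passed to --mca btl_tcp_if_include below.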
nodename=$(cat $hostfile | sed -n "1p")
dist_url=$(echo $nodename | awk '{print $1}')
which mpirun
mpirun -np $np --allow-run-as-root --hostfile $hostfile --bind-to none --mca btl_tcp_if_include $dist_url run_train_single.sh
echo "END TIME: $(date)"
#!/bin/bash
export HSA_FORCE_FINE_GRAIN_PCIE=1
export MIOPEN_FIND_MODE=3
export MIOPEN_COMPILE_PARALLEL_LEVEL=1
export NCCL_PLUGIN_P2P=ucx
export NCCL_SOCKET_IFNAME=ib0
export NCCL_P2P_LEVEL=5
export NCCL_IB_HCA=mlx5_0
export NCCL_DEBUG=INFO
export NCCL_NET_PLUGIN=none
lrank=$OMPI_COMM_WORLD_LOCAL_RANK
echo "LRANK===============================$lrank"
RANK=$OMPI_COMM_WORLD_RANK
WORLD_SIZE=$OMPI_COMM_WORLD_SIZE
export HIP_VISIBLE_DEVICES=0,1,2,3
LR=1e-5
APP="python3 ../main.py \
--deepspeed ../deepspeed.json \
--do_train \
--train_file AdvertiseGen/train.json \
--prompt_column content \
--response_column summary \
--model_name_or_path THUDM/chatglm-6b \
--output_dir ./output_ft/pretrain \
--overwrite_output_dir \
--max_source_length 64 \
--max_target_length 64 \
--per_device_train_batch_size 1 \
--per_device_eval_batch_size 1 \
--gradient_accumulation_steps 1 \
--predict_with_generate \
--max_steps 2000 \
--logging_steps 5 \
--save_steps 1000 \
--learning_rate $LR \
--fp16 \
--local_rank $lrank "
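# Pin each local rank to its own IB HCA (mlx5_<lrank>) and a nearby NUMA node: ranks 0-3 on node 0, ranks 4-7 on node 3.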
case ${lrank} in
[0])
export HIP_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
export UCX_NET_DEVICES=mlx5_0:1
export UCX_IB_PCI_BW=mlx5_0:50Gbs
numactl --cpunodebind=0 --membind=0 ${APP}
;;
[1])
export HIP_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
export UCX_NET_DEVICES=mlx5_1:1
export UCX_IB_PCI_BW=mlx5_1:50Gbs
numactl --cpunodebind=0 --membind=0 ${APP}
;;
[2])
export HIP_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
export UCX_NET_DEVICES=mlx5_2:1
export UCX_IB_PCI_BW=mlx5_2:50Gbs
numactl --cpunodebind=0 --membind=0 ${APP}
;;
[3])
export HIP_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
export UCX_NET_DEVICES=mlx5_3:1
export UCX_IB_PCI_BW=mlx5_3:50Gbs
numactl --cpunodebind=0 --membind=0 ${APP}
;;
[4])
export HIP_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
export UCX_NET_DEVICES=mlx5_4:1
export UCX_IB_PCI_BW=mlx5_4:50Gbs
numactl --cpunodebind=3 --membind=3 ${APP}
;;
[5])
export HIP_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
export UCX_NET_DEVICES=mlx5_5:1
export UCX_IB_PCI_BW=mlx5_5:50Gbs
numactl --cpunodebind=3 --membind=3 ${APP}
;;
[6])
export HIP_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
export UCX_NET_DEVICES=mlx5_6:1
export UCX_IB_PCI_BW=mlx5_6:50Gbs
numactl --cpunodebind=3 --membind=3 ${APP}
;;
[7])
export HIP_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
export UCX_NET_DEVICES=mlx5_7:1
export UCX_IB_PCI_BW=mlx5_7:50Gbs
numactl --cpunodebind=3 --membind=3 ${APP}
;;
esac
#!/bin/bash
echo "Export params ..."
-export HIP_VISIBLE_DEVICES=0 # modify to the GPU IDs/count used for training
+export HIP_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 # modify as needed to select GPUs
export HSA_FORCE_FINE_GRAIN_PCIE=1
export USE_MIOPEN_BATCHNORM=1
......@@ -13,8 +12,8 @@ torchrun --nproc_per_node 1 example_text_completion.py \
--tokenizer_path Meta-Llama-3-8B/original/tokenizer.model \
--max_seq_len 128 --max_batch_size 4
# Meta-Llama-3-8B-Instruct model
# torchrun --nproc_per_node 1 example_chat_completion.py \
# --ckpt_dir ./Meta-Llama-3-8B-Instruct/original/ \
# --tokenizer_path ./Meta-Llama-3-8B-Instruct/original/tokenizer.model \
# Meta-Llama-3-70B-Instruct model
# torchrun --nproc_per_node 8 example_chat_completion.py \
# --ckpt_dir /data/Meta-llama3-models/Meta-Llama-3-70B-Instruct/original/ \
# --tokenizer_path /data/Meta-llama3-models/Meta-Llama-3-70B-Instruct/original/tokenizer.model \
# --max_seq_len 512 --max_batch_size 6
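For reference, a minimal sketch of how these flags reach the llama API inside example_chat_completion.py; values mirror the 70B command above, and the script must still be launched via torchrun with --nproc_per_node equal to the number of checkpoint shards (1 for 8B, 8 for 70B):

from typing import List
from llama import Dialog, Llama

generator = Llama.build(
    ckpt_dir="/data/Meta-llama3-models/Meta-Llama-3-70B-Instruct/original/",
    tokenizer_path="/data/Meta-llama3-models/Meta-Llama-3-70B-Instruct/original/tokenizer.model",
    max_seq_len=512,   # --max_seq_len
    max_batch_size=6,  # --max_batch_size
)
dialogs: List[Dialog] = [[{"role": "user", "content": "Hello!"}]]
results = generator.chat_completion(dialogs, temperature=0.6, top_p=0.9)
print(results[0]["generation"]["content"])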