Commit 5c13d125 authored by Rayyyyy

add chat

parent bf0dbb2e
......@@ -118,8 +118,8 @@ NPROC_PER_NODE=${DCU_NUM} xtuner train ./llama3_8b_instruct_qlora_alpaca_e3_M.py
- Example for the Meta-Llama-3-8B model; for the Meta-Llama-3-70B model, simply replace the --ckpt_dir and --tokenizer_path arguments with the corresponding model paths.
```bash
torchrun --nproc_per_node 1 example_text_completion.py \
--ckpt_dir Meta-Llama-3-8B/original/ \
torchrun --nproc_per_node 8 example_text_completion.py \
--ckpt_dir Meta-Llama-3-70B/original/ \
--tokenizer_path Meta-Llama-3-8B/original/tokenizer.model \
--max_seq_len 128 --max_batch_size 4
```
......@@ -140,6 +140,13 @@ torchrun --nproc_per_node 1 example_chat_completion.py \
--tokenizer_path Meta-Llama-3-8B-Instruct/original/tokenizer.model \
--max_seq_len 512 --max_batch_size 6
```
## Multi-turn Dialogue
1. Make sure the environment is set up and the model has been downloaded;
2. In [chat.sh](./chat.sh), set the `--ckpt_dir` and `--tokenizer_path` arguments to your local model paths, and adjust `--max_seq_len` as needed: increasing it lets the model remember a longer conversation history, but note that this also increases compute time and memory usage (the message list the script accumulates each turn is sketched after the run command below);
3. Run:
```bash
bash chat.sh
```
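
For orientation, the multi-turn script keeps the whole conversation as a single `Dialog`, i.e. a plain list of `{"role": ..., "content": ...}` messages that grows by one user and one assistant entry each turn; the full list is re-sent on every call, which is why `--max_seq_len` bounds how much history the model can see. A minimal sketch of such a history after two turns (the texts are illustrative, not from this repo):

```python
# A Dialog, as used by llama3_chat.py, is just a list of role/content dicts.
# Illustrative example of the history after two completed turns plus a new question.
dialog = [
    {"role": "user", "content": "Hi, who are you?"},
    {"role": "assistant", "content": "I run on Meta-Llama-3-8B-Instruct."},
    {"role": "user", "content": "Summarize what we just talked about."},
]

# chat_completion works on a batch of dialogs, so the script keeps
# dialogs = [dialog] and passes that list on every turn.
dialogs = [dialog]
```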
## Evaluation
1. Install `llama-recipes` and `lm-eval`
......
#!/bin/bash
echo "Export params ..."
export HIP_VISIBLE_DEVICES=0  # change this to the GPU index you want to use
# ROCm/DCU runtime settings
export HSA_FORCE_FINE_GRAIN_PCIE=1
export USE_MIOPEN_BATCHNORM=1
# torchrun rendezvous settings for a single-node, single-process run
export MASTER_ADDR=localhost
export MASTER_PORT=12355
export RANK=0
export WORLD_SIZE=1
torchrun --nproc_per_node 1 llama3_chat.py \
    --ckpt_dir Meta-Llama-3-8B-Instruct/original/ \
    --tokenizer_path Meta-Llama-3-8B-Instruct/original/tokenizer.model \
    --max_seq_len 2048 --max_batch_size 6
import fire
from typing import List, Optional

from llama import Dialog, Llama


def main(
    ckpt_dir: str,
    tokenizer_path: str,
    temperature: float = 0.6,
    top_p: float = 0.9,
    max_seq_len: int = 512,
    max_batch_size: int = 4,
    max_gen_len: Optional[int] = None,
):
    # Build the generator from the local checkpoint and tokenizer.
    generator = Llama.build(
        ckpt_dir=ckpt_dir,
        tokenizer_path=tokenizer_path,
        max_seq_len=max_seq_len,
        max_batch_size=max_batch_size,
    )

    dialogs: List[Dialog] = [[]]  # start with a single empty dialog
    try:
        # Continue until the user decides to stop.
        while True:
            user_input = input("You: ")
            # Allow the user to quit the dialogue.
            if user_input.lower() in ["stop", "exit"]:
                break
            dialogs[0].append({"role": "user", "content": user_input})

            # Generate a response based on the current dialog context.
            result = generator.chat_completion(
                dialogs,
                max_gen_len=max_gen_len,
                temperature=temperature,
                top_p=top_p,
            )[0]
            response = result["generation"]["content"]
            print(f"Assistant: {response}\n")

            # Append the generated response to the dialog so the model sees
            # the full conversation history on the next turn.
            dialogs[0].append({"role": "assistant", "content": response})
    except KeyboardInterrupt:
        print("Exiting dialogue.")


if __name__ == "__main__":
    fire.Fire(main)
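
Because llama3_chat.py re-sends the entire history every turn, a long session can eventually exceed `--max_seq_len`. One possible mitigation, not part of this commit, is to drop the oldest turns before each call. The helper below is a hypothetical sketch (`trim_dialog` and the `max_chars` character budget are assumptions, and character count is only a rough proxy for the token count that `--max_seq_len` actually limits):

```python
from typing import Dict, List

Message = Dict[str, str]  # {"role": "user" | "assistant", "content": ...}


def trim_dialog(dialog: List[Message], max_chars: int = 6000) -> List[Message]:
    """Keep only the most recent messages that fit a rough character budget."""
    kept: List[Message] = []
    total = 0
    # Walk backwards so the newest turns are retained first.
    for message in reversed(dialog):
        total += len(message["content"])
        if kept and total > max_chars:
            break
        kept.append(message)
    kept.reverse()
    # Keep the history starting with a user message, matching the shape
    # the chat loop normally builds.
    while kept and kept[0]["role"] != "user":
        kept.pop(0)
    return kept


# Possible usage inside the chat loop, right before calling chat_completion:
#     dialogs[0] = trim_dialog(dialogs[0])
```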