Commit 0bcdcc51 authored by huangwb's avatar huangwb

add chat model test method in readme

parent a1dc20dd
...@@ -67,7 +67,7 @@ python tools/list_configs.py llama mmlu
Install the framework you need, then run:
#### 1. Inference validation with vLLM
Environment setup and usage reference: [https://developer.hpccube.com/codes/OpenDAS/vllm](https://developer.hpccube.com/codes/OpenDAS/vllm)
```shell
python run.py configs/vllm/eval_llama2_vllm.py
```
For other models, refer to `configs/vllm/eval_xxx_vllm.py`
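These eval configs follow OpenCompass's usual config style. A hypothetical sketch of what the model section of such a file looks like (the actual `eval_llama2_vllm.py` ships with the repo; the `abbr`, `path`, and batch settings below are illustrative assumptions, not the shipped values):

```python
# Sketch of the model entry in a vLLM eval config (illustrative values).
from opencompass.models import VLLM

models = [
    dict(
        type=VLLM,
        abbr='llama-2-7b-vllm',           # hypothetical run label
        path='/data/models/Llama-2-7b-hf',  # hypothetical local model path
        max_out_len=100,
        max_seq_len=2048,
        batch_size=32,
        generation_kwargs=dict(temperature=0),
        run_cfg=dict(num_gpus=1, num_procs=1),
    )
]
```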
#### 2. Inference validation with lmdeploy
Environment setup and usage reference: [https://developer.hpccube.com/codes/OpenDAS/lmdeploy](https://developer.hpccube.com/codes/OpenDAS/lmdeploy)
```shell
python run.py configs/lmdeploy/eval_llama2_lmdeploy.py
```
For other models, refer to `configs/lmdeploy/eval_xxx_lmdeploy.py`
#### 3. Inference validation with TGI
All OpenCompass tests require a running TGI service, so start it first. For the TGI service environment and usage, see: [https://developer.hpccube.com/codes/OpenDAS/text-generation-inference](https://developer.hpccube.com/codes/OpenDAS/text-generation-inference)
**Evaluating a base model**
Example of starting the service:
```shell
HIP_VISIBLE_DEVICES=3 text-generation-launcher --dtype=float16 --model-id /data/models/Llama-2-7b-chat-hf --port 3001
```
Example of running the evaluation:
```shell
python run.py configs/tgi/eval_llama2_tgi.py --debug
```
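With the service from the launch command above running, it can help to smoke-test it before kicking off OpenCompass. A minimal sketch using only the standard library (the port 3001 matches the launch example above; `/generate` is TGI's REST endpoint, and the helper names are illustrative):

```python
# Sketch: smoke-test a running TGI service before launching OpenCompass.
import json
import urllib.request

def build_generate_payload(prompt, max_new_tokens=16):
    """Build the request body for TGI's /generate endpoint."""
    return {"inputs": prompt,
            "parameters": {"max_new_tokens": max_new_tokens}}

def tgi_generate(prompt, base_url="http://localhost:3001", max_new_tokens=16):
    """POST a prompt to /generate and return the generated text.
    Requires a running TGI service at base_url."""
    data = json.dumps(build_generate_payload(prompt, max_new_tokens)).encode("utf-8")
    req = urllib.request.Request(
        f"{base_url}/generate", data=data,
        headers={"Content-Type": "application/json"})
    with urllib.request.urlopen(req) as resp:
        return json.loads(resp.read())["generated_text"]
```

If `tgi_generate("Hello")` returns text instead of raising a connection error, the service is up and the eval config's endpoint/port are worth double-checking against it.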
**Evaluating a chat model**
Evaluating a chat model requires a `chat_template` entry in the `tokenizer_config.json` file under the model path. For the `chat_template` of different models, refer to [https://github.com/chujiezheng/chat_templates/tree/main/chat_templates](https://github.com/chujiezheng/chat_templates/tree/main/chat_templates)
In practice, copy `tokenizer_config.json` from the model directory to another location, e.g. as `tokenizer_config_llama2_7b_chat.json`, add the `chat_template` field to that file, and then point the `--tokenizer-config-path` argument at the modified file when starting the service. Taking Llama as an example, see the modified config in [tokenizer_config_llama2_7b_chat.json](./configs/tgi/tokenizer_config_llama2_7b_chat.json)
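The copy-and-edit step above can also be scripted. A minimal sketch using only the standard library (the paths and the helper name are illustrative, not part of this repo):

```python
# Sketch: copy a tokenizer config and insert a chat_template field.
import json
import shutil

def add_chat_template(src_path, dst_path, chat_template):
    """Copy src_path to dst_path, add chat_template, and return the config."""
    shutil.copyfile(src_path, dst_path)
    with open(dst_path, "r", encoding="utf-8") as f:
        cfg = json.load(f)
    cfg["chat_template"] = chat_template
    with open(dst_path, "w", encoding="utf-8") as f:
        json.dump(cfg, f, ensure_ascii=False, indent=2)
    return cfg

# Usage (hypothetical paths):
# add_chat_template("/data/models/Llama-2-7b-chat-hf/tokenizer_config.json",
#                   "/path/to/tokenizer_config_llama2_7b_chat.json",
#                   open("llama2_chat_template.jinja").read())
```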
Example of starting the service:
```shell
HIP_VISIBLE_DEVICES=3 text-generation-launcher --dtype=float16 --model-id /data/models/Llama-2-7b-chat-hf --port 3001 --tokenizer-config-path /path/to/tokenizer_config_llama2_7b_chat.json
```
Note: compared with the base-model command, the only difference is the extra `--tokenizer-config-path` argument.
Example of running the evaluation:
```shell
python run.py configs/tgi/eval_llama2_7b_chat_tgi.py --debug
```
For other models, refer to `configs/tgi/eval_xxx_tgi.py`
---
Parameter description:
(1) Dataset configuration parameters
`work_dir` is the output directory; `from .datasets.ARC_c.ARC_c_gen_1e0de5 import ARC_c_datasets` selects the dataset to use, which can be found and configured under `configs/datasets`. Note that vLLM currently does not support ppl-based evaluation.
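A minimal sketch of how these two settings appear at the top of an eval config (the dataset import is the one quoted above; the `work_dir` value is an illustrative assumption):

```python
# Sketch of the head of an eval config, e.g. under configs/vllm/.
from mmengine.config import read_base

with read_base():
    # Dataset definition quoted in the text above; swap in any
    # dataset config found under configs/datasets.
    from .datasets.ARC_c.ARC_c_gen_1e0de5 import ARC_c_datasets

datasets = [*ARC_c_datasets]
work_dir = './outputs/arc_c/'  # illustrative output directory
```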
......
Contents of `configs/tgi/tokenizer_config_llama2_7b_chat.json`:
```json
{
  "add_bos_token": true,
  "add_eos_token": false,
  "bos_token": {
    "__type": "AddedToken",
    "content": "<s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "clean_up_tokenization_spaces": false,
  "eos_token": {
    "__type": "AddedToken",
    "content": "</s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "legacy": false,
  "model_max_length": 1000000000000000019884624838656,
  "pad_token": null,
  "padding_side": "right",
  "sp_model_kwargs": {},
  "tokenizer_class": "LlamaTokenizer",
  "chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = '<<SYS>>\n' + messages[0]['content'].strip() + '\n<</SYS>>\n\n' %}{% else %}{% set loop_messages = messages %}{% set system_message = '' %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 %}{% set content = system_message + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ bos_token + '[INST] ' + content.strip() + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content.strip() + ' ' + eos_token }}{% endif %}{% endfor %}",
  "unk_token": {
    "__type": "AddedToken",
    "content": "<unk>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  }
}
```
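Before starting the service, it can be worth sanity-checking that the template renders the expected Llama-2 prompt format. A sketch using Jinja2 (the engine `transformers` uses to render chat templates; assumes `jinja2` is installed, and the message contents are illustrative):

```python
# Sketch: render the Llama-2 chat_template from the config above with Jinja2.
from jinja2 import Template

def raise_exception(message):
    """The template calls this when user/assistant roles do not alternate."""
    raise ValueError(message)

# Same template string as in tokenizer_config_llama2_7b_chat.json.
chat_template = (
    "{% if messages[0]['role'] == 'system' %}"
    "{% set loop_messages = messages[1:] %}"
    "{% set system_message = '<<SYS>>\n' + messages[0]['content'].strip() + '\n<</SYS>>\n\n' %}"
    "{% else %}"
    "{% set loop_messages = messages %}"
    "{% set system_message = '' %}"
    "{% endif %}"
    "{% for message in loop_messages %}"
    "{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}"
    "{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}"
    "{% endif %}"
    "{% if loop.index0 == 0 %}"
    "{% set content = system_message + message['content'] %}"
    "{% else %}"
    "{% set content = message['content'] %}"
    "{% endif %}"
    "{% if message['role'] == 'user' %}"
    "{{ bos_token + '[INST] ' + content.strip() + ' [/INST]' }}"
    "{% elif message['role'] == 'assistant' %}"
    "{{ ' ' + content.strip() + ' ' + eos_token }}"
    "{% endif %}"
    "{% endfor %}"
)

rendered = Template(chat_template).render(
    messages=[{"role": "user", "content": "Hello"},
              {"role": "assistant", "content": "Hi there"}],
    bos_token="<s>", eos_token="</s>", raise_exception=raise_exception)
print(rendered)  # <s>[INST] Hello [/INST] Hi there </s>
```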
...@@ -18,7 +18,7 @@ from transformers.models.qwen2 import Qwen2Tokenizer
from transformers.models.llama import LlamaTokenizer
PromptType = Union[PromptList, str]
OPENAI_API_BASE = 'http://localhost:3001/v1/chat/completions'
class TGICHATAPI(BaseAPIModel):
......