Commit d51449c7 authored by zhouxiang

Update to support chatglm3-32k

parent 81e6b0e9
@@ -41,7 +41,7 @@ ChatGLM3-6B is built on the GLM architecture. GLM is a Transformer-based language model
An inference-ready Docker image is available on SourceFind (光源); pull it as follows:
```
-docker pull image.sourcefind.cn:5000/dcu/admin/base/custom:lmdeploy-dtk23.10-torch1.13-py38
+docker pull image.sourcefind.cn:5000/dcu/admin/base/pytorch:2.1.0-centos7.6-dtk23.10.1-py38
```
### Start the container
@@ -51,7 +51,15 @@ docker pull image.sourcefind.cn:5000/dcu/admin/base/custom:lmdeploy-dtk23.10-tor
```
# <container_name>: a custom container name
# <project_path>: path where this project is located
-docker run -it --name=<container_name> -v <project_path>:/work -v /opt/hyhal:/opt/hyhal --device=/dev/kfd --device=/dev/dri --security-opt seccomp=unconfined --cap-add=SYS_PTRACE --ipc=host --network host --shm-size=16G --group-add video image.sourcefind.cn:5000/dcu/admin/base/custom:lmdeploy-dtk23.10-torch1.13-py38 /bin/bash
+docker run -it --name=<container_name> -v <project_path>:/work -w /work --privileged -v /opt/hyhal:/opt/hyhal --device=/dev/kfd --device=/dev/dri --security-opt seccomp=unconfined --cap-add=SYS_PTRACE --ipc=host --network host --shm-size=16G --group-add video image.sourcefind.cn:5000/dcu/admin/base/pytorch:2.1.0-centos7.6-dtk23.10.1-py38 /bin/bash
```
### Load the environment
After entering the container, run the following command to load the runtime environment variables:
```
source /opt/dtk/cuda/env.sh
```
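Once the environment script has been sourced, a quick sanity check can confirm the DCU cards are visible (a minimal sketch; it assumes this PyTorch build exposes DCUs through the CUDA device API, as DTK/ROCm-style builds typically do):
```
import torch

# On DTK/ROCm-style builds, DCU devices surface through the CUDA API.
print(torch.__version__)          # expect a 2.1.0 build inside this image
print(torch.cuda.is_available())  # True once the devices are passed through
print(torch.cuda.device_count())  # number of visible DCU cards
```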
### Installation
@@ -206,7 +206,7 @@ def predict(id: str, query: str, history: List[List[str]], model_id: str, max_le
    yield '[DONE]'
def args_parser():
-    parser = argparse.ArgumentParser(description = 'baichuan2_chat_demo')
+    parser = argparse.ArgumentParser(description = 'chatglm3_chat_demo')
    parser.add_argument('-p', '--path', type = str, default = "/model", help = 'path to the model files')
    parser.add_argument('-g', '--gpus', type = str, default = "0", help = 'gpu card(s) to run on, e.g. "0,1"')
    args = parser.parse_args()
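For context, here is a hedged sketch of how the parsed flags are commonly consumed before the model is loaded (`apply_args` and the environment-variable handling are illustrative, not necessarily what this demo does):
```
import os

def apply_args(args) -> None:
    # Restrict the visible DCU cards before any model is instantiated;
    # DTK/ROCm-style stacks honour the CUDA-compatible variable.
    os.environ.setdefault("CUDA_VISIBLE_DEVICES", args.gpus)
    print(f"loading chatglm3 weights from {args.path}")

apply_args(args_parser())
```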
@@ -54,10 +54,11 @@ def create(model,
    if (modelInfo["model_type"] == "chatglm" and hasattr(tokenizer, "build_chat_input")):
        # chatglm3
        modelInfo["pre_prompt"] = "";
-        modelInfo["user_role"] = ("<FLM_FIX_TOKEN_" + str(tokenizer.get_command("<|user|>")) + ">\n");
+        modelInfo["user_role"] = ("<FLM_FIX_TOKEN_" + str(tokenizer.get_command("<|user|>")) + "> \n");
        modelInfo["bot_role"] = ("<FLM_FIX_TOKEN_" + str(tokenizer.get_command("<|assistant|>")) + ">");
        modelInfo["history_sep"] = "";
+        modelInfo["tokenizer_use_score"] = "1"  # tokenize with scores
    weight_type_dict = {};
    module_dict = {};
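To make the template concrete, this sketch renders one chat turn from the fields above (`render_turn` and the numeric ids are illustrative; `<FLM_FIX_TOKEN_N>` is the placeholder fastllm substitutes with the fixed token id N rather than re-tokenizing that text):
```
# Hypothetical rendering of a single chat turn from the modelInfo fields;
# real ids come from tokenizer.get_command() at export time.
def render_turn(model_info: dict, user_text: str) -> str:
    return (model_info["pre_prompt"]
            + model_info["user_role"] + user_text
            + model_info["bot_role"])

model_info = {
    "pre_prompt": "",
    "user_role": "<FLM_FIX_TOKEN_64795> \n",  # <|user|> id (illustrative)
    "bot_role": "<FLM_FIX_TOKEN_64796>",      # <|assistant|> id (illustrative)
}
# Prints the user-token placeholder, a space and newline, the text,
# then the assistant-token placeholder.
print(render_turn(model_info, "hello"))
```
The substantive change in this hunk is the space inserted before the newline in `user_role`, presumably to match the prompt format of the chatglm3-32k checkpoints this commit adds support for.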
@@ -118,7 +118,7 @@ def tofile(exportPath,
        print("chatglm3")
        # chatglm3
        modelInfo["pre_prompt"] = "";
-        modelInfo["user_role"] = ("<FLM_FIX_TOKEN_" + str(tokenizer.get_command("<|user|>")) + ">\n");
+        modelInfo["user_role"] = ("<FLM_FIX_TOKEN_" + str(tokenizer.get_command("<|user|>")) + "> \n");
        modelInfo["bot_role"] = ("<FLM_FIX_TOKEN_" + str(tokenizer.get_command("<|assistant|>")) + ">");
        modelInfo["history_sep"] = "";