"cacheflow/model_executor/model_loader.py" did not exist on "bb59a3e7302ad6892e097eee4040e3f516e9f4ea"
Commit 059ba02f authored by xiabo's avatar xiabo
Browse files

Update README.md

parent 243ff4a4
......@@ -64,10 +64,8 @@ cd .. && python3 setup.py install
# <tokenizer_path> tokenizer模型的路径(默认None,会去model_path里面找qwen.tiktoken)
# <dst_path> 保存输出的目标路径(默认./workspace)
# <tp> 用于张量并行的GPU数量应该是2^n
# <quant_path> 量化模型的路径,可以为None(用于int4量化,使用默认None)
# <group_size> AWQ中用于将fp16权重量化为4位的参数(用于int4量化,使用默认'0')
lmdeploy convert --model_name qwen-7b --model_path /path/to/model --model_format qwen --tokenizer_path None --dst_path ./workspace_qwe7b --tp 1 --quant_path None --group_size 0
lmdeploy convert --model_name qwen-7b --model_path /path/to/model --model_format qwen --tokenizer_path None --dst_path ./workspace_qwe7b --tp 1
# bash界面运行
lmdeploy chat turbomind --model_path ./workspace_qwe7b --tp 1 # 输入问题后执行2次回车进行推理
......@@ -89,7 +87,7 @@ lmdeploy serve gradio --model_path_or_server ./workspace_qwe7b --server_name {ip
### 运行 Qwen-14B-chat
```
# 模型转换
lmdeploy convert --model_name qwen-14b --model_path /path/to/model --model_format qwen --tokenizer_path None --dst_path ./workspace_qwe14b --tp 2 --quant_path None --group_size 0
lmdeploy convert --model_name qwen-14b --model_path /path/to/model --model_format qwen --tokenizer_path None --dst_path ./workspace_qwe14b --tp 2
# bash界面运行
lmdeploy chat turbomind --model_path ./workspace_qwe14b --tp 2
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment