Commit 88c3b719 authored by Rayyyyy

update README

parent d3471cd8
Pipeline #1010 failed with stages in 0 seconds
from transformers import AutoTokenizer
from vllm import LLM, SamplingParams

# Context length and tensor-parallel degree for the Lite chat model.
max_model_len, tp_size = 8192, 1
model_name_or_path = "deepseek-ai/DeepSeek-V2-Lite-Chat"
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
llm = LLM(model=model_name_or_path, tensor_parallel_size=tp_size, max_model_len=max_model_len, trust_remote_code=True, enforce_eager=True)
sampling_params = SamplingParams(temperature=0.3, max_tokens=256, stop_token_ids=[tokenizer.eos_token_id])

messages_list = [
    [{"role": "user", "content": "Who are you?"}],
    [{"role": "user", "content": "Translate the following content into Chinese directly: DeepSeek-V2 adopts innovative architectures to guarantee economical training and efficient inference."}],
    [{"role": "user", "content": "Write a piece of quicksort code in C++."}],
]

# apply_chat_template tokenizes by default, so each prompt is already a list of token ids,
# which vLLM consumes directly via prompt_token_ids.
prompt_token_ids = [tokenizer.apply_chat_template(messages, add_generation_prompt=True) for messages in messages_list]
outputs = llm.generate(prompt_token_ids=prompt_token_ids, sampling_params=sampling_params)

generated_text = [output.outputs[0].text for output in outputs]
print(generated_text)
@@ -3,7 +3,7 @@
[deepseek-v2](https://arxiv.org/abs/2405.04434)
## Model Architecture
DeepSeek-V2 innovates across the entire model framework. It introduces MLA (Multi-head Latent Attention), an architecture that rivals MHA while sharply reducing compute and inference memory; the in-house sparse DeepSeekMoE structure drives compute cost down even further. Combined, the two deliver a cross-tier improvement in model performance.
<div align=center>
<img src="./doc/model.png"/>
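For intuition, the sketch below illustrates the low-rank KV-compression idea behind MLA: the hidden state is down-projected into a small latent vector that is cached during decoding, and keys/values are reconstructed from it by up-projections. The dimensions and module names here are illustrative assumptions, not the actual DeepSeek-V2 implementation (which additionally uses decoupled RoPE keys).
```python
import torch
import torch.nn as nn

class LatentKVCompression(nn.Module):
    """Minimal sketch of MLA-style low-rank KV compression (illustrative only)."""

    def __init__(self, hidden_dim=2048, latent_dim=512, n_heads=16, head_dim=128):
        super().__init__()
        # Compress the hidden state into a small latent vector.
        self.down_proj = nn.Linear(hidden_dim, latent_dim, bias=False)
        # Reconstruct per-head keys and values from the latent vector.
        self.up_proj_k = nn.Linear(latent_dim, n_heads * head_dim, bias=False)
        self.up_proj_v = nn.Linear(latent_dim, n_heads * head_dim, bias=False)

    def forward(self, hidden_states):
        # Only the latent vector needs to be cached at inference time,
        # which is what cuts KV-cache memory compared with standard MHA.
        latent = self.down_proj(hidden_states)
        k = self.up_proj_k(latent)
        v = self.up_proj_v(latent)
        return latent, k, v
```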
@@ -58,10 +58,28 @@ export HF_ENDPOINT=https://hf-mirror.com
Not available yet
## Training
Not available yet
## Inference
Inference is based on Hugging Face Transformers; set the `model_name_or_path` parameter to your local model path.
If the pretrained model has not been downloaded, the code will download it automatically based on your selection. The currently available models are "deepseek-ai/DeepSeek-V2-Lite" and "deepseek-ai/DeepSeek-V2-Lite-Chat".
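A minimal sketch of the loading step described above, assuming the Lite chat checkpoint; the full scripts (`text_completion.py`, `chat_completion.py`) are included in this repo:
```python
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Set this to a local checkpoint path, or keep the Hub id to download automatically.
model_name_or_path = "deepseek-ai/DeepSeek-V2-Lite-Chat"
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name_or_path, trust_remote_code=True, torch_dtype=torch.bfloat16
).cuda()
```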
### Text Completion
```bash
export HSA_FORCE_FINE_GRAIN_PCIE=1
export USE_MIOPEN_BATCHNORM=1
python text_completion.py
```
### Chat
```bash
export HSA_FORCE_FINE_GRAIN_PCIE=1
export USE_MIOPEN_BATCHNORM=1
python chat_completion.py
```
### Accuracy
Not available yet
@@ -71,11 +89,9 @@ export HF_ENDPOINT=https://hf-mirror.com
Conversational Q&A
### Key Application Industries
Manufacturing, Media, Home Furnishing, Education
Finance, Media, Education
## Pretrained Weights
The model directory structure is as follows:
```bash
├── model_save_path
@@ -117,5 +133,4 @@ export HF_ENDPOINT=https://hf-mirror.com
## References
- https://github.com/deepseek-ai/DeepSeek-V2
- https://huggingface.co/deepseek-ai/DeepSeek-V2
- https://huggingface.co/deepseek-ai/DeepSeek-V2-Lite
- https://huggingface.co/deepseek-ai
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
model_name_or_path = "/home/DeepSeek-V2/DeepSeek-V2-Lite-Chat"
# model_name_or_path = "deepseek-ai/DeepSeek-V2-Lite-Chat"
model_name_or_path = "deepseek-ai/DeepSeek-V2-Lite-Chat"
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, trust_remote_code=True, torch_dtype=torch.bfloat16).cuda()
@@ -13,8 +12,9 @@ model.generation_config.pad_token_id = model.generation_config.eos_token_id
messages = [
{"role": "user", "content": "Write a piece of quicksort code in C++"}
]
input_tensor = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt")
outputs = model.generate(input_tensor.to(model.device), max_new_tokens=100)
result = tokenizer.decode(outputs[0][input_tensor.shape[1]:], skip_special_tokens=True)
print("result", result)
# Unique model identifier
modelCode=639
# Model name
modelName=deepseek-v2_pytorch
# Model description
modelDescription=DeepSeek-V2, a second-generation MoE model with more parameters, stronger capabilities, and lower cost
# Application scenarios
appScenario=Inference, Conversational Q&A, Finance, Media, Education
# Framework type
frameType=pytorch
echo "Export params ..."
export HSA_FORCE_FINE_GRAIN_PCIE=1
export USE_MIOPEN_BATCHNORM=1
python text_completion.py
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
# model_name_or_path = "/home/DeepSeek-V2/DeepSeek-V2-Lite-Chat"  # local checkpoint path
model_name_or_path = "deepseek-ai/DeepSeek-V2-Lite"
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=True)
# `max_memory` should be set based on your devices
@@ -25,4 +25,4 @@ inputs = tokenizer(text, return_tensors="pt")
outputs = model.generate(**inputs.to(model.device), max_new_tokens=100)
result = tokenizer.decode(outputs[0], skip_special_tokens=True)
print("result", result)