ModelZoo / Qwen2.5_pytorch · Commits

Commit 802ef8b7 authored Oct 11, 2024 by luopl

init

Pipeline #1743 failed in 0 seconds
Showing 3 changed files with 67 additions and 0 deletions
inference_vllm/Qwen2-7B_inference.py    +23 -0
inference_vllm/Qwen2_72B_inference.py   +34 -0
model.properties                        +10 -0
inference_vllm/Qwen2-7B_inference.py (new file, mode 100644)
from vllm import LLM, SamplingParams

# Sample prompts.
prompts = [
    "Hello, my name is",
    "The president of the United States is",
    "The capital of France is",
    "The future of AI is",
]

# Create a sampling params object.
sampling_params = SamplingParams(temperature=0.8, top_p=0.95)

# Create an LLM.
llm = LLM(model="/data/model/qwen2.5-7B", trust_remote_code=True, dtype="float16", enforce_eager=True)

# Generate texts from the prompts. The output is a list of RequestOutput objects
# that contain the prompt, generated text, and other information.
outputs = llm.generate(prompts, sampling_params)

# Print the outputs.
for output in outputs:
    prompt = output.prompt
    generated_text = output.outputs[0].text
    print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
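The script above feeds raw completion prompts to a base checkpoint. For the Qwen2.5 instruct variants one would normally render the chat template first; a minimal sketch of that variant, assuming a tokenizer with a chat template exists at the same path (the messages list and variable names here are illustrative, not part of this commit):

from transformers import AutoTokenizer
from vllm import LLM, SamplingParams

# Assumption: the checkpoint directory ships a tokenizer_config.json with a
# chat template, which holds for the -Instruct releases rather than the base model.
tokenizer = AutoTokenizer.from_pretrained("/data/model/qwen2.5-7B", trust_remote_code=True)

messages = [{"role": "user", "content": "What is the capital of France?"}]
# Render the conversation into a single prompt string with the generation marker appended.
chat_prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

llm = LLM(model="/data/model/qwen2.5-7B", trust_remote_code=True, dtype="float16", enforce_eager=True)
outputs = llm.generate([chat_prompt], SamplingParams(temperature=0.8, top_p=0.95))
print(outputs[0].outputs[0].text)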
inference_vllm/Qwen2_72B_inference.py (new file, mode 100644)
import torch
from vllm import LLM, SamplingParams


def main():
    # Sample prompts.
    prompts = [
        "The capital of France is",
    ]

    # Create a sampling params object.
    sampling_params = SamplingParams(temperature=0.8, top_p=0.95)

    # Create an LLM object with model path and configuration.
    llm = LLM(
        model="/data/luopl/Qwen/Qwen2.5-72B",
        tensor_parallel_size=4,
        trust_remote_code=True,
        gpu_memory_utilization=0.95,
        dtype="float16",
        max_model_len=44000,
        enforce_eager=True,
    )

    # Generate texts from the prompts.
    outputs = llm.generate(prompts, sampling_params)

    # Print the outputs.
    for output in outputs:
        prompt = output.prompt
        generated_text = output.outputs[0].text
        print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")


if __name__ == '__main__':
    main()
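Here tensor_parallel_size=4 shards the 72B weights across four devices, so the script fails at engine startup when fewer GPUs are visible. A minimal pre-flight sketch of that check, assuming CUDA devices; build_llm is a hypothetical helper, not part of the committed script:

import torch
from vllm import LLM


def build_llm(model_path: str, tp_size: int = 4) -> LLM:
    # Refuse to start when fewer GPUs are visible than the requested
    # tensor-parallel degree, instead of failing inside the vLLM engine.
    available = torch.cuda.device_count()
    if available < tp_size:
        raise RuntimeError(
            f"tensor_parallel_size={tp_size} requested, but only {available} GPU(s) visible"
        )
    return LLM(
        model=model_path,
        tensor_parallel_size=tp_size,
        trust_remote_code=True,
        dtype="float16",
        enforce_eager=True,
    )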
model.properties (new file, mode 100644)
# Unique model identifier
modelCode=1026
# Model name
modelName=qwen2.5_pytorch
# Model description
modelDescription=Compared with Qwen2, Qwen2.5 has acquired significantly more knowledge (MMLU: 85+) and shows major improvements in coding (HumanEval 85+) and mathematics (MATH 80+).
# Application scenarios
appScenario=Inference, training, conversational Q&A, research, education, government, finance
# Framework type
frameType=Pytorch
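The commit does not include the consumer of model.properties, but the format is plain key=value lines with # comments, which Python's configparser cannot read without a section header. A minimal illustrative reader; load_properties is a hypothetical name, not an API from this repository:

def load_properties(path: str) -> dict[str, str]:
    # Parse key=value lines, skipping blanks and # comments.
    props = {}
    with open(path, encoding="utf-8") as fh:
        for line in fh:
            line = line.strip()
            if not line or line.startswith("#"):
                continue
            key, _, value = line.partition("=")
            props[key.strip()] = value.strip()
    return props


props = load_properties("model.properties")
print(props["modelName"])  # qwen2.5_pytorch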