Commit 60223917 authored by chenych

Add vllm

parent 8a7ff08b
import argparse

from vllm import LLM
from vllm.sampling_params import SamplingParams

# This script demonstrates how to use vLLM to run inference with a Mistral model.
parser = argparse.ArgumentParser()
parser.add_argument("--user_prompt", type=str,
                    default="Give me 5 non-formal ways to say 'See you later' in French.")
parser.add_argument("--model_name_or_path", type=str,
                    default="mistralai/Mistral-Small-3.1-24B-Instruct-2503")
args = parser.parse_args()

# Define the system prompt for the conversational agent.
SYSTEM_PROMPT = ("You are a conversational agent that always answers straight "
                 "to the point, always end your accurate response with an "
                 "ASCII drawing of a cat.")
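
# Example invocation (a sketch: the script filename is hypothetical, and it
# assumes a host with at least two visible GPUs to match the
# tensor_parallel_size=2 setting below):
#   python run_mistral_vllm.py \
#       --user_prompt "Give me 3 informal French greetings."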
if __name__ == "__main__":
    # Prepare the chat messages for the LLM.
    messages = [
        {
            "role": "system",
            "content": SYSTEM_PROMPT,
        },
        {
            "role": "user",
            "content": args.user_prompt,
        },
    ]
    # Load the model: tensor_parallel_size=2 shards it across two GPUs, and
    # enforce_eager=True skips CUDA graph capture for simpler debugging.
    llm = LLM(model=args.model_name_or_path,
              trust_remote_code=True,
              enforce_eager=True,
              tensor_parallel_size=2,
              max_model_len=8192)
    # A low temperature keeps the answer focused and near-deterministic.
    sampling_params = SamplingParams(max_tokens=512, temperature=0.15)
    outputs = llm.chat(messages, sampling_params=sampling_params)
    print(outputs[0].outputs[0].text)