Commit 60223917 authored by chenych

Add vllm

parent 8a7ff08b
import argparse

from vllm import LLM
from vllm.sampling_params import SamplingParams

# This script demonstrates how to use vLLM to run inference with a Mistral model.
parser = argparse.ArgumentParser()
parser.add_argument("--user_prompt", type=str,
                    default="Give me 5 non-formal ways to say 'See you later' in French.")
parser.add_argument("--model_name_or_path", type=str,
                    default="mistralai/Mistral-Small-3.1-24B-Instruct-2503")
args = parser.parse_args()

# Define the system prompt for the conversational agent.
SYSTEM_PROMPT = ("You are a conversational agent that always answers straight "
                 "to the point, always end your accurate response with an "
                 "ASCII drawing of a cat.")
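
# Example invocation (a sketch: the script filename is hypothetical, and it
# assumes a host with at least two visible GPUs to match the
# tensor_parallel_size=2 setting below):
#   python run_mistral_vllm.py \
#       --user_prompt "Give me 3 informal French greetings."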
if __name__ == "__main__":
    # Prepare the chat messages for the LLM.
    messages = [
        {
            "role": "system",
            "content": SYSTEM_PROMPT,
        },
        {
            "role": "user",
            "content": args.user_prompt,
        },
    ]
    # Load the model: tensor_parallel_size=2 shards it across two GPUs, and
    # enforce_eager=True skips CUDA graph capture for simpler debugging.
    llm = LLM(model=args.model_name_or_path,
              trust_remote_code=True,
              enforce_eager=True,
              tensor_parallel_size=2,
              max_model_len=8192)
    # A low temperature keeps the answer focused and near-deterministic.
    sampling_params = SamplingParams(max_tokens=512, temperature=0.15)
    outputs = llm.chat(messages, sampling_params=sampling_params)
    print(outputs[0].outputs[0].text)