from argparse import ArgumentParser

from vllm import LLM, SamplingParams


def inference(model_path):
    # Single-turn conversation; the user question asks, in Chinese,
    # "My child's cough won't go away, what should I do?"
    messages = [
        {"role": "user", "content": "孩子咳嗽老不好怎么办?"}
    ]
    # Low temperature for near-deterministic answers; cap the reply at 512 tokens.
    sampling_params = SamplingParams(temperature=0.1, top_p=0.95, max_tokens=512)
    llm = LLM(model=model_path)
    # llm.chat applies the model's chat template to the messages before generating.
    outputs = llm.chat(messages, sampling_params)
    for output in outputs:
        prompt = output.prompt
        generated_text = output.outputs[0].text
        print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")


if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument("--model_path", type=str, required=True,
                        help="Path to the model weights to load")
    args = parser.parse_args()
    inference(args.model_path)
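
For reference, a minimal invocation sketch; the script filename inference.py and the checkpoint directory ./merged_model are illustrative assumptions, not names from the original:

python inference.py --model_path ./merged_model

Any local directory containing the model weights (or a Hugging Face model ID) works for --model_path, since it is passed straight through to vLLM's LLM constructor.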