lora_vllm_play.py
from vllm import LLM, SamplingParams
from vllm.lora.request import LoRARequest

# Base model to load and local path to the trained LoRA adapter directory.
MODEL = "mistralai/Mistral-7B-Instruct-v0.3"
ADAPTER = "/home/ying/test_lora"
prompt = """
### Instruction:
Write a poem about the transformers Python library. 
Mention the word "large language models" in that poem.
### Response:
The Transformers are large language models,
They're used to make predictions on text.
"""


# Load the base model with LoRA adapter support enabled.
llm = LLM(model=MODEL, enable_lora=True)

# Greedy decoding (temperature=0), generating at most 32 new tokens.
sampling_params = SamplingParams(
    temperature=0,
    max_tokens=32,
)

prompts = [prompt]

# Apply the adapter per request: LoRARequest(adapter name, unique int id, adapter path).
outputs = llm.generate(
    prompts, sampling_params, lora_request=LoRARequest("test_lora", 1, ADAPTER)
)

# Print the prompt followed by the generated completion.
print(outputs[0].prompt)
print(outputs[0].outputs[0].text)
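
# Sketch (not part of the original script): run the same prompt without a
# lora_request to compare the base model's completion against the adapter's.
# Reuses the llm, prompts, and sampling_params objects defined above.
base_outputs = llm.generate(prompts, sampling_params)
print("--- base model (no LoRA) ---")
print(base_outputs[0].outputs[0].text)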