"""This LoRA model has all supported Mixtral target modules"""
iftorch.cuda.device_count()<tp_size:
pytest.skip(f"Not enough GPUs for tensor parallelism {tp_size}")
prompts=[
"Question: What happens if you touch the eyes of a blind man?\nChoices:\nB: The man's sight will be restored.\nA: Nothing happens if you touch the eyes of a blind man.\nC: The man will get his eyesight back.\nD: The man will no longer be blind.\nAnswer:",# noqa: E501
"Question: Which will increase the energy of motion of water molecules?\nChoices:\nA: add heat\nB: add ice\nC: remove some of the water\nD: place the water in the freezer\nAnswer:",# noqa: E501
"Since Craig threw aluminum cans in the trash and Benjamin recycled, _ was environmentally irresponsible.\nChoices:\n1: Craig\n2: Benjamin\nAnswer:",# noqa: E501
]
llm=vllm.LLM(
MODEL_PATH,
enable_lora=True,
max_num_seqs=16,
max_loras=4,
distributed_executor_backend="ray",
tensor_parallel_size=tp_size,
fully_sharded_loras=fully_shard,
max_lora_rank=32,
)
expected_lora_output=[
"A: Nothing happens if you touch the eyes of a blind man.",