# Build the engine with enforce_eager=True to keep VRAM usage low in the
# lora-test CI job; without it, the lora-test fails with a CUDA OOM.
llm = vllm.LLM(
    MODEL_PATH,
    max_model_len=1024,
    enable_lora=True,
    max_loras=2,
    enforce_eager=True,
)
expected_lora_output=[
"SELECT catalog_publisher, COUNT(*) as num_catalogs FROM catalogs GROUP BY catalog_publisher ORDER BY num_catalogs DESC LIMIT 1;",# noqa: E501
"SELECT trip.id FROM trip JOIN station ON trip.start_station_id = station.id WHERE station.dock_count = (SELECT MAX(dock_count) FROM station);",# noqa: E501
"SELECT COUNT(*) FROM marine_species WHERE location = 'Southern Ocean';",# noqa: E501