"<|im_start|>User\n<image>\nWhat is the season?<|im_end|>\n<|im_start|>Assistant\n",# noqa: E501
})
# Two-image prompt in a User/Assistant chat layout. It contains two "<image>"
# placeholders, labelled Image-1 and Image-2, followed by the instruction text.
# Written as adjacent literals (one per prompt line) that concatenate into a
# single string.
HF_MULTIIMAGE_IMAGE_PROMPT = (
    "<|im_start|>User\n"
    "Image-1: <image>\n"
    "Image-2: <image>\n"
    "Describe the two images in detail.<|im_end|>\n"
    "<|im_start|>Assistant\n"
)
models=[
"OpenGVLab/InternVL2-1B",
...
...
@@ -64,13 +66,13 @@ def generate(
defrun_test(
hf_runner:Type[HfRunner],
vllm_runner:Type[VllmRunner],
image_assets:_ImageAssets,
inputs:List[Tuple[List[str],PromptImageInput]],
model:str,
*,
size_factors:List[float],
dtype:str,
max_tokens:int,
num_logprobs:int,
mm_limit:int,
tensor_parallel_size:int,
distributed_executor_backend:Optional[str]=None,
):
...
...
@@ -83,12 +85,6 @@ def run_test(
Note: the text input is also adjusted to abide by the vLLM contract.
The text output is sanitized so that it can be compared with the HF output.