# paligemma_example.py
from vllm import LLM
from vllm.assets.image import ImageAsset


def run_paligemma():
    """Run a single PaliGemma image+text generation and print the result.

    Builds an ``LLM`` for ``google/paligemma-3b-mix-224``, sends one request
    made of the text prompt ``"caption es"`` and the bundled ``stop_sign``
    image asset, then prints the text of the first completion of every
    returned output.
    """
    llm = LLM(model="google/paligemma-3b-mix-224")

    # NOTE(review): presumably asks for a caption in Spanish ("es") --
    # confirm against the PaliGemma prompt conventions.
    prompt = "caption es"

    # PIL image of the "stop_sign" asset provided by vllm.assets.
    image = ImageAsset("stop_sign").pil_image

    # The image travels with the prompt under the "multi_modal_data" key.
    outputs = llm.generate({
        "prompt": prompt,
        "multi_modal_data": {
            "image": image
        },
    })

    for o in outputs:
        # Only the first candidate completion of each output is printed.
        generated_text = o.outputs[0].text
        print(generated_text)


# Run the example only when this file is executed as a script.
if __name__ == "__main__":
    run_paligemma()