import torch from PIL import Image from ovis.model.modeling_ovis import Ovis # If you need video support, make sure moviepy is installed first: # pip install moviepy==1.0.3 try: from moviepy.editor import VideoFileClip # type: ignore _HAS_MOVIEPY = True except Exception: _HAS_MOVIEPY = False def run_single_image_example(model: Ovis, image_path: str) -> None: """ Run an inference example with a single image input. """ print("--- 1) Single-image example ---") images = [Image.open(image_path).convert("RGB")] prompt = "\nDescribe this image in detail." print(f"Prompt:\n{prompt}") response, _, _ = model.chat( prompt=prompt, images=images, min_pixels=448 * 448, max_pixels=1792 * 1792, videos=None, do_sample=True, max_new_tokens=1024, ) print(f"\nResponse:\n{response}") def run_multi_image_example(model: Ovis, image_paths: list) -> None: """ Run an inference example with multiple image inputs. """ print("--- 2) Multi-image example ---") images = [Image.open(p).convert("RGB") for p in image_paths] prompt = "\n\n\nWhat is the relationship between the third image and the first two?" print(f"Prompt:\n{prompt}") response, _, _ = model.chat( prompt=prompt, images=images, min_pixels=448 * 448, max_pixels=896 * 896, videos=None, do_sample=True, max_new_tokens=1024, ) print(f"\nResponse:\n{response}") def run_video_example(model: Ovis, video_path: str, num_frames: int = 8) -> None: """ Run an inference example with a video input. """ if not _HAS_MOVIEPY: raise ImportError( "moviepy is not installed. Install it with `pip install moviepy==1.0.3` to use video examples." ) print("--- 3) Video example ---") with VideoFileClip(video_path) as clip: total_frames = int(clip.fps * clip.duration) indices = [int(i * total_frames / num_frames) for i in range(num_frames)] frames = [ Image.fromarray(clip.get_frame(t)) for t in (index / clip.fps for index in indices) ] videos = [frames] prompt = "