"...git@developer.sourcefind.cn:chenpangpang/transformers.git" did not exist on "075e821d1db5582e94084f2f2eeb8f04479d9328"
Unverified commit d625294d, authored by Raushan Turganbay, committed by GitHub

InstructBlipVideo: Update docstring (#31886)

* update docs

* one more change
parent c54af4c7
@@ -158,7 +158,8 @@ class InstructBlipVideoForConditionalGeneration(InstructBlipForConditionalGenera
     >>> from transformers import InstructBlipVideoProcessor, InstructBlipVideoForConditionalGeneration
     >>> import torch
     >>> from huggingface_hub import hf_hub_download
-    >>> from av
+    >>> import av
+    >>> import numpy as np

     >>> def read_video_pyav(container, indices):
     ...     '''
@@ -180,20 +181,21 @@ class InstructBlipVideoForConditionalGeneration(InstructBlipForConditionalGenera
     ...             frames.append(frame)
     ...     return np.stack([x.to_ndarray(format="rgb24") for x in frames])

-    >>> model = InstructBlipVideoProcessor.from_pretrained("Salesforce/instructblip-vicuna-7b", device_map="auto")
-    >>> processor = InstructBlipVideoForConditionalGeneration.from_pretrained("Salesforce/instructblip-vicuna-7b")
+    >>> model = InstructBlipVideoForConditionalGeneration.from_pretrained("Salesforce/instructblip-vicuna-7b", device_map="auto")
+    >>> processor = InstructBlipVideoProcessor.from_pretrained("Salesforce/instructblip-vicuna-7b")

     >>> file_path = hf_hub_download(
-    repo_id="nielsr/video-demo", filename="eating_spaghetti.mp4", repo_type="dataset"
-    )
-    >>> container = av.open(video_path)
+    ...     repo_id="nielsr/video-demo", filename="eating_spaghetti.mp4", repo_type="dataset"
+    ... )
+    >>> container = av.open(file_path)

     >>> # sample uniformly 4 frames from the video
     >>> total_frames = container.streams.video[0].frames
     >>> indices = np.arange(0, total_frames, total_frames / 4).astype(int)
     >>> clip = read_video_pyav(container, indices)

     >>> prompt = "What is happening in the video?"
-    >>> inputs = processor(videos=clip, text=prompt, return_tensors="pt").to(device)
+    >>> inputs = processor(text=prompt, images=clip, return_tensors="pt").to(model.device)

     >>> outputs = model.generate(
     ...     **inputs,
     ...
@@ -1393,7 +1393,8 @@ class InstructBlipVideoForConditionalGeneration(InstructBlipVideoPreTrainedModel
     >>> from transformers import InstructBlipVideoProcessor, InstructBlipVideoForConditionalGeneration
     >>> import torch
     >>> from huggingface_hub import hf_hub_download
-    >>> from av
+    >>> import av
+    >>> import numpy as np

     >>> def read_video_pyav(container, indices):
     ...     '''
@@ -1415,20 +1416,21 @@ class InstructBlipVideoForConditionalGeneration(InstructBlipVideoPreTrainedModel
     ...             frames.append(frame)
     ...     return np.stack([x.to_ndarray(format="rgb24") for x in frames])

-    >>> model = InstructBlipVideoProcessor.from_pretrained("Salesforce/instructblip-vicuna-7b", device_map="auto")
-    >>> processor = InstructBlipVideoForConditionalGeneration.from_pretrained("Salesforce/instructblip-vicuna-7b")
+    >>> model = InstructBlipVideoForConditionalGeneration.from_pretrained("Salesforce/instructblip-vicuna-7b", device_map="auto")
+    >>> processor = InstructBlipVideoProcessor.from_pretrained("Salesforce/instructblip-vicuna-7b")

     >>> file_path = hf_hub_download(
-    repo_id="nielsr/video-demo", filename="eating_spaghetti.mp4", repo_type="dataset"
-    )
-    >>> container = av.open(video_path)
+    ...     repo_id="nielsr/video-demo", filename="eating_spaghetti.mp4", repo_type="dataset"
+    ... )
+    >>> container = av.open(file_path)

     >>> # sample uniformly 4 frames from the video
     >>> total_frames = container.streams.video[0].frames
     >>> indices = np.arange(0, total_frames, total_frames / 4).astype(int)
     >>> clip = read_video_pyav(container, indices)

     >>> prompt = "What is happening in the video?"
-    >>> inputs = processor(videos=clip, text=prompt, return_tensors="pt").to(device)
+    >>> inputs = processor(text=prompt, images=clip, return_tensors="pt").to(model.device)

     >>> outputs = model.generate(
     ...     **inputs,
     ...