Unverified commit c25f27fa, authored by NielsRogge, committed by GitHub

[VideoMAE] Improve code examples (#18919)

* Simplify code example

* Add seed
parent 0a632f07
@@ -598,21 +598,18 @@ class VideoMAEModel(VideoMAEPreTrainedModel):
 >>> file_path = hf_hub_download(
 ...     repo_id="nielsr/video-demo", filename="eating_spaghetti.mp4", repo_type="dataset"
 ... )
->>> vr = VideoReader(file_path, num_threads=1, ctx=cpu(0))
+>>> videoreader = VideoReader(file_path, num_threads=1, ctx=cpu(0))
 >>> # sample 16 frames
->>> vr.seek(0)
->>> indices = sample_frame_indices(clip_len=16, frame_sample_rate=4, seg_len=len(vr))
->>> buffer = vr.get_batch(indices).asnumpy()
->>> # create a list of NumPy arrays
->>> video = [buffer[i] for i in range(buffer.shape[0])]
+>>> videoreader.seek(0)
+>>> indices = sample_frame_indices(clip_len=16, frame_sample_rate=4, seg_len=len(videoreader))
+>>> video = videoreader.get_batch(indices).asnumpy()
 >>> feature_extractor = VideoMAEFeatureExtractor.from_pretrained("MCG-NJU/videomae-base")
 >>> model = VideoMAEModel.from_pretrained("MCG-NJU/videomae-base")
 >>> # prepare video for the model
->>> inputs = feature_extractor(video, return_tensors="pt")
+>>> inputs = feature_extractor(list(video), return_tensors="pt")
 >>> # forward pass
 >>> outputs = model(**inputs)
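The `list(video)` call is what lets this hunk drop the explicit list comprehension: iterating a NumPy array of shape `(num_frames, height, width, 3)` yields one `(height, width, 3)` frame per step, which is the list-of-frames input the feature extractor accepts. A minimal sketch of that equivalence (the frame shape below is a stand-in, not taken from the demo video):

```python
import numpy as np

# Stand-in for a decoded clip; decord's get_batch(...).asnumpy() returns
# a (num_frames, H, W, 3) uint8 array.
video = np.zeros((16, 360, 640, 3), dtype=np.uint8)

# list(video) iterates over the first axis, yielding 16 (H, W, 3) frames --
# the same result as the removed `[buffer[i] for i in range(buffer.shape[0])]`.
frames = list(video)
assert len(frames) == 16 and frames[0].shape == (360, 640, 3)
```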
@@ -943,10 +940,13 @@ class VideoMAEForVideoClassification(VideoMAEPreTrainedModel):
 ```python
 >>> from decord import VideoReader, cpu
 >>> import torch
+>>> import numpy as np
 >>> from transformers import VideoMAEFeatureExtractor, VideoMAEForVideoClassification
 >>> from huggingface_hub import hf_hub_download
+>>> np.random.seed(0)
 >>> def sample_frame_indices(clip_len, frame_sample_rate, seg_len):
 ...     converted_len = int(clip_len * frame_sample_rate)
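The hunk truncates the body of `sample_frame_indices`, so here is a sketch of a complete definition consistent with its first line; the lines after `converted_len` are an assumption about the elided code, not confirmed by this diff. The point of the new `np.random.seed(0)` is visible here: the `np.random.randint` call becomes deterministic, so the doctest samples the same frames on every run.

```python
import numpy as np

def sample_frame_indices(clip_len, frame_sample_rate, seg_len):
    # Length of the sampling window, in original frames.
    converted_len = int(clip_len * frame_sample_rate)
    # Random window end -- deterministic once np.random.seed(0) is set.
    end_idx = np.random.randint(converted_len, seg_len)
    start_idx = end_idx - converted_len
    # clip_len evenly spaced indices inside the window.
    indices = np.linspace(start_idx, end_idx, num=clip_len)
    indices = np.clip(indices, start_idx, end_idx - 1).astype(np.int64)
    return indices
```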
@@ -961,20 +961,17 @@ class VideoMAEForVideoClassification(VideoMAEPreTrainedModel):
 >>> file_path = hf_hub_download(
 ...     repo_id="nielsr/video-demo", filename="eating_spaghetti.mp4", repo_type="dataset"
 ... )
->>> vr = VideoReader(file_path, num_threads=1, ctx=cpu(0))
+>>> videoreader = VideoReader(file_path, num_threads=1, ctx=cpu(0))
 >>> # sample 16 frames
->>> vr.seek(0)
->>> indices = sample_frame_indices(clip_len=16, frame_sample_rate=4, seg_len=len(vr))
->>> buffer = vr.get_batch(indices).asnumpy()
->>> # create a list of NumPy arrays
->>> video = [buffer[i] for i in range(buffer.shape[0])]
+>>> videoreader.seek(0)
+>>> indices = sample_frame_indices(clip_len=16, frame_sample_rate=4, seg_len=len(videoreader))
+>>> video = videoreader.get_batch(indices).asnumpy()
 >>> feature_extractor = VideoMAEFeatureExtractor.from_pretrained("MCG-NJU/videomae-base-finetuned-kinetics")
 >>> model = VideoMAEForVideoClassification.from_pretrained("MCG-NJU/videomae-base-finetuned-kinetics")
->>> inputs = feature_extractor(video, return_tensors="pt")
+>>> inputs = feature_extractor(list(video), return_tensors="pt")
 >>> with torch.no_grad():
 ...     outputs = model(**inputs)
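To round off the classification example, the logits can be mapped to a human-readable label through the config's `id2label` table, a standard attribute on Transformers configs. A short sketch continuing from `outputs` above (the printed label depends on the checkpoint and on which frames were sampled):

```python
>>> logits = outputs.logits
>>> # the Kinetics-finetuned checkpoint predicts one of 400 action classes
>>> predicted_class_idx = logits.argmax(-1).item()
>>> print(model.config.id2label[predicted_class_idx])
```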