"...wan/git@developer.sourcefind.cn:Wenxuan/LightX2V.git" did not exist on "236df981824cb4237c625dfd6fee12c0212db351"
Unverified commit a0e69a93, authored by Zineng Tang, committed by GitHub

Add TVLT (#20725)



* Update image_processing_tvlt.py

* Update modeling_tvlt.py

* Update

* Update modeling_tvlt.py

* Create tvlt.mdx

* Update configuration_tvlt.py

* Update modeling_tvlt.py

* Update test_modeling_tvlt.py

* Update modeling_tvlt.py

* Update modeling_tvlt.py

* Update image_processing_tvlt.py

* Update feature_extraction_tvlt.py

* Update tvlt models

* Update tests

* Update

* Update

* Update tests

* Update README_ko.md

* Update README_ja.md

* Update README_ko.md

* Update README_zh-hans.md

* Update docs/source/en/model_doc/tvlt.mdx
Co-authored-by: NielsRogge <48327001+NielsRogge@users.noreply.github.com>

* Update docs/source/en/model_doc/tvlt.mdx
Co-authored-by: NielsRogge <48327001+NielsRogge@users.noreply.github.com>

* Update src/transformers/models/tvlt/configuration_tvlt.py
Co-authored-by: NielsRogge <48327001+NielsRogge@users.noreply.github.com>

* Update src/transformers/models/tvlt/configuration_tvlt.py
Co-authored-by: NielsRogge <48327001+NielsRogge@users.noreply.github.com>

* Update src/transformers/models/tvlt/configuration_tvlt.py
Co-authored-by: NielsRogge <48327001+NielsRogge@users.noreply.github.com>

* Update src/transformers/models/tvlt/configuration_tvlt.py
Co-authored-by: NielsRogge <48327001+NielsRogge@users.noreply.github.com>

* Update tvlt.mdx

* Update modeling_tvlt.py

* Update configuration_tvlt.py

* Update modeling_tvlt.py

* Update modeling_tvlt.py

* Update modeling_tvlt.py

* Update modeling_tvlt.py

* Add files via upload

* Update model

* Update modeling_tvlt.py

* Update tvlt models

* Update src/transformers/models/tvlt/__init__.py
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* Update src/transformers/models/tvlt/__init__.py
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* Update src/transformers/models/tvlt/image_processing_tvlt.py
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* Update src/transformers/models/tvlt/image_processing_tvlt.py
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* Add files via upload

* Add files via upload

* Delete modeling_tvlt.py

* Delete feature_extraction_tvlt.py

* Delete configuration_tvlt.py

* Delete image_processing_tvlt.py

* Delete processing_tvlt.py

* Update tvlt

* Update src/transformers/models/tvlt/configuration_tvlt.py
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* Update src/transformers/models/tvlt/image_processing_tvlt.py
Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com>

* Update src/transformers/models/tvlt/configuration_tvlt.py
Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com>

* Update tests/models/tvlt/test_modeling_tvlt.py
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* Update tests/models/tvlt/test_modeling_tvlt.py
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* Update src/transformers/models/tvlt/modeling_tvlt.py
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* Update src/transformers/models/tvlt/modeling_tvlt.py
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* Update README.md
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* Update README_es.md

* Update README_hd.md

* Update README_ja.md

* Update README_ko.md

* Update README_zh-hans.md

* Update README_zh-hant.md

* Update index.mdx

* Update tvlt.mdx

* Update tvlt.mdx

* Update configuration_tvlt.py

* Update src/transformers/models/tvlt/image_processing_tvlt.py
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* Update src/transformers/models/tvlt/image_processing_tvlt.py
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* Update src/transformers/models/tvlt/image_processing_tvlt.py
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* Update src/transformers/models/tvlt/image_processing_tvlt.py
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* Update src/transformers/models/tvlt/modeling_tvlt.py
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* Update modeling_tvlt.py

* Add files via upload

* Update tvlt.mdx

* Update modeling_auto.py

* Add files via upload

* Add files via upload

* Update dummy_pt_objects.py

* Update __init__.py

* Update feature_extraction_tvlt.py

* Update feature_extraction_tvlt.py

* Update image_processing_tvlt.py

* Update modeling_auto.py

* Update test_feature_extraction_tvlt.py

* Update test_processor_tvlt.py

* Update test_feature_extraction_tvlt.py

* Add files via upload

* Update test_image_processor_tvlt.py

* Update tests/models/tvlt/test_processor_tvlt.py
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* Update src/transformers/models/tvlt/modeling_tvlt.py
Co-authored-by: NielsRogge <48327001+NielsRogge@users.noreply.github.com>

* Update src/transformers/models/tvlt/processing_tvlt.py
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* Update src/transformers/models/tvlt/modeling_tvlt.py
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* Update src/transformers/models/tvlt/modeling_tvlt.py
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* Update tests/models/tvlt/test_image_processor_tvlt.py
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* Update src/transformers/models/tvlt/modeling_tvlt.py
Co-authored-by: NielsRogge <48327001+NielsRogge@users.noreply.github.com>

* Update tests/models/tvlt/test_image_processor_tvlt.py
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* Update src/transformers/models/tvlt/modeling_tvlt.py
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* Update tests/models/tvlt/test_image_processor_tvlt.py
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* Update tests/models/tvlt/test_image_processor_tvlt.py
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* Update tests/models/tvlt/test_modeling_tvlt.py
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* Update src/transformers/models/tvlt/modeling_tvlt.py
Co-authored-by: NielsRogge <48327001+NielsRogge@users.noreply.github.com>

* Update src/transformers/models/tvlt/modeling_tvlt.py
Co-authored-by: NielsRogge <48327001+NielsRogge@users.noreply.github.com>

* Update src/transformers/models/tvlt/configuration_tvlt.py
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* Update tests/models/tvlt/test_feature_extraction_tvlt.py
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* Update src/transformers/models/tvlt/processing_tvlt.py
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* Update docs/source/en/model_doc/tvlt.mdx
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* Update src/transformers/models/tvlt/configuration_tvlt.py
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* Update src/transformers/models/tvlt/configuration_tvlt.py
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* Update src/transformers/models/tvlt/configuration_tvlt.py
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* Update src/transformers/models/tvlt/configuration_tvlt.py
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* Update src/transformers/models/tvlt/configuration_tvlt.py
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* Update src/transformers/models/tvlt/configuration_tvlt.py
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* Update src/transformers/models/tvlt/configuration_tvlt.py
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* Update src/transformers/models/tvlt/configuration_tvlt.py
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* Update src/transformers/models/tvlt/configuration_tvlt.py
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* Update src/transformers/models/tvlt/configuration_tvlt.py
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* Update src/transformers/models/tvlt/feature_extraction_tvlt.py
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* Update src/transformers/models/tvlt/configuration_tvlt.py
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* Update src/transformers/models/tvlt/feature_extraction_tvlt.py
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* Update src/transformers/models/tvlt/feature_extraction_tvlt.py
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* Update src/transformers/models/tvlt/feature_extraction_tvlt.py
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* Update feature_extraction_tvlt.py

* Update feature_extraction_tvlt.py

* Update src/transformers/models/tvlt/image_processing_tvlt.py
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* Update src/transformers/models/tvlt/image_processing_tvlt.py
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* Update image_processing_tvlt.py

* Update src/transformers/models/tvlt/modeling_tvlt.py
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* Update modeling_tvlt.py

* Update modeling_tvlt.py

* Update modeling_tvlt.py

* Update test_image_processor_tvlt.py

* Update tests/models/tvlt/test_modeling_tvlt.py
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* Update tests/models/tvlt/test_modeling_tvlt.py
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* Update tests/models/tvlt/test_modeling_tvlt.py
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* Update tests/models/tvlt/test_modeling_tvlt.py
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* Update tests/models/tvlt/test_modeling_tvlt.py
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* Update tests/models/tvlt/test_modeling_tvlt.py
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* Update tests/models/tvlt/test_modeling_tvlt.py
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* Update tests/models/tvlt/test_modeling_tvlt.py
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* Update tests/models/tvlt/test_modeling_tvlt.py
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* Add files via upload

* Add files via upload

* Update modeling_tvlt.py

* Update modeling_tvlt.py

* Update modeling_tvlt.py

* Add files via upload

* Update docs/source/en/model_doc/tvlt.mdx
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* Update image_processing_tvlt.py

* Add files via upload

* Add files via upload

* Update tvlt.mdx

* Update docs/source/en/model_doc/tvlt.mdx
Co-authored-by: NielsRogge <48327001+NielsRogge@users.noreply.github.com>

* Update docs/source/en/model_doc/tvlt.mdx
Co-authored-by: NielsRogge <48327001+NielsRogge@users.noreply.github.com>

* Update src/transformers/models/tvlt/image_processing_tvlt.py
Co-authored-by: NielsRogge <48327001+NielsRogge@users.noreply.github.com>

* Update src/transformers/models/tvlt/image_processing_tvlt.py
Co-authored-by: NielsRogge <48327001+NielsRogge@users.noreply.github.com>

* Update docs/source/en/model_doc/tvlt.mdx
Co-authored-by: Sanchit Gandhi <93869735+sanchit-gandhi@users.noreply.github.com>

* Update docs/source/en/model_doc/tvlt.mdx
Co-authored-by: Sanchit Gandhi <93869735+sanchit-gandhi@users.noreply.github.com>

* Update src/transformers/models/tvlt/configuration_tvlt.py
Co-authored-by: Sanchit Gandhi <93869735+sanchit-gandhi@users.noreply.github.com>

* Add files via upload

* Add files via upload

* Add files via upload

* Add files via upload

* Update modeling_auto.py

* Update tvlt.mdx

* Update dummy_pt_objects.py

* Update feature_extraction_tvlt.py

* Update modeling_tvlt.py

* Update test_feature_extraction_tvlt.py

* Update test_image_processor_tvlt.py

* Update test_feature_extraction_tvlt.py

* Update modeling_tvlt.py

* Update dummy_pt_objects.py

* Update dummy_speech_objects.py

* Add files via upload

* Update README_hd.md

* Update modeling_tvlt.py

* Update modeling_tvlt.py

* Update modeling_tvlt.py

* Update modeling_tvlt.py

* Update modeling_tvlt.py

* Update modeling_tvlt.py

* Update test_modeling_tvlt.py

* Update src/transformers/models/tvlt/configuration_tvlt.py
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* Update src/transformers/models/tvlt/feature_extraction_tvlt.py
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* Update src/transformers/models/tvlt/image_processing_tvlt.py
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* Update src/transformers/models/tvlt/image_processing_tvlt.py
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* Update src/transformers/models/tvlt/modeling_tvlt.py
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* Update src/transformers/models/tvlt/image_processing_tvlt.py
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* Update src/transformers/models/tvlt/modeling_tvlt.py
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* Update MAE processing

* Update modeling_tvlt.py

* Update modeling_tvlt.py

* Update modeling

* Update style

* Update src/transformers/models/tvlt/modeling_tvlt.py
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* Update src/transformers/models/tvlt/modeling_tvlt.py
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* Update check_repo.py

* Update tvlt.mdx

* Update __init__.py

* Update tests

* Update tvlt models

* Update configuration_tvlt.py

* Update configuration_tvlt.py

* Update image_processing_tvlt.py

* Update dummy_pt_objects.py

* Add files via upload

* Update test_modeling_tvlt.py

* Update test_feature_extraction_tvlt.py

* Update test_feature_extraction_tvlt.py

* Update test_feature_extraction_tvlt.py

* Update test_feature_extraction_tvlt.py

* Update test_feature_extraction_tvlt.py

* Update test_feature_extraction_tvlt.py

---------
Co-authored-by: NielsRogge <48327001+NielsRogge@users.noreply.github.com>
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com>
Co-authored-by: Sanchit Gandhi <93869735+sanchit-gandhi@users.noreply.github.com>
parent 7bac5183
@@ -6068,6 +6068,37 @@ class TrOCRPreTrainedModel(metaclass=DummyObject):
        requires_backends(self, ["torch"])


TVLT_PRETRAINED_MODEL_ARCHIVE_LIST = None


class TvltForAudioVisualClassification(metaclass=DummyObject):
    _backends = ["torch"]

    def __init__(self, *args, **kwargs):
        requires_backends(self, ["torch"])


class TvltForPreTraining(metaclass=DummyObject):
    _backends = ["torch"]

    def __init__(self, *args, **kwargs):
        requires_backends(self, ["torch"])


class TvltModel(metaclass=DummyObject):
    _backends = ["torch"]

    def __init__(self, *args, **kwargs):
        requires_backends(self, ["torch"])


class TvltPreTrainedModel(metaclass=DummyObject):
    _backends = ["torch"]

    def __init__(self, *args, **kwargs):
        requires_backends(self, ["torch"])


UNISPEECH_PRETRAINED_MODEL_ARCHIVE_LIST = None
...

@@ -28,3 +28,10 @@ class SpeechT5FeatureExtractor(metaclass=DummyObject):
    def __init__(self, *args, **kwargs):
        requires_backends(self, ["speech"])


class TvltFeatureExtractor(metaclass=DummyObject):
    _backends = ["speech"]

    def __init__(self, *args, **kwargs):
        requires_backends(self, ["speech"])

@@ -415,6 +415,13 @@ class Swin2SRImageProcessor(metaclass=DummyObject):
        requires_backends(self, ["vision"])


class TvltImageProcessor(metaclass=DummyObject):
    _backends = ["vision"]

    def __init__(self, *args, **kwargs):
        requires_backends(self, ["vision"])


class VideoMAEFeatureExtractor(metaclass=DummyObject):
    _backends = ["vision"]
...
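For context, the classes added above follow the library's dummy-object pattern: when an optional backend is missing, the real class is replaced by a placeholder whose constructor raises an informative error instead of failing later with an obscure import problem. A minimal sketch of the pattern, assuming only that `DummyObject` and `requires_backends` are importable from `transformers.utils` as the dummy files themselves do (the placeholder class name is hypothetical):

from transformers.utils import DummyObject, requires_backends


class TvltModelPlaceholder(metaclass=DummyObject):  # hypothetical name, for illustration only
    # Without the "torch" backend installed, instantiation raises an
    # ImportError that tells the user which package to install.
    _backends = ["torch"]

    def __init__(self, *args, **kwargs):
        requires_backends(self, ["torch"])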
# coding=utf-8
# Copyright 2023 HuggingFace Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" Testing suite for the TVLT feature extraction. """

import itertools
import os
import random
import tempfile
import unittest

import numpy as np

from transformers import is_datasets_available, is_speech_available
from transformers.testing_utils import check_json_file_has_correct_format, require_torch, require_torchaudio
from transformers.utils.import_utils import is_torch_available

from ...test_sequence_feature_extraction_common import SequenceFeatureExtractionTestMixin


if is_torch_available():
    import torch

if is_datasets_available():
    from datasets import load_dataset

if is_speech_available():
    from transformers import TvltFeatureExtractor

global_rng = random.Random()


def floats_list(shape, scale=1.0, rng=None, name=None):
    """Creates a random float32 tensor"""
    if rng is None:
        rng = global_rng

    values = []
    for batch_idx in range(shape[0]):
        values.append([])
        for _ in range(shape[1]):
            values[-1].append(rng.random() * scale)

    return values


class TvltFeatureExtractionTester(unittest.TestCase):
    def __init__(
        self,
        parent,
        batch_size=7,
        min_seq_length=400,
        max_seq_length=2000,
        spectrogram_length=2048,
        feature_size=128,
        num_audio_channels=1,
        hop_length=512,
        chunk_length=30,
        sampling_rate=44100,
    ):
        self.parent = parent
        self.batch_size = batch_size
        self.min_seq_length = min_seq_length
        self.max_seq_length = max_seq_length
        self.seq_length_diff = (self.max_seq_length - self.min_seq_length) // (self.batch_size - 1)
        self.spectrogram_length = spectrogram_length
        self.feature_size = feature_size
        self.num_audio_channels = num_audio_channels
        self.hop_length = hop_length
        self.chunk_length = chunk_length
        self.sampling_rate = sampling_rate

    def prepare_feat_extract_dict(self):
        return {
            "spectrogram_length": self.spectrogram_length,
            "feature_size": self.feature_size,
            "num_audio_channels": self.num_audio_channels,
            "hop_length": self.hop_length,
            "chunk_length": self.chunk_length,
            "sampling_rate": self.sampling_rate,
        }

    def prepare_inputs_for_common(self, equal_length=False, numpify=False):
        def _flatten(list_of_lists):
            return list(itertools.chain(*list_of_lists))

        if equal_length:
            speech_inputs = [floats_list((self.max_seq_length, self.feature_size)) for _ in range(self.batch_size)]
        else:
            # make sure that inputs increase in size
            speech_inputs = [
                floats_list((x, self.feature_size))
                for x in range(self.min_seq_length, self.max_seq_length, self.seq_length_diff)
            ]
        if numpify:
            speech_inputs = [np.asarray(x) for x in speech_inputs]
        return speech_inputs


@require_torch
@require_torchaudio
class TvltFeatureExtractionTest(SequenceFeatureExtractionTestMixin, unittest.TestCase):
    feature_extraction_class = TvltFeatureExtractor if is_speech_available() else None

    def setUp(self):
        self.feat_extract_tester = TvltFeatureExtractionTester(self)

    def test_feat_extract_properties(self):
        feature_extractor = self.feature_extraction_class(**self.feat_extract_dict)
        self.assertTrue(hasattr(feature_extractor, "spectrogram_length"))
        self.assertTrue(hasattr(feature_extractor, "feature_size"))
        self.assertTrue(hasattr(feature_extractor, "num_audio_channels"))
        self.assertTrue(hasattr(feature_extractor, "hop_length"))
        self.assertTrue(hasattr(feature_extractor, "chunk_length"))
        self.assertTrue(hasattr(feature_extractor, "sampling_rate"))

    def test_feat_extract_from_and_save_pretrained(self):
        feat_extract_first = self.feature_extraction_class(**self.feat_extract_dict)

        with tempfile.TemporaryDirectory() as tmpdirname:
            saved_file = feat_extract_first.save_pretrained(tmpdirname)[0]
            check_json_file_has_correct_format(saved_file)
            feat_extract_second = self.feature_extraction_class.from_pretrained(tmpdirname)

        dict_first = feat_extract_first.to_dict()
        dict_second = feat_extract_second.to_dict()
        mel_1 = dict_first.pop("mel_filters")
        mel_2 = dict_second.pop("mel_filters")
        self.assertTrue(np.allclose(mel_1, mel_2))
        self.assertEqual(dict_first, dict_second)

    def test_feat_extract_to_json_file(self):
        feat_extract_first = self.feature_extraction_class(**self.feat_extract_dict)

        with tempfile.TemporaryDirectory() as tmpdirname:
            json_file_path = os.path.join(tmpdirname, "feat_extract.json")
            feat_extract_first.to_json_file(json_file_path)
            feat_extract_second = self.feature_extraction_class.from_json_file(json_file_path)

        dict_first = feat_extract_first.to_dict()
        dict_second = feat_extract_second.to_dict()
        mel_1 = dict_first.pop("mel_filters")
        mel_2 = dict_second.pop("mel_filters")
        self.assertTrue(np.allclose(mel_1, mel_2))
        self.assertEqual(dict_first, dict_second)

    def test_call(self):
        # Initialize feature_extractor
        feature_extractor = self.feature_extraction_class(**self.feat_extract_dict)

        # create three inputs of length 800, 1000, and 1200
        speech_inputs = [floats_list((1, x))[0] for x in range(800, 1400, 200)]
        np_speech_inputs = [np.asarray(speech_input) for speech_input in speech_inputs]

        # Test not batched input
        encoded_audios = feature_extractor(np_speech_inputs[0], return_tensors="np", sampling_rate=44100).audio_values
        self.assertTrue(encoded_audios.ndim == 4)
        self.assertTrue(encoded_audios.shape[-1] == feature_extractor.feature_size)
        self.assertTrue(encoded_audios.shape[-2] <= feature_extractor.spectrogram_length)
        self.assertTrue(encoded_audios.shape[-3] == feature_extractor.num_channels)

        # Test batched
        encoded_audios = feature_extractor(np_speech_inputs, return_tensors="np", sampling_rate=44100).audio_values
        self.assertTrue(encoded_audios.ndim == 4)
        self.assertTrue(encoded_audios.shape[-1] == feature_extractor.feature_size)
        self.assertTrue(encoded_audios.shape[-2] <= feature_extractor.spectrogram_length)
        self.assertTrue(encoded_audios.shape[-3] == feature_extractor.num_channels)

        # Test audio masking
        encoded_audios = feature_extractor(
            np_speech_inputs, return_tensors="np", sampling_rate=44100, mask_audio=True
        ).audio_values
        self.assertTrue(encoded_audios.ndim == 4)
        self.assertTrue(encoded_audios.shape[-1] == feature_extractor.feature_size)
        self.assertTrue(encoded_audios.shape[-2] <= feature_extractor.spectrogram_length)
        self.assertTrue(encoded_audios.shape[-3] == feature_extractor.num_channels)

    def _load_datasamples(self, num_samples):
        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
        # automatic decoding with librispeech
        speech_samples = ds.sort("id").select(range(num_samples))[:num_samples]["audio"]

        return [x["array"] for x in speech_samples]

    def test_integration(self):
        input_speech = self._load_datasamples(1)
        feature_extractor = TvltFeatureExtractor()
        audio_values = feature_extractor(input_speech, return_tensors="pt").audio_values

        self.assertEqual(audio_values.shape, torch.Size([1, 1, 192, 128]))

        expected_slice = torch.tensor([[-0.3032, -0.2708], [-0.4434, -0.4007]])
        self.assertTrue(torch.allclose(audio_values[0, 0, :2, :2], expected_slice, atol=1e-4))
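Taken together, these tests pin down the feature extractor's contract: a raw waveform comes back as a 4D batch of mel spectrogram patches shaped (batch, num_audio_channels, time, feature_size). A minimal usage sketch along the lines of test_integration above; the random one-second waveform and the default construction are illustrative assumptions, not part of the test suite:

import numpy as np
from transformers import TvltFeatureExtractor

feature_extractor = TvltFeatureExtractor()  # default configuration, as in test_integration
waveform = np.random.randn(44100)  # one second of audio at the default 44.1 kHz rate
# audio_values is 4-dimensional: (batch, num_audio_channels, time, feature_size)
audio_values = feature_extractor(waveform, sampling_rate=44100, return_tensors="np").audio_values
print(audio_values.shape)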
# coding=utf-8
# Copyright 2023 HuggingFace Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" Testing suite for the TVLT image processor. """

import unittest

import numpy as np

from transformers.testing_utils import require_torch, require_vision
from transformers.utils import is_torch_available, is_vision_available

from ...test_image_processing_common import ImageProcessingSavingTestMixin


if is_torch_available():
    import torch

if is_vision_available():
    from PIL import Image

    from transformers import TvltImageProcessor


def prepare_video(image_processor_tester, width=10, height=10, numpify=False, torchify=False):
    """This function prepares a video as a list of PIL images/NumPy arrays/PyTorch tensors."""

    video = []
    for i in range(image_processor_tester.num_frames):
        video.append(np.random.randint(255, size=(image_processor_tester.num_channels, width, height), dtype=np.uint8))

    if not numpify and not torchify:
        # PIL expects the channel dimension as last dimension
        video = [Image.fromarray(np.moveaxis(frame, 0, -1)) for frame in video]

    if torchify:
        video = [torch.from_numpy(frame) for frame in video]

    return video


def prepare_video_inputs(image_processor_tester, equal_resolution=False, numpify=False, torchify=False):
    """This function prepares a batch of videos: a list of list of PIL images, or a list of list of numpy arrays if
    one specifies numpify=True, or a list of list of PyTorch tensors if one specifies torchify=True.

    One can specify whether the videos are of the same resolution or not.
    """

    assert not (numpify and torchify), "You cannot specify both numpy and PyTorch tensors at the same time"

    video_inputs = []
    for i in range(image_processor_tester.batch_size):
        if equal_resolution:
            width = height = image_processor_tester.max_resolution
        else:
            width, height = np.random.choice(
                np.arange(image_processor_tester.min_resolution, image_processor_tester.max_resolution), 2
            )
        video = prepare_video(
            image_processor_tester=image_processor_tester,
            width=width,
            height=height,
            numpify=numpify,
            torchify=torchify,
        )
        video_inputs.append(video)

    return video_inputs


class TvltImageProcessorTester(unittest.TestCase):
    def __init__(
        self,
        parent,
        batch_size=7,
        num_channels=3,
        num_frames=4,
        image_size=18,
        min_resolution=30,
        max_resolution=400,
        do_resize=True,
        size=None,
        do_normalize=True,
        image_mean=[0.5, 0.5, 0.5],
        image_std=[0.5, 0.5, 0.5],
        do_center_crop=True,
        crop_size=None,
    ):
        size = size if size is not None else {"shortest_edge": 18}
        crop_size = crop_size if crop_size is not None else {"height": 18, "width": 18}

        self.parent = parent
        self.batch_size = batch_size
        self.num_channels = num_channels
        self.num_frames = num_frames
        self.image_size = image_size
        self.min_resolution = min_resolution
        self.max_resolution = max_resolution
        self.do_resize = do_resize
        self.size = size
        self.do_normalize = do_normalize
        self.image_mean = image_mean
        self.image_std = image_std
        self.do_center_crop = do_center_crop
        self.crop_size = crop_size

    def prepare_image_processor_dict(self):
        return {
            "image_mean": self.image_mean,
            "image_std": self.image_std,
            "do_normalize": self.do_normalize,
            "do_resize": self.do_resize,
            "size": self.size,
            "do_center_crop": self.do_center_crop,
            "crop_size": self.crop_size,
        }


@require_torch
@require_vision
class TvltImageProcessorTest(ImageProcessingSavingTestMixin, unittest.TestCase):
    image_processing_class = TvltImageProcessor if is_vision_available() else None

    def setUp(self):
        self.image_processor_tester = TvltImageProcessorTester(self)

    @property
    def image_processor_dict(self):
        return self.image_processor_tester.prepare_image_processor_dict()

    def test_image_processor_properties(self):
        image_processor = self.image_processing_class(**self.image_processor_dict)
        self.assertTrue(hasattr(image_processor, "image_mean"))
        self.assertTrue(hasattr(image_processor, "image_std"))
        self.assertTrue(hasattr(image_processor, "do_normalize"))
        self.assertTrue(hasattr(image_processor, "do_resize"))
        self.assertTrue(hasattr(image_processor, "do_center_crop"))
        self.assertTrue(hasattr(image_processor, "size"))

    def test_call_pil(self):
        # Initialize image_processor
        image_processor = self.image_processing_class(**self.image_processor_dict)
        # create random PIL videos
        video_inputs = prepare_video_inputs(self.image_processor_tester, equal_resolution=False)
        for video in video_inputs:
            self.assertIsInstance(video, list)
            self.assertIsInstance(video[0], Image.Image)

        # Test not batched input
        encoded_videos = image_processor(video_inputs[0], return_tensors="pt").pixel_values
        self.assertEqual(
            encoded_videos.shape,
            (
                1,
                self.image_processor_tester.num_frames,
                self.image_processor_tester.num_channels,
                self.image_processor_tester.crop_size["height"],
                self.image_processor_tester.crop_size["width"],
            ),
        )

        # Test batched
        encoded_videos = image_processor(video_inputs, return_tensors="pt").pixel_values
        self.assertEqual(
            encoded_videos.shape,
            (
                self.image_processor_tester.batch_size,
                self.image_processor_tester.num_frames,
                self.image_processor_tester.num_channels,
                self.image_processor_tester.crop_size["height"],
                self.image_processor_tester.crop_size["width"],
            ),
        )

    def test_call_numpy(self):
        # Initialize image_processor
        image_processor = self.image_processing_class(**self.image_processor_dict)
        # create random numpy tensors
        video_inputs = prepare_video_inputs(self.image_processor_tester, equal_resolution=False, numpify=True)
        for video in video_inputs:
            self.assertIsInstance(video, list)
            self.assertIsInstance(video[0], np.ndarray)

        # Test not batched input
        encoded_videos = image_processor(video_inputs[0], return_tensors="pt").pixel_values
        self.assertEqual(
            encoded_videos.shape,
            (
                1,
                self.image_processor_tester.num_frames,
                self.image_processor_tester.num_channels,
                self.image_processor_tester.crop_size["height"],
                self.image_processor_tester.crop_size["width"],
            ),
        )

        # Test batched
        encoded_videos = image_processor(video_inputs, return_tensors="pt").pixel_values
        self.assertEqual(
            encoded_videos.shape,
            (
                self.image_processor_tester.batch_size,
                self.image_processor_tester.num_frames,
                self.image_processor_tester.num_channels,
                self.image_processor_tester.crop_size["height"],
                self.image_processor_tester.crop_size["width"],
            ),
        )

    def test_call_pytorch(self):
        # Initialize image_processor
        image_processor = self.image_processing_class(**self.image_processor_dict)
        # create random PyTorch tensors
        video_inputs = prepare_video_inputs(self.image_processor_tester, equal_resolution=False, torchify=True)
        for video in video_inputs:
            self.assertIsInstance(video, list)
            self.assertIsInstance(video[0], torch.Tensor)

        # Test not batched input
        encoded_videos = image_processor(video_inputs[0], return_tensors="pt").pixel_values
        self.assertEqual(
            encoded_videos.shape,
            (
                1,
                self.image_processor_tester.num_frames,
                self.image_processor_tester.num_channels,
                self.image_processor_tester.crop_size["height"],
                self.image_processor_tester.crop_size["width"],
            ),
        )

        # Test batched
        encoded_videos = image_processor(video_inputs, return_tensors="pt").pixel_values
        self.assertEqual(
            encoded_videos.shape,
            (
                self.image_processor_tester.batch_size,
                self.image_processor_tester.num_frames,
                self.image_processor_tester.num_channels,
                self.image_processor_tester.crop_size["height"],
                self.image_processor_tester.crop_size["width"],
            ),
        )
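The shape assertions above also document the expected call pattern: a video is passed as a list of frames and comes back as a 5D pixel_values tensor of shape (batch, num_frames, num_channels, crop_height, crop_width). A short sketch mirroring test_call_numpy; the frame size, frame count, and default construction are illustrative assumptions:

import numpy as np
from transformers import TvltImageProcessor

image_processor = TvltImageProcessor()  # default configuration, assumed for illustration
# A video is a list of frames; here, 4 random channels-first uint8 frames, as in prepare_video
video = [np.random.randint(255, size=(3, 224, 224), dtype=np.uint8) for _ in range(4)]
# pixel_values has shape (batch, num_frames, num_channels, crop_height, crop_width)
pixel_values = image_processor(video, return_tensors="pt").pixel_values
print(pixel_values.shape)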
# Copyright 2023 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import shutil
import tempfile
import unittest

import numpy as np
import pytest

from transformers import is_speech_available, is_vision_available
from transformers.testing_utils import require_torch


if is_vision_available():
    from transformers import TvltImageProcessor

if is_speech_available():
    from transformers import TvltFeatureExtractor

from transformers import TvltProcessor


@require_torch
class TvltProcessorTest(unittest.TestCase):
    def setUp(self):
        self.checkpoint = "ZinengTang/tvlt-base"
        self.tmpdirname = tempfile.mkdtemp()

    def get_image_processor(self, **kwargs):
        return TvltImageProcessor.from_pretrained(self.checkpoint, **kwargs)

    def get_feature_extractor(self, **kwargs):
        return TvltFeatureExtractor.from_pretrained(self.checkpoint, **kwargs)

    def tearDown(self):
        shutil.rmtree(self.tmpdirname)

    def test_save_load_pretrained_default(self):
        image_processor = self.get_image_processor()
        feature_extractor = self.get_feature_extractor()

        processor = TvltProcessor(image_processor=image_processor, feature_extractor=feature_extractor)
        processor.save_pretrained(self.tmpdirname)
        processor = TvltProcessor.from_pretrained(self.tmpdirname)

        self.assertIsInstance(processor.feature_extractor, TvltFeatureExtractor)
        self.assertIsInstance(processor.image_processor, TvltImageProcessor)

    def test_feature_extractor(self):
        image_processor = self.get_image_processor()
        feature_extractor = self.get_feature_extractor()

        processor = TvltProcessor(image_processor=image_processor, feature_extractor=feature_extractor)

        audio = np.ones([12000])

        audio_dict = feature_extractor(audio, return_tensors="np")
        input_processor = processor(audio=audio, return_tensors="np")

        for key in audio_dict.keys():
            self.assertAlmostEqual(audio_dict[key].sum(), input_processor[key].sum(), delta=1e-2)

    def test_image_processor(self):
        image_processor = self.get_image_processor()
        feature_extractor = self.get_feature_extractor()

        processor = TvltProcessor(image_processor=image_processor, feature_extractor=feature_extractor)

        images = np.ones([3, 224, 224])

        image_dict = image_processor(images, return_tensors="np")
        input_processor = processor(images=images, return_tensors="np")

        for key in image_dict.keys():
            self.assertAlmostEqual(image_dict[key].sum(), input_processor[key].sum(), delta=1e-2)

    def test_processor(self):
        image_processor = self.get_image_processor()
        feature_extractor = self.get_feature_extractor()

        processor = TvltProcessor(image_processor=image_processor, feature_extractor=feature_extractor)

        audio = np.ones([12000])
        images = np.ones([3, 224, 224])

        inputs = processor(audio=audio, images=images)

        self.assertListEqual(list(inputs.keys()), ["audio_values", "audio_mask", "pixel_values", "pixel_mask"])

        # test if it raises when no input is passed
        with pytest.raises(ValueError):
            processor()

    def test_model_input_names(self):
        image_processor = self.get_image_processor()
        feature_extractor = self.get_feature_extractor()

        processor = TvltProcessor(image_processor=image_processor, feature_extractor=feature_extractor)

        self.assertListEqual(
            processor.model_input_names,
            image_processor.model_input_names + feature_extractor.model_input_names,
            msg="`processor` and `image_processor`+`feature_extractor` model input names do not match",
        )
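For completeness, a minimal sketch of the combined processor as exercised in test_processor above: it dispatches audio to the feature extractor and images to the image processor and merges their outputs into one dict. It loads the same checkpoint the tests use, so network access is assumed:

import numpy as np
from transformers import TvltProcessor

processor = TvltProcessor.from_pretrained("ZinengTang/tvlt-base")
audio = np.ones([12000])
images = np.ones([3, 224, 224])
inputs = processor(audio=audio, images=images)
print(list(inputs.keys()))  # ["audio_values", "audio_mask", "pixel_values", "pixel_mask"]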
@@ -287,6 +287,7 @@ IGNORE_NON_AUTO_CONFIGURED = PRIVATE_MODELS.copy() + [
    "AltCLIPTextModel",
    "AltCLIPVisionModel",
    "AltRobertaModel",
    "TvltForAudioVisualClassification",
    "SpeechT5ForSpeechToSpeech",
    "SpeechT5ForTextToSpeech",
    "SpeechT5HifiGan",
...