"...lm-evaluation-harness.git" did not exist on "10103d5de3177b6a59917c73916a47a793fcb28b"
Unverified Commit ae454f41 authored by amyeroberts, committed by GitHub

Update old existing feature extractor references (#24552)

* Update old existing feature extractor references

* Typo

* Apply suggestions from code review

* Apply suggestions from code review

* Apply suggestions from code review

* Address comments from review - update 'feature extractor'
Co-authored-by: Yih-Dar <2521628+ydshieh@users.noreply.github.com>
parent 10c2ac7b
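For context, every hunk below applies the same pattern: a deprecated `*FeatureExtractor` class or `feature_extractor` variable is replaced by its `*ImageProcessor` counterpart, while the call sites keep the same signature. A minimal sketch of the new spelling (the checkpoint name and test image are illustrative, not taken from this diff):

```python
import requests
from PIL import Image

from transformers import ViTImageProcessor  # previously: ViTFeatureExtractor

# Illustrative checkpoint; any ViT-style checkpoint with an image processor works the same way.
image_processor = ViTImageProcessor.from_pretrained("google/vit-base-patch16-224")

url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)

# Same call signature as the old feature extractor.
inputs = image_processor(images=image, return_tensors="pt")
```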
@@ -513,8 +513,8 @@ TIMESFORMER_START_DOCSTRING = r"""
TIMESFORMER_INPUTS_DOCSTRING = r"""
Args:
pixel_values (`torch.FloatTensor` of shape `(batch_size, num_frames, num_channels, height, width)`):
Pixel values. Pixel values can be obtained using [`AutoFeatureExtractor`]. See
[`VideoMAEFeatureExtractor.__call__`] for details.
Pixel values. Pixel values can be obtained using [`AutoImageProcessor`]. See
[`VideoMAEImageProcessor.preprocess`] for details.
output_attentions (`bool`, *optional*):
Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
......
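The updated docstring points callers at `AutoImageProcessor` / `VideoMAEImageProcessor.preprocess`. A hedged sketch of producing `pixel_values` in the documented shape, using a dummy clip (frame count and resolution are illustrative):

```python
import numpy as np
from transformers import VideoMAEImageProcessor  # per the docstring, the processor class used for TimeSformer inputs

image_processor = VideoMAEImageProcessor()
video = [np.random.randint(0, 256, (360, 640, 3), dtype=np.uint8) for _ in range(8)]  # 8 dummy RGB frames
inputs = image_processor(video, return_tensors="pt")
print(inputs["pixel_values"].shape)  # (batch_size, num_frames, num_channels, height, width)
```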
@@ -29,7 +29,7 @@ from transformers import (
TrOCRProcessor,
VisionEncoderDecoderModel,
ViTConfig,
ViTFeatureExtractor,
ViTImageProcessor,
ViTModel,
)
from transformers.utils import logging
@@ -182,9 +182,9 @@ def convert_tr_ocr_checkpoint(checkpoint_url, pytorch_dump_folder_path):
model.load_state_dict(state_dict)
# Check outputs on an image
feature_extractor = ViTFeatureExtractor(size=encoder_config.image_size)
image_processor = ViTImageProcessor(size=encoder_config.image_size)
tokenizer = RobertaTokenizer.from_pretrained("roberta-large")
processor = TrOCRProcessor(feature_extractor, tokenizer)
processor = TrOCRProcessor(image_processor, tokenizer)
pixel_values = processor(images=prepare_img(checkpoint_url), return_tensors="pt").pixel_values
......
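A hedged sketch of how the converted TrOCR processor is assembled and used after this change, mirroring the script above (the `size` value and the input image are illustrative):

```python
from PIL import Image
from transformers import RobertaTokenizer, TrOCRProcessor, ViTImageProcessor

image_processor = ViTImageProcessor(size=384)  # the script uses encoder_config.image_size; 384 is illustrative
tokenizer = RobertaTokenizer.from_pretrained("roberta-large")
processor = TrOCRProcessor(image_processor, tokenizer)

image = Image.open("text_line.png").convert("RGB")  # illustrative handwriting / printed-text crop
pixel_values = processor(images=image, return_tensors="pt").pixel_values
```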
@@ -30,7 +30,7 @@ import torch.nn as nn
from huggingface_hub import cached_download, hf_hub_download
from torch import Tensor
from transformers import AutoFeatureExtractor, VanConfig, VanForImageClassification
from transformers import AutoImageProcessor, VanConfig, VanForImageClassification
from transformers.models.van.modeling_van import VanLayerScaling
from transformers.utils import logging
@@ -154,10 +154,10 @@ def convert_weight_and_push(
)
# we can use the convnext one
feature_extractor = AutoFeatureExtractor.from_pretrained("facebook/convnext-base-224-22k-1k")
feature_extractor.push_to_hub(
image_processor = AutoImageProcessor.from_pretrained("facebook/convnext-base-224-22k-1k")
image_processor.push_to_hub(
repo_path_or_name=save_directory / checkpoint_name,
commit_message="Add feature extractor",
commit_message="Add image processor",
use_temp_dir=True,
)
@@ -277,7 +277,7 @@ if __name__ == "__main__":
default=True,
type=bool,
required=False,
help="If True, push model and feature extractor to the hub.",
help="If True, push model and image processor to the hub.",
)
args = parser.parse_args()
......
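A hedged sketch of the hub upload the script performs with the renamed object; the ConvNeXt checkpoint comes from the script, while the target repository name is illustrative:

```python
from transformers import AutoImageProcessor

# The ConvNeXt preprocessing is reused for VAN, as the script notes.
image_processor = AutoImageProcessor.from_pretrained("facebook/convnext-base-224-22k-1k")

# Push the processor config to a (hypothetical) target repo on the Hub.
image_processor.push_to_hub("my-namespace/van-base", commit_message="Add image processor")
```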
@@ -24,9 +24,9 @@ from huggingface_hub import hf_hub_download
from transformers import (
VideoMAEConfig,
VideoMAEFeatureExtractor,
VideoMAEForPreTraining,
VideoMAEForVideoClassification,
VideoMAEImageProcessor,
)
@@ -198,9 +198,9 @@ def convert_videomae_checkpoint(checkpoint_url, pytorch_dump_folder_path, model_
model.eval()
# verify model on basic input
feature_extractor = VideoMAEFeatureExtractor(image_mean=[0.5, 0.5, 0.5], image_std=[0.5, 0.5, 0.5])
image_processor = VideoMAEImageProcessor(image_mean=[0.5, 0.5, 0.5], image_std=[0.5, 0.5, 0.5])
video = prepare_video()
inputs = feature_extractor(video, return_tensors="pt")
inputs = image_processor(video, return_tensors="pt")
if "finetuned" not in model_name:
local_path = hf_hub_download(repo_id="hf-internal-testing/bool-masked-pos", filename="bool_masked_pos.pt")
@@ -288,8 +288,8 @@ def convert_videomae_checkpoint(checkpoint_url, pytorch_dump_folder_path, model_
print("Loss ok!")
if pytorch_dump_folder_path is not None:
print(f"Saving model and feature extractor to {pytorch_dump_folder_path}")
feature_extractor.save_pretrained(pytorch_dump_folder_path)
print(f"Saving model and image processor to {pytorch_dump_folder_path}")
image_processor.save_pretrained(pytorch_dump_folder_path)
model.save_pretrained(pytorch_dump_folder_path)
if push_to_hub:
......
@@ -27,11 +27,11 @@ from PIL import Image
from transformers import (
BertTokenizer,
ViltConfig,
ViltFeatureExtractor,
ViltForImageAndTextRetrieval,
ViltForImagesAndTextClassification,
ViltForMaskedLM,
ViltForQuestionAnswering,
ViltImageProcessor,
ViltProcessor,
)
from transformers.utils import logging
@@ -223,9 +223,9 @@ def convert_vilt_checkpoint(checkpoint_url, pytorch_dump_folder_path):
model.load_state_dict(state_dict)
# Define processor
feature_extractor = ViltFeatureExtractor(size=384)
image_processor = ViltImageProcessor(size=384)
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
processor = ViltProcessor(feature_extractor, tokenizer)
processor = ViltProcessor(image_processor, tokenizer)
# Forward pass on example inputs (image + text)
if nlvr_model:
......
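A hedged sketch of using the assembled ViLT processor on an image/question pair, mirroring the conversion script above (the image and question are illustrative):

```python
import requests
from PIL import Image
from transformers import BertTokenizer, ViltImageProcessor, ViltProcessor

image_processor = ViltImageProcessor(size=384)
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
processor = ViltProcessor(image_processor, tokenizer)

url = "http://images.cocodataset.org/val2017/000000039769.jpg"  # illustrative image
image = Image.open(requests.get(url, stream=True).raw)

# The processor tokenizes the text and preprocesses the image in one call.
encoding = processor(image, "How many cats are there?", return_tensors="pt")
print(sorted(encoding.keys()))  # input_ids, attention_mask, pixel_values, pixel_mask, ...
```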
@@ -24,7 +24,7 @@ import torch
from huggingface_hub import hf_hub_download
from PIL import Image
from transformers import ViTConfig, ViTFeatureExtractor, ViTForImageClassification, ViTModel
from transformers import ViTConfig, ViTForImageClassification, ViTImageProcessor, ViTModel
from transformers.utils import logging
@@ -175,9 +175,9 @@ def convert_vit_checkpoint(model_name, pytorch_dump_folder_path, base_model=True
model = ViTForImageClassification(config).eval()
model.load_state_dict(state_dict)
# Check outputs on an image, prepared by ViTFeatureExtractor
feature_extractor = ViTFeatureExtractor()
encoding = feature_extractor(images=prepare_img(), return_tensors="pt")
# Check outputs on an image, prepared by ViTImageProcessor
image_processor = ViTImageProcessor()
encoding = image_processor(images=prepare_img(), return_tensors="pt")
pixel_values = encoding["pixel_values"]
outputs = model(pixel_values)
@@ -192,8 +192,8 @@ def convert_vit_checkpoint(model_name, pytorch_dump_folder_path, base_model=True
Path(pytorch_dump_folder_path).mkdir(exist_ok=True)
print(f"Saving model {model_name} to {pytorch_dump_folder_path}")
model.save_pretrained(pytorch_dump_folder_path)
print(f"Saving feature extractor to {pytorch_dump_folder_path}")
feature_extractor.save_pretrained(pytorch_dump_folder_path)
print(f"Saving image processor to {pytorch_dump_folder_path}")
image_processor.save_pretrained(pytorch_dump_folder_path)
if __name__ == "__main__":
......
@@ -25,7 +25,7 @@ import torch
from huggingface_hub import hf_hub_download
from PIL import Image
from transformers import DeiTFeatureExtractor, ViTConfig, ViTFeatureExtractor, ViTForImageClassification, ViTModel
from transformers import DeiTImageProcessor, ViTConfig, ViTForImageClassification, ViTImageProcessor, ViTModel
from transformers.utils import logging
@@ -208,12 +208,12 @@ def convert_vit_checkpoint(vit_name, pytorch_dump_folder_path):
model = ViTForImageClassification(config).eval()
model.load_state_dict(state_dict)
# Check outputs on an image, prepared by ViTFeatureExtractor/DeiTFeatureExtractor
# Check outputs on an image, prepared by ViTImageProcessor/DeiTImageProcessor
if "deit" in vit_name:
feature_extractor = DeiTFeatureExtractor(size=config.image_size)
image_processor = DeiTImageProcessor(size=config.image_size)
else:
feature_extractor = ViTFeatureExtractor(size=config.image_size)
encoding = feature_extractor(images=prepare_img(), return_tensors="pt")
image_processor = ViTImageProcessor(size=config.image_size)
encoding = image_processor(images=prepare_img(), return_tensors="pt")
pixel_values = encoding["pixel_values"]
outputs = model(pixel_values)
@@ -229,8 +229,8 @@ def convert_vit_checkpoint(vit_name, pytorch_dump_folder_path):
Path(pytorch_dump_folder_path).mkdir(exist_ok=True)
print(f"Saving model {vit_name} to {pytorch_dump_folder_path}")
model.save_pretrained(pytorch_dump_folder_path)
print(f"Saving feature extractor to {pytorch_dump_folder_path}")
feature_extractor.save_pretrained(pytorch_dump_folder_path)
print(f"Saving image processor to {pytorch_dump_folder_path}")
image_processor.save_pretrained(pytorch_dump_folder_path)
if __name__ == "__main__":
......
@@ -20,7 +20,7 @@ import requests
import torch
from PIL import Image
from transformers import ViTMAEConfig, ViTMAEFeatureExtractor, ViTMAEForPreTraining
from transformers import ViTMAEConfig, ViTMAEForPreTraining, ViTMAEImageProcessor
def rename_key(name):
@@ -120,7 +120,7 @@ def convert_vit_mae_checkpoint(checkpoint_url, pytorch_dump_folder_path):
state_dict = torch.hub.load_state_dict_from_url(checkpoint_url, map_location="cpu")["model"]
feature_extractor = ViTMAEFeatureExtractor(size=config.image_size)
image_processor = ViTMAEImageProcessor(size=config.image_size)
new_state_dict = convert_state_dict(state_dict, config)
@@ -130,8 +130,8 @@ def convert_vit_mae_checkpoint(checkpoint_url, pytorch_dump_folder_path):
url = "https://user-images.githubusercontent.com/11435359/147738734-196fd92f-9260-48d5-ba7e-bf103d29364d.jpg"
image = Image.open(requests.get(url, stream=True).raw)
feature_extractor = ViTMAEFeatureExtractor(size=config.image_size)
inputs = feature_extractor(images=image, return_tensors="pt")
image_processor = ViTMAEImageProcessor(size=config.image_size)
inputs = image_processor(images=image, return_tensors="pt")
# forward pass
torch.manual_seed(2)
@@ -157,8 +157,8 @@ def convert_vit_mae_checkpoint(checkpoint_url, pytorch_dump_folder_path):
print(f"Saving model to {pytorch_dump_folder_path}")
model.save_pretrained(pytorch_dump_folder_path)
print(f"Saving feature extractor to {pytorch_dump_folder_path}")
feature_extractor.save_pretrained(pytorch_dump_folder_path)
print(f"Saving image processor to {pytorch_dump_folder_path}")
image_processor.save_pretrained(pytorch_dump_folder_path)
if __name__ == "__main__":
......
@@ -22,7 +22,7 @@ import torch
from huggingface_hub import hf_hub_download
from PIL import Image
from transformers import ViTFeatureExtractor, ViTMSNConfig, ViTMSNModel
from transformers import ViTImageProcessor, ViTMSNConfig, ViTMSNModel
from transformers.image_utils import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD
@@ -180,7 +180,7 @@ def convert_vit_msn_checkpoint(checkpoint_url, pytorch_dump_folder_path):
state_dict = torch.hub.load_state_dict_from_url(checkpoint_url, map_location="cpu")["target_encoder"]
feature_extractor = ViTFeatureExtractor(size=config.image_size)
image_processor = ViTImageProcessor(size=config.image_size)
remove_projection_head(state_dict)
rename_keys = create_rename_keys(config, base_model=True)
@@ -195,10 +195,10 @@ def convert_vit_msn_checkpoint(checkpoint_url, pytorch_dump_folder_path):
url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)
feature_extractor = ViTFeatureExtractor(
image_processor = ViTImageProcessor(
size=config.image_size, image_mean=IMAGENET_DEFAULT_MEAN, image_std=IMAGENET_DEFAULT_STD
)
inputs = feature_extractor(images=image, return_tensors="pt")
inputs = image_processor(images=image, return_tensors="pt")
# forward pass
torch.manual_seed(2)
@@ -224,8 +224,8 @@ def convert_vit_msn_checkpoint(checkpoint_url, pytorch_dump_folder_path):
print(f"Saving model to {pytorch_dump_folder_path}")
model.save_pretrained(pytorch_dump_folder_path)
print(f"Saving feature extractor to {pytorch_dump_folder_path}")
feature_extractor.save_pretrained(pytorch_dump_folder_path)
print(f"Saving image processor to {pytorch_dump_folder_path}")
image_processor.save_pretrained(pytorch_dump_folder_path)
if __name__ == "__main__":
......
@@ -23,7 +23,7 @@ from huggingface_hub import hf_hub_download
from transformers import (
CLIPTokenizer,
CLIPTokenizerFast,
VideoMAEFeatureExtractor,
VideoMAEImageProcessor,
XCLIPConfig,
XCLIPModel,
XCLIPProcessor,
@@ -291,10 +291,10 @@ def convert_xclip_checkpoint(model_name, pytorch_dump_folder_path=None, push_to_
model.eval()
size = 336 if model_name == "xclip-large-patch14-16-frames" else 224
feature_extractor = VideoMAEFeatureExtractor(size=size)
image_processor = VideoMAEImageProcessor(size=size)
slow_tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-base-patch32")
fast_tokenizer = CLIPTokenizerFast.from_pretrained("openai/clip-vit-base-patch32")
processor = XCLIPProcessor(feature_extractor=feature_extractor, tokenizer=fast_tokenizer)
processor = XCLIPProcessor(image_processor=image_processor, tokenizer=fast_tokenizer)
video = prepare_video(num_frames)
inputs = processor(
......
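A hedged sketch of building the X-CLIP processor with the renamed `image_processor` keyword and running it on text plus a dummy clip (the clip, prompts, and padding choice are illustrative):

```python
import numpy as np
from transformers import CLIPTokenizerFast, VideoMAEImageProcessor, XCLIPProcessor

image_processor = VideoMAEImageProcessor(size=224)
tokenizer = CLIPTokenizerFast.from_pretrained("openai/clip-vit-base-patch32")
processor = XCLIPProcessor(image_processor=image_processor, tokenizer=tokenizer)

# 8 dummy RGB frames standing in for a real clip.
video = list(np.random.randint(0, 256, (8, 224, 224, 3), dtype=np.uint8))
inputs = processor(
    text=["playing sports", "cooking"], videos=video, return_tensors="pt", padding=True
)
```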
@@ -24,7 +24,7 @@ import torch
from huggingface_hub import hf_hub_download
from PIL import Image
from transformers import YolosConfig, YolosFeatureExtractor, YolosForObjectDetection
from transformers import YolosConfig, YolosForObjectDetection, YolosImageProcessor
from transformers.utils import logging
@@ -172,10 +172,10 @@ def convert_yolos_checkpoint(
new_state_dict = convert_state_dict(state_dict, model)
model.load_state_dict(new_state_dict)
# Check outputs on an image, prepared by YolosFeatureExtractor
# Check outputs on an image, prepared by YolosImageProcessor
size = 800 if yolos_name != "yolos_ti" else 512
feature_extractor = YolosFeatureExtractor(format="coco_detection", size=size)
encoding = feature_extractor(images=prepare_img(), return_tensors="pt")
image_processor = YolosImageProcessor(format="coco_detection", size=size)
encoding = image_processor(images=prepare_img(), return_tensors="pt")
outputs = model(**encoding)
logits, pred_boxes = outputs.logits, outputs.pred_boxes
@@ -224,8 +224,8 @@ def convert_yolos_checkpoint(
Path(pytorch_dump_folder_path).mkdir(exist_ok=True)
print(f"Saving model {yolos_name} to {pytorch_dump_folder_path}")
model.save_pretrained(pytorch_dump_folder_path)
print(f"Saving feature extractor to {pytorch_dump_folder_path}")
feature_extractor.save_pretrained(pytorch_dump_folder_path)
print(f"Saving image processor to {pytorch_dump_folder_path}")
image_processor.save_pretrained(pytorch_dump_folder_path)
if push_to_hub:
model_mapping = {
@@ -238,7 +238,7 @@ def convert_yolos_checkpoint(
print("Pushing to the hub...")
model_name = model_mapping[yolos_name]
feature_extractor.push_to_hub(model_name, organization="hustvl")
image_processor.push_to_hub(model_name, organization="hustvl")
model.push_to_hub(model_name, organization="hustvl")
......
@@ -19,7 +19,7 @@ from pathlib import Path
from packaging import version
from .. import AutoFeatureExtractor, AutoProcessor, AutoTokenizer
from .. import AutoFeatureExtractor, AutoImageProcessor, AutoProcessor, AutoTokenizer
from ..utils import logging
from ..utils.import_utils import is_optimum_available
from .convert import export, validate_model_outputs
@@ -145,6 +145,8 @@ def export_with_transformers(args):
preprocessor = get_preprocessor(args.model)
elif args.preprocessor == "tokenizer":
preprocessor = AutoTokenizer.from_pretrained(args.model)
elif args.preprocessor == "image_processor":
preprocessor = AutoImageProcessor.from_pretrained(args.model)
elif args.preprocessor == "feature_extractor":
preprocessor = AutoFeatureExtractor.from_pretrained(args.model)
elif args.preprocessor == "processor":
@@ -213,7 +215,7 @@ def main():
parser.add_argument(
"--preprocessor",
type=str,
choices=["auto", "tokenizer", "feature_extractor", "processor"],
choices=["auto", "tokenizer", "feature_extractor", "image_processor", "processor"],
default="auto",
help="Which type of preprocessor to use. 'auto' tries to automatically detect it.",
)
......
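A hedged sketch of what the new `image_processor` choice does inside `export_with_transformers`: the preprocessor is loaded with `AutoImageProcessor` instead of `AutoFeatureExtractor`. The model name below is illustrative, and the CLI line in the comment is an assumed invocation rather than something shown in this diff:

```python
# Roughly equivalent CLI (assumed):
#   python -m transformers.onnx --model google/vit-base-patch16-224 --preprocessor image_processor onnx_output/
from transformers import AutoImageProcessor

# What the new elif branch above does for args.preprocessor == "image_processor".
preprocessor = AutoImageProcessor.from_pretrained("google/vit-base-patch16-224")
```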
@@ -49,7 +49,7 @@ if is_vision_available():
import PIL
from PIL import Image
from transformers import BeitFeatureExtractor
from transformers import BeitImageProcessor
class BeitModelTester:
@@ -342,18 +342,16 @@ def prepare_img():
@require_vision
class BeitModelIntegrationTest(unittest.TestCase):
@cached_property
def default_feature_extractor(self):
return (
BeitFeatureExtractor.from_pretrained("microsoft/beit-base-patch16-224") if is_vision_available() else None
)
def default_image_processor(self):
return BeitImageProcessor.from_pretrained("microsoft/beit-base-patch16-224") if is_vision_available() else None
@slow
def test_inference_masked_image_modeling_head(self):
model = BeitForMaskedImageModeling.from_pretrained("microsoft/beit-base-patch16-224-pt22k").to(torch_device)
feature_extractor = self.default_feature_extractor
image_processor = self.default_image_processor
image = prepare_img()
pixel_values = feature_extractor(images=image, return_tensors="pt").pixel_values.to(torch_device)
pixel_values = image_processor(images=image, return_tensors="pt").pixel_values.to(torch_device)
# prepare bool_masked_pos
bool_masked_pos = torch.ones((1, 196), dtype=torch.bool).to(torch_device)
@@ -377,9 +375,9 @@ class BeitModelIntegrationTest(unittest.TestCase):
def test_inference_image_classification_head_imagenet_1k(self):
model = BeitForImageClassification.from_pretrained("microsoft/beit-base-patch16-224").to(torch_device)
feature_extractor = self.default_feature_extractor
image_processor = self.default_image_processor
image = prepare_img()
inputs = feature_extractor(images=image, return_tensors="pt").to(torch_device)
inputs = image_processor(images=image, return_tensors="pt").to(torch_device)
# forward pass
with torch.no_grad():
@@ -403,9 +401,9 @@ class BeitModelIntegrationTest(unittest.TestCase):
torch_device
)
feature_extractor = self.default_feature_extractor
image_processor = self.default_image_processor
image = prepare_img()
inputs = feature_extractor(images=image, return_tensors="pt").to(torch_device)
inputs = image_processor(images=image, return_tensors="pt").to(torch_device)
# forward pass
with torch.no_grad():
@@ -428,11 +426,11 @@ class BeitModelIntegrationTest(unittest.TestCase):
model = BeitForSemanticSegmentation.from_pretrained("microsoft/beit-base-finetuned-ade-640-640")
model = model.to(torch_device)
feature_extractor = BeitFeatureExtractor(do_resize=True, size=640, do_center_crop=False)
image_processor = BeitImageProcessor(do_resize=True, size=640, do_center_crop=False)
ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
image = Image.open(ds[0]["file"])
inputs = feature_extractor(images=image, return_tensors="pt").to(torch_device)
inputs = image_processor(images=image, return_tensors="pt").to(torch_device)
# forward pass
with torch.no_grad():
@@ -471,11 +469,11 @@ class BeitModelIntegrationTest(unittest.TestCase):
model = BeitForSemanticSegmentation.from_pretrained("microsoft/beit-base-finetuned-ade-640-640")
model = model.to(torch_device)
feature_extractor = BeitFeatureExtractor(do_resize=True, size=640, do_center_crop=False)
image_processor = BeitImageProcessor(do_resize=True, size=640, do_center_crop=False)
ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
image = Image.open(ds[0]["file"])
inputs = feature_extractor(images=image, return_tensors="pt").to(torch_device)
inputs = image_processor(images=image, return_tensors="pt").to(torch_device)
# forward pass
with torch.no_grad():
@@ -483,10 +481,10 @@ class BeitModelIntegrationTest(unittest.TestCase):
outputs.logits = outputs.logits.detach().cpu()
segmentation = feature_extractor.post_process_semantic_segmentation(outputs=outputs, target_sizes=[(500, 300)])
segmentation = image_processor.post_process_semantic_segmentation(outputs=outputs, target_sizes=[(500, 300)])
expected_shape = torch.Size((500, 300))
self.assertEqual(segmentation[0].shape, expected_shape)
segmentation = feature_extractor.post_process_semantic_segmentation(outputs=outputs)
segmentation = image_processor.post_process_semantic_segmentation(outputs=outputs)
expected_shape = torch.Size((160, 160))
self.assertEqual(segmentation[0].shape, expected_shape)
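A hedged sketch of the semantic-segmentation post-processing exercised by the test above; the checkpoint, processor arguments, and `target_sizes` come from the test, while the input image is illustrative:

```python
import requests
import torch
from PIL import Image
from transformers import BeitForSemanticSegmentation, BeitImageProcessor

image_processor = BeitImageProcessor(do_resize=True, size=640, do_center_crop=False)
model = BeitForSemanticSegmentation.from_pretrained("microsoft/beit-base-finetuned-ade-640-640")

url = "http://images.cocodataset.org/val2017/000000039769.jpg"  # illustrative image
image = Image.open(requests.get(url, stream=True).raw)
inputs = image_processor(images=image, return_tensors="pt")
with torch.no_grad():
    outputs = model(**inputs)

# With `target_sizes`, the predicted maps are resized to the requested (height, width);
# without it, they stay at the model's own output resolution.
segmentation = image_processor.post_process_semantic_segmentation(
    outputs=outputs, target_sizes=[(500, 300)]
)
print(segmentation[0].shape)  # torch.Size([500, 300])
```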
@@ -33,7 +33,7 @@ if is_flax_available():
if is_vision_available():
from PIL import Image
from transformers import BeitFeatureExtractor
from transformers import BeitImageProcessor
class FlaxBeitModelTester(unittest.TestCase):
@@ -219,18 +219,16 @@ def prepare_img():
@require_flax
class FlaxBeitModelIntegrationTest(unittest.TestCase):
@cached_property
def default_feature_extractor(self):
return (
BeitFeatureExtractor.from_pretrained("microsoft/beit-base-patch16-224") if is_vision_available() else None
)
def default_image_processor(self):
return BeitImageProcessor.from_pretrained("microsoft/beit-base-patch16-224") if is_vision_available() else None
@slow
def test_inference_masked_image_modeling_head(self):
model = FlaxBeitForMaskedImageModeling.from_pretrained("microsoft/beit-base-patch16-224-pt22k")
feature_extractor = self.default_feature_extractor
image_processor = self.default_image_processor
image = prepare_img()
pixel_values = feature_extractor(images=image, return_tensors="np").pixel_values
pixel_values = image_processor(images=image, return_tensors="np").pixel_values
# prepare bool_masked_pos
bool_masked_pos = np.ones((1, 196), dtype=bool)
@@ -253,9 +251,9 @@ class FlaxBeitModelIntegrationTest(unittest.TestCase):
def test_inference_image_classification_head_imagenet_1k(self):
model = FlaxBeitForImageClassification.from_pretrained("microsoft/beit-base-patch16-224")
feature_extractor = self.default_feature_extractor
image_processor = self.default_image_processor
image = prepare_img()
inputs = feature_extractor(images=image, return_tensors="np")
inputs = image_processor(images=image, return_tensors="np")
# forward pass
outputs = model(**inputs)
@@ -276,9 +274,9 @@ class FlaxBeitModelIntegrationTest(unittest.TestCase):
def test_inference_image_classification_head_imagenet_22k(self):
model = FlaxBeitForImageClassification.from_pretrained("microsoft/beit-large-patch16-224-pt22k-ft22k")
feature_extractor = self.default_feature_extractor
image_processor = self.default_image_processor
image = prepare_img()
inputs = feature_extractor(images=image, return_tensors="np")
inputs = image_processor(images=image, return_tensors="np")
# forward pass
outputs = model(**inputs)
......
@@ -297,7 +297,7 @@ def prepare_img():
@require_vision
class BitModelIntegrationTest(unittest.TestCase):
@cached_property
def default_feature_extractor(self):
def default_image_processor(self):
return (
BitImageProcessor.from_pretrained(BIT_PRETRAINED_MODEL_ARCHIVE_LIST[0]) if is_vision_available() else None
)
@@ -306,9 +306,9 @@ class BitModelIntegrationTest(unittest.TestCase):
def test_inference_image_classification_head(self):
model = BitForImageClassification.from_pretrained(BIT_PRETRAINED_MODEL_ARCHIVE_LIST[0]).to(torch_device)
feature_extractor = self.default_feature_extractor
image_processor = self.default_image_processor
image = prepare_img()
inputs = feature_extractor(images=image, return_tensors="pt").to(torch_device)
inputs = image_processor(images=image, return_tensors="pt").to(torch_device)
# forward pass
with torch.no_grad():
......
@@ -145,7 +145,7 @@ class BridgeTowerImageProcessingTest(ImageProcessingSavingTestMixin, unittest.Te
pass
def test_call_pil(self):
# Initialize feature_extractor
# Initialize image processor
image_processing = self.image_processing_class(**self.image_processor_dict)
# create random PIL images
image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False)
@@ -176,7 +176,7 @@ class BridgeTowerImageProcessingTest(ImageProcessingSavingTestMixin, unittest.Te
)
def test_call_numpy(self):
# Initialize feature_extractor
# Initialize image processor
image_processing = self.image_processing_class(**self.image_processor_dict)
# create random numpy tensors
image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False, numpify=True)
@@ -207,7 +207,7 @@ class BridgeTowerImageProcessingTest(ImageProcessingSavingTestMixin, unittest.Te
)
def test_call_pytorch(self):
# Initialize feature_extractor
# Initialize image processor
image_processing = self.image_processing_class(**self.image_processor_dict)
# create random PyTorch tensors
image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False, torchify=True)
@@ -238,7 +238,7 @@ class BridgeTowerImageProcessingTest(ImageProcessingSavingTestMixin, unittest.Te
)
def test_equivalence_pad_and_create_pixel_mask(self):
# Initialize feature_extractors
# Initialize image processors
image_processing_1 = self.image_processing_class(**self.image_processor_dict)
image_processing_2 = self.image_processing_class(do_resize=False, do_normalize=False, do_rescale=False)
# create random PyTorch tensors
......
@@ -43,7 +43,7 @@ if is_timm_available():
if is_vision_available():
from PIL import Image
from transformers import ConditionalDetrFeatureExtractor
from transformers import ConditionalDetrImageProcessor
class ConditionalDetrModelTester:
@@ -493,9 +493,9 @@ def prepare_img():
@slow
class ConditionalDetrModelIntegrationTests(unittest.TestCase):
@cached_property
def default_feature_extractor(self):
def default_image_processor(self):
return (
ConditionalDetrFeatureExtractor.from_pretrained("microsoft/conditional-detr-resnet-50")
ConditionalDetrImageProcessor.from_pretrained("microsoft/conditional-detr-resnet-50")
if is_vision_available()
else None
)
@@ -503,9 +503,9 @@ class ConditionalDetrModelIntegrationTests(unittest.TestCase):
def test_inference_no_head(self):
model = ConditionalDetrModel.from_pretrained("microsoft/conditional-detr-resnet-50").to(torch_device)
feature_extractor = self.default_feature_extractor
image_processor = self.default_image_processor
image = prepare_img()
encoding = feature_extractor(images=image, return_tensors="pt").to(torch_device)
encoding = image_processor(images=image, return_tensors="pt").to(torch_device)
with torch.no_grad():
outputs = model(**encoding)
@@ -522,9 +522,9 @@ class ConditionalDetrModelIntegrationTests(unittest.TestCase):
torch_device
)
feature_extractor = self.default_feature_extractor
image_processor = self.default_image_processor
image = prepare_img()
encoding = feature_extractor(images=image, return_tensors="pt").to(torch_device)
encoding = image_processor(images=image, return_tensors="pt").to(torch_device)
pixel_values = encoding["pixel_values"].to(torch_device)
pixel_mask = encoding["pixel_mask"].to(torch_device)
@@ -547,7 +547,7 @@ class ConditionalDetrModelIntegrationTests(unittest.TestCase):
self.assertTrue(torch.allclose(outputs.pred_boxes[0, :3, :3], expected_slice_boxes, atol=1e-4))
# verify postprocessing
results = feature_extractor.post_process_object_detection(
results = image_processor.post_process_object_detection(
outputs, threshold=0.3, target_sizes=[image.size[::-1]]
)[0]
expected_scores = torch.tensor([0.8330, 0.8313, 0.8039, 0.6829, 0.5355]).to(torch_device)
......
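A hedged sketch of the object-detection post-processing verified above; the checkpoint, threshold, and `target_sizes` come from the test, while the input image is illustrative:

```python
import requests
import torch
from PIL import Image
from transformers import ConditionalDetrForObjectDetection, ConditionalDetrImageProcessor

checkpoint = "microsoft/conditional-detr-resnet-50"
image_processor = ConditionalDetrImageProcessor.from_pretrained(checkpoint)
model = ConditionalDetrForObjectDetection.from_pretrained(checkpoint)

url = "http://images.cocodataset.org/val2017/000000039769.jpg"  # illustrative image
image = Image.open(requests.get(url, stream=True).raw)
inputs = image_processor(images=image, return_tensors="pt")
with torch.no_grad():
    outputs = model(**inputs)

# Convert raw logits/boxes to per-image detections in absolute (xmin, ymin, xmax, ymax) pixels.
results = image_processor.post_process_object_detection(
    outputs, threshold=0.3, target_sizes=[image.size[::-1]]
)[0]
for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
    print(model.config.id2label[label.item()], round(score.item(), 3), [round(c, 1) for c in box.tolist()])
```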
@@ -38,7 +38,7 @@ if is_torch_available():
if is_vision_available():
from PIL import Image
from transformers import AutoFeatureExtractor
from transformers import AutoImageProcessor
class ConvNextModelTester:
@@ -285,16 +285,16 @@ def prepare_img():
@require_vision
class ConvNextModelIntegrationTest(unittest.TestCase):
@cached_property
def default_feature_extractor(self):
return AutoFeatureExtractor.from_pretrained("facebook/convnext-tiny-224") if is_vision_available() else None
def default_image_processor(self):
return AutoImageProcessor.from_pretrained("facebook/convnext-tiny-224") if is_vision_available() else None
@slow
def test_inference_image_classification_head(self):
model = ConvNextForImageClassification.from_pretrained("facebook/convnext-tiny-224").to(torch_device)
feature_extractor = self.default_feature_extractor
image_processor = self.default_image_processor
image = prepare_img()
inputs = feature_extractor(images=image, return_tensors="pt").to(torch_device)
inputs = image_processor(images=image, return_tensors="pt").to(torch_device)
# forward pass
with torch.no_grad():
......
@@ -38,7 +38,7 @@ if is_tf_available():
if is_vision_available():
from PIL import Image
from transformers import ConvNextFeatureExtractor
from transformers import ConvNextImageProcessor
class TFConvNextModelTester:
@@ -279,18 +279,16 @@ def prepare_img():
@require_vision
class TFConvNextModelIntegrationTest(unittest.TestCase):
@cached_property
def default_feature_extractor(self):
return (
ConvNextFeatureExtractor.from_pretrained("facebook/convnext-tiny-224") if is_vision_available() else None
)
def default_image_processor(self):
return ConvNextImageProcessor.from_pretrained("facebook/convnext-tiny-224") if is_vision_available() else None
@slow
def test_inference_image_classification_head(self):
model = TFConvNextForImageClassification.from_pretrained("facebook/convnext-tiny-224")
feature_extractor = self.default_feature_extractor
image_processor = self.default_image_processor
image = prepare_img()
inputs = feature_extractor(images=image, return_tensors="tf")
inputs = image_processor(images=image, return_tensors="tf")
# forward pass
outputs = model(**inputs)
......
@@ -38,7 +38,7 @@ if is_torch_available():
if is_vision_available():
from PIL import Image
from transformers import AutoFeatureExtractor
from transformers import AutoImageProcessor
class CvtConfigTester(ConfigTester):
@@ -264,16 +264,16 @@ def prepare_img():
@require_vision
class CvtModelIntegrationTest(unittest.TestCase):
@cached_property
def default_feature_extractor(self):
return AutoFeatureExtractor.from_pretrained(CVT_PRETRAINED_MODEL_ARCHIVE_LIST[0])
def default_image_processor(self):
return AutoImageProcessor.from_pretrained(CVT_PRETRAINED_MODEL_ARCHIVE_LIST[0])
@slow
def test_inference_image_classification_head(self):
model = CvtForImageClassification.from_pretrained(CVT_PRETRAINED_MODEL_ARCHIVE_LIST[0]).to(torch_device)
feature_extractor = self.default_feature_extractor
image_processor = self.default_image_processor
image = prepare_img()
inputs = feature_extractor(images=image, return_tensors="pt").to(torch_device)
inputs = image_processor(images=image, return_tensors="pt").to(torch_device)
# forward pass
with torch.no_grad():
......