Unverified Commit ae454f41 authored by amyeroberts, committed by GitHub

Update old existing feature extractor references (#24552)

* Update old existing feature extractor references

* Typo

* Apply suggestions from code review

* Apply suggestions from code review

* Apply suggestions from code review

* Address comments from review - update 'feature extractor'
Co-authored-by: Yih-Dar <2521628+ydshieh@users.noreply.github.com>
parent 10c2ac7b
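
This commit only renames user-facing references: the vision preprocessing classes are used under their *ImageProcessor names instead of the deprecated *FeatureExtractor aliases, and variables named feature_extractor become image_processor. The preprocessing call itself is unchanged. A minimal sketch of the pattern, using an illustrative checkpoint that is not one of the models converted in this diff:

# Hedged example of the rename applied throughout the conversion scripts below.
# The checkpoint name is a placeholder chosen for illustration only.
import requests
from PIL import Image
from transformers import AutoImageProcessor  # previously accessed via AutoFeatureExtractor

url = "http://images.cocodataset.org/val2017/000000039769.jpg"  # same COCO test image the scripts use
image = Image.open(requests.get(url, stream=True).raw)

# old: feature_extractor = AutoFeatureExtractor.from_pretrained("microsoft/swin-tiny-patch4-window7-224")
image_processor = AutoImageProcessor.from_pretrained("microsoft/swin-tiny-patch4-window7-224")
inputs = image_processor(images=image, return_tensors="pt")  # returns pixel_values, as before
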
@@ -29,7 +29,7 @@ from detectron2.projects.deeplab import add_deeplab_config
from PIL import Image
from torch import Tensor, nn
from transformers.models.maskformer.feature_extraction_maskformer import MaskFormerFeatureExtractor
from transformers.models.maskformer.feature_extraction_maskformer import MaskFormerImageProcessor
from transformers.models.maskformer.modeling_maskformer import (
MaskFormerConfig,
MaskFormerForInstanceSegmentation,
@@ -164,13 +164,13 @@ class OriginalMaskFormerConfigToOursConverter:
return config
class OriginalMaskFormerConfigToFeatureExtractorConverter:
def __call__(self, original_config: object) -> MaskFormerFeatureExtractor:
class OriginalMaskFormerConfigToImageProcessorConverter:
def __call__(self, original_config: object) -> MaskFormerImageProcessor:
model = original_config.MODEL
model_input = original_config.INPUT
dataset_catalog = MetadataCatalog.get(original_config.DATASETS.TEST[0])
return MaskFormerFeatureExtractor(
return MaskFormerImageProcessor(
image_mean=(torch.tensor(model.PIXEL_MEAN) / 255).tolist(),
image_std=(torch.tensor(model.PIXEL_STD) / 255).tolist(),
size=model_input.MIN_SIZE_TEST,
@@ -554,7 +554,7 @@ class OriginalMaskFormerCheckpointToOursConverter:
yield config, checkpoint
def test(original_model, our_model: MaskFormerForInstanceSegmentation, feature_extractor: MaskFormerFeatureExtractor):
def test(original_model, our_model: MaskFormerForInstanceSegmentation, image_processor: MaskFormerImageProcessor):
with torch.no_grad():
original_model = original_model.eval()
our_model = our_model.eval()
@@ -600,7 +600,7 @@ def test(original_model, our_model: MaskFormerForInstanceSegmentation, feature_e
our_model_out: MaskFormerForInstanceSegmentationOutput = our_model(x)
our_segmentation = feature_extractor.post_process_segmentation(our_model_out, target_size=(384, 384))
our_segmentation = image_processor.post_process_segmentation(our_model_out, target_size=(384, 384))
assert torch.allclose(
original_segmentation, our_segmentation, atol=1e-3
@@ -686,9 +686,7 @@ if __name__ == "__main__":
for config_file, checkpoint_file in OriginalMaskFormerCheckpointToOursConverter.using_dirs(
checkpoints_dir, config_dir
):
feature_extractor = OriginalMaskFormerConfigToFeatureExtractorConverter()(
setup_cfg(Args(config_file=config_file))
)
image_processor = OriginalMaskFormerConfigToImageProcessorConverter()(setup_cfg(Args(config_file=config_file)))
original_config = setup_cfg(Args(config_file=config_file))
mask_former_kwargs = OriginalMaskFormer.from_config(original_config)
@@ -712,15 +710,15 @@ if __name__ == "__main__":
mask_former_for_instance_segmentation
)
test(original_model, mask_former_for_instance_segmentation, feature_extractor)
test(original_model, mask_former_for_instance_segmentation, image_processor)
model_name = get_name(checkpoint_file)
logger.info(f"🪄 Saving {model_name}")
feature_extractor.save_pretrained(save_directory / model_name)
image_processor.save_pretrained(save_directory / model_name)
mask_former_for_instance_segmentation.save_pretrained(save_directory / model_name)
feature_extractor.push_to_hub(
image_processor.push_to_hub(
repo_path_or_name=save_directory / model_name,
commit_message="Add model",
use_temp_dir=True,
......
@@ -26,7 +26,7 @@ import torch
from huggingface_hub import hf_hub_download
from PIL import Image
from transformers import MaskFormerConfig, MaskFormerFeatureExtractor, MaskFormerForInstanceSegmentation, ResNetConfig
from transformers import MaskFormerConfig, MaskFormerForInstanceSegmentation, MaskFormerImageProcessor, ResNetConfig
from transformers.utils import logging
@@ -297,9 +297,9 @@ def convert_maskformer_checkpoint(
else:
ignore_index = 255
reduce_labels = True if "ade" in model_name else False
feature_extractor = MaskFormerFeatureExtractor(ignore_index=ignore_index, reduce_labels=reduce_labels)
image_processor = MaskFormerImageProcessor(ignore_index=ignore_index, reduce_labels=reduce_labels)
inputs = feature_extractor(image, return_tensors="pt")
inputs = image_processor(image, return_tensors="pt")
outputs = model(**inputs)
@@ -340,15 +340,15 @@ def convert_maskformer_checkpoint(
print("Looks ok!")
if pytorch_dump_folder_path is not None:
print(f"Saving model and feature extractor of {model_name} to {pytorch_dump_folder_path}")
print(f"Saving model and image processor of {model_name} to {pytorch_dump_folder_path}")
Path(pytorch_dump_folder_path).mkdir(exist_ok=True)
model.save_pretrained(pytorch_dump_folder_path)
feature_extractor.save_pretrained(pytorch_dump_folder_path)
image_processor.save_pretrained(pytorch_dump_folder_path)
if push_to_hub:
print(f"Pushing model and feature extractor of {model_name} to the hub...")
print(f"Pushing model and image processor of {model_name} to the hub...")
model.push_to_hub(f"facebook/{model_name}")
feature_extractor.push_to_hub(f"facebook/{model_name}")
image_processor.push_to_hub(f"facebook/{model_name}")
if __name__ == "__main__":
......
@@ -26,7 +26,7 @@ import torch
from huggingface_hub import hf_hub_download
from PIL import Image
from transformers import MaskFormerConfig, MaskFormerFeatureExtractor, MaskFormerForInstanceSegmentation, SwinConfig
from transformers import MaskFormerConfig, MaskFormerForInstanceSegmentation, MaskFormerImageProcessor, SwinConfig
from transformers.utils import logging
@@ -278,9 +278,9 @@ def convert_maskformer_checkpoint(
else:
ignore_index = 255
reduce_labels = True if "ade" in model_name else False
feature_extractor = MaskFormerFeatureExtractor(ignore_index=ignore_index, reduce_labels=reduce_labels)
image_processor = MaskFormerImageProcessor(ignore_index=ignore_index, reduce_labels=reduce_labels)
inputs = feature_extractor(image, return_tensors="pt")
inputs = image_processor(image, return_tensors="pt")
outputs = model(**inputs)
@@ -294,15 +294,15 @@ def convert_maskformer_checkpoint(
print("Looks ok!")
if pytorch_dump_folder_path is not None:
print(f"Saving model and feature extractor to {pytorch_dump_folder_path}")
print(f"Saving model and image processor to {pytorch_dump_folder_path}")
Path(pytorch_dump_folder_path).mkdir(exist_ok=True)
model.save_pretrained(pytorch_dump_folder_path)
feature_extractor.save_pretrained(pytorch_dump_folder_path)
image_processor.save_pretrained(pytorch_dump_folder_path)
if push_to_hub:
print("Pushing model and feature extractor to the hub...")
print("Pushing model and image processor to the hub...")
model.push_to_hub(f"nielsr/{model_name}")
feature_extractor.push_to_hub(f"nielsr/{model_name}")
image_processor.push_to_hub(f"nielsr/{model_name}")
if __name__ == "__main__":
......
@@ -27,8 +27,8 @@ from PIL import Image
from transformers import (
MobileNetV1Config,
MobileNetV1FeatureExtractor,
MobileNetV1ForImageClassification,
MobileNetV1ImageProcessor,
load_tf_weights_in_mobilenet_v1,
)
from transformers.utils import logging
@@ -83,12 +83,12 @@ def convert_movilevit_checkpoint(model_name, checkpoint_path, pytorch_dump_folde
# Load weights from TensorFlow checkpoint
load_tf_weights_in_mobilenet_v1(model, config, checkpoint_path)
# Check outputs on an image, prepared by MobileNetV1FeatureExtractor
feature_extractor = MobileNetV1FeatureExtractor(
# Check outputs on an image, prepared by MobileNetV1ImageProcessor
image_processor = MobileNetV1ImageProcessor(
crop_size={"width": config.image_size, "height": config.image_size},
size={"shortest_edge": config.image_size + 32},
)
encoding = feature_extractor(images=prepare_img(), return_tensors="pt")
encoding = image_processor(images=prepare_img(), return_tensors="pt")
outputs = model(**encoding)
logits = outputs.logits
@@ -107,13 +107,13 @@ def convert_movilevit_checkpoint(model_name, checkpoint_path, pytorch_dump_folde
Path(pytorch_dump_folder_path).mkdir(exist_ok=True)
print(f"Saving model {model_name} to {pytorch_dump_folder_path}")
model.save_pretrained(pytorch_dump_folder_path)
print(f"Saving feature extractor to {pytorch_dump_folder_path}")
feature_extractor.save_pretrained(pytorch_dump_folder_path)
print(f"Saving image processor to {pytorch_dump_folder_path}")
image_processor.save_pretrained(pytorch_dump_folder_path)
if push_to_hub:
print("Pushing to the hub...")
repo_id = "google/" + model_name
feature_extractor.push_to_hub(repo_id)
image_processor.push_to_hub(repo_id)
model.push_to_hub(repo_id)
......
@@ -99,11 +99,11 @@ def convert_movilevit_checkpoint(model_name, checkpoint_path, pytorch_dump_folde
load_tf_weights_in_mobilenet_v2(model, config, checkpoint_path)
# Check outputs on an image, prepared by MobileNetV2ImageProcessor
feature_extractor = MobileNetV2ImageProcessor(
image_processor = MobileNetV2ImageProcessor(
crop_size={"width": config.image_size, "height": config.image_size},
size={"shortest_edge": config.image_size + 32},
)
encoding = feature_extractor(images=prepare_img(), return_tensors="pt")
encoding = image_processor(images=prepare_img(), return_tensors="pt")
outputs = model(**encoding)
logits = outputs.logits
@@ -143,13 +143,13 @@ def convert_movilevit_checkpoint(model_name, checkpoint_path, pytorch_dump_folde
Path(pytorch_dump_folder_path).mkdir(exist_ok=True)
print(f"Saving model {model_name} to {pytorch_dump_folder_path}")
model.save_pretrained(pytorch_dump_folder_path)
print(f"Saving feature extractor to {pytorch_dump_folder_path}")
feature_extractor.save_pretrained(pytorch_dump_folder_path)
print(f"Saving image processor to {pytorch_dump_folder_path}")
image_processor.save_pretrained(pytorch_dump_folder_path)
if push_to_hub:
print("Pushing to the hub...")
repo_id = "google/" + model_name
feature_extractor.push_to_hub(repo_id)
image_processor.push_to_hub(repo_id)
model.push_to_hub(repo_id)
......
@@ -26,9 +26,9 @@ from PIL import Image
from transformers import (
MobileViTConfig,
MobileViTFeatureExtractor,
MobileViTForImageClassification,
MobileViTForSemanticSegmentation,
MobileViTImageProcessor,
)
from transformers.utils import logging
@@ -211,9 +211,9 @@ def convert_movilevit_checkpoint(mobilevit_name, checkpoint_path, pytorch_dump_f
new_state_dict = convert_state_dict(state_dict, model)
model.load_state_dict(new_state_dict)
# Check outputs on an image, prepared by MobileViTFeatureExtractor
feature_extractor = MobileViTFeatureExtractor(crop_size=config.image_size, size=config.image_size + 32)
encoding = feature_extractor(images=prepare_img(), return_tensors="pt")
# Check outputs on an image, prepared by MobileViTImageProcessor
image_processor = MobileViTImageProcessor(crop_size=config.image_size, size=config.image_size + 32)
encoding = image_processor(images=prepare_img(), return_tensors="pt")
outputs = model(**encoding)
logits = outputs.logits
@@ -265,8 +265,8 @@ def convert_movilevit_checkpoint(mobilevit_name, checkpoint_path, pytorch_dump_f
Path(pytorch_dump_folder_path).mkdir(exist_ok=True)
print(f"Saving model {mobilevit_name} to {pytorch_dump_folder_path}")
model.save_pretrained(pytorch_dump_folder_path)
print(f"Saving feature extractor to {pytorch_dump_folder_path}")
feature_extractor.save_pretrained(pytorch_dump_folder_path)
print(f"Saving image processor to {pytorch_dump_folder_path}")
image_processor.save_pretrained(pytorch_dump_folder_path)
if push_to_hub:
model_mapping = {
@@ -280,7 +280,7 @@ def convert_movilevit_checkpoint(mobilevit_name, checkpoint_path, pytorch_dump_f
print("Pushing to the hub...")
model_name = model_mapping[mobilevit_name]
feature_extractor.push_to_hub(model_name, organization="apple")
image_processor.push_to_hub(model_name, organization="apple")
model.push_to_hub(model_name, organization="apple")
......
@@ -259,8 +259,8 @@ def convert_mobilevitv2_checkpoint(task_name, checkpoint_path, orig_config_path,
model.load_state_dict(state_dict)
# Check outputs on an image, prepared by MobileViTImageProcessor
feature_extractor = MobileViTImageProcessor(crop_size=config.image_size, size=config.image_size + 32)
encoding = feature_extractor(images=prepare_img(), return_tensors="pt")
image_processor = MobileViTImageProcessor(crop_size=config.image_size, size=config.image_size + 32)
encoding = image_processor(images=prepare_img(), return_tensors="pt")
outputs = model(**encoding)
# verify classification model
@@ -276,8 +276,8 @@ def convert_mobilevitv2_checkpoint(task_name, checkpoint_path, orig_config_path,
Path(pytorch_dump_folder_path).mkdir(exist_ok=True)
print(f"Saving model {task_name} to {pytorch_dump_folder_path}")
model.save_pretrained(pytorch_dump_folder_path)
print(f"Saving feature extractor to {pytorch_dump_folder_path}")
feature_extractor.save_pretrained(pytorch_dump_folder_path)
print(f"Saving image processor to {pytorch_dump_folder_path}")
image_processor.save_pretrained(pytorch_dump_folder_path)
if __name__ == "__main__":
......
@@ -383,7 +383,7 @@ class OwlViTOnnxConfig(OnnxConfig):
processor.tokenizer, batch_size=batch_size, seq_length=seq_length, framework=framework
)
image_input_dict = super().generate_dummy_inputs(
processor.feature_extractor, batch_size=batch_size, framework=framework
processor.image_processor, batch_size=batch_size, framework=framework
)
return {**text_input_dict, **image_input_dict}
......
@@ -29,8 +29,8 @@ from huggingface_hub import Repository
from transformers import (
CLIPTokenizer,
OwlViTConfig,
OwlViTFeatureExtractor,
OwlViTForObjectDetection,
OwlViTImageProcessor,
OwlViTModel,
OwlViTProcessor,
)
@@ -350,16 +350,16 @@ def convert_owlvit_checkpoint(pt_backbone, flax_params, attn_params, pytorch_dum
# Save HF model
hf_model.save_pretrained(repo.local_dir)
# Initialize feature extractor
feature_extractor = OwlViTFeatureExtractor(
# Initialize image processor
image_processor = OwlViTImageProcessor(
size=config.vision_config.image_size, crop_size=config.vision_config.image_size
)
# Initialize tokenizer
tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-base-patch32", pad_token="!", model_max_length=16)
# Initialize processor
processor = OwlViTProcessor(feature_extractor=feature_extractor, tokenizer=tokenizer)
feature_extractor.save_pretrained(repo.local_dir)
processor = OwlViTProcessor(image_processor=image_processor, tokenizer=tokenizer)
image_processor.save_pretrained(repo.local_dir)
processor.save_pretrained(repo.local_dir)
repo.git_add()
......
@@ -29,13 +29,13 @@ from PIL import Image
from transformers import (
PerceiverConfig,
PerceiverFeatureExtractor,
PerceiverForImageClassificationConvProcessing,
PerceiverForImageClassificationFourier,
PerceiverForImageClassificationLearned,
PerceiverForMaskedLM,
PerceiverForMultimodalAutoencoding,
PerceiverForOpticalFlow,
PerceiverImageProcessor,
PerceiverTokenizer,
)
from transformers.utils import logging
@@ -389,9 +389,9 @@ def convert_perceiver_checkpoint(pickle_file, pytorch_dump_folder_path, architec
inputs = encoding.input_ids
input_mask = encoding.attention_mask
elif architecture in ["image_classification", "image_classification_fourier", "image_classification_conv"]:
feature_extractor = PerceiverFeatureExtractor()
image_processor = PerceiverImageProcessor()
image = prepare_img()
encoding = feature_extractor(image, return_tensors="pt")
encoding = image_processor(image, return_tensors="pt")
inputs = encoding.pixel_values
elif architecture == "optical_flow":
inputs = torch.randn(1, 2, 27, 368, 496)
......
@@ -24,7 +24,7 @@ import torch
from huggingface_hub import hf_hub_download
from PIL import Image
from transformers import PoolFormerConfig, PoolFormerFeatureExtractor, PoolFormerForImageClassification
from transformers import PoolFormerConfig, PoolFormerForImageClassification, PoolFormerImageProcessor
from transformers.utils import logging
@@ -141,12 +141,12 @@ def convert_poolformer_checkpoint(model_name, checkpoint_path, pytorch_dump_fold
else:
raise ValueError(f"Size {size} not supported")
# load feature extractor
feature_extractor = PoolFormerFeatureExtractor(crop_pct=crop_pct)
# load image processor
image_processor = PoolFormerImageProcessor(crop_pct=crop_pct)
# Prepare image
image = prepare_img()
pixel_values = feature_extractor(images=image, return_tensors="pt").pixel_values
pixel_values = image_processor(images=image, return_tensors="pt").pixel_values
logger.info(f"Converting model {model_name}...")
@@ -161,9 +161,9 @@ def convert_poolformer_checkpoint(model_name, checkpoint_path, pytorch_dump_fold
model.load_state_dict(state_dict)
model.eval()
# Define feature extractor
feature_extractor = PoolFormerFeatureExtractor(crop_pct=crop_pct)
pixel_values = feature_extractor(images=prepare_img(), return_tensors="pt").pixel_values
# Define image processor
image_processor = PoolFormerImageProcessor(crop_pct=crop_pct)
pixel_values = image_processor(images=prepare_img(), return_tensors="pt").pixel_values
# forward pass
outputs = model(pixel_values)
@@ -187,12 +187,12 @@ def convert_poolformer_checkpoint(model_name, checkpoint_path, pytorch_dump_fold
assert logits.shape == expected_shape
assert torch.allclose(logits[0, :3], expected_slice, atol=1e-2)
# finally, save model and feature extractor
logger.info(f"Saving PyTorch model and feature extractor to {pytorch_dump_folder_path}...")
# finally, save model and image processor
logger.info(f"Saving PyTorch model and image processor to {pytorch_dump_folder_path}...")
Path(pytorch_dump_folder_path).mkdir(exist_ok=True)
model.save_pretrained(pytorch_dump_folder_path)
print(f"Saving feature extractor to {pytorch_dump_folder_path}")
feature_extractor.save_pretrained(pytorch_dump_folder_path)
print(f"Saving image processor to {pytorch_dump_folder_path}")
image_processor.save_pretrained(pytorch_dump_folder_path)
if __name__ == "__main__":
......
@@ -34,7 +34,7 @@ from huggingface_hub import cached_download, hf_hub_url
from torch import Tensor
from vissl.models.model_helpers import get_trunk_forward_outputs
from transformers import AutoFeatureExtractor, RegNetConfig, RegNetForImageClassification, RegNetModel
from transformers import AutoImageProcessor, RegNetConfig, RegNetForImageClassification, RegNetModel
from transformers.modeling_utils import PreTrainedModel
from transformers.utils import logging
@@ -262,10 +262,10 @@ def convert_weights_and_push(save_directory: Path, model_name: str = None, push_
)
size = 384
# we can use the convnext one
feature_extractor = AutoFeatureExtractor.from_pretrained("facebook/convnext-base-224-22k-1k", size=size)
feature_extractor.push_to_hub(
image_processor = AutoImageProcessor.from_pretrained("facebook/convnext-base-224-22k-1k", size=size)
image_processor.push_to_hub(
repo_path_or_name=save_directory / model_name,
commit_message="Add feature extractor",
commit_message="Add image processor",
output_dir=save_directory / model_name,
)
@@ -294,7 +294,7 @@ if __name__ == "__main__":
default=True,
type=bool,
required=False,
help="If True, push model and feature extractor to the hub.",
help="If True, push model and image processor to the hub.",
)
args = parser.parse_args()
......
@@ -30,7 +30,7 @@ from huggingface_hub import cached_download, hf_hub_url
from torch import Tensor
from vissl.models.model_helpers import get_trunk_forward_outputs
from transformers import AutoFeatureExtractor, RegNetConfig, RegNetForImageClassification, RegNetModel
from transformers import AutoImageProcessor, RegNetConfig, RegNetForImageClassification, RegNetModel
from transformers.utils import logging
@@ -209,10 +209,10 @@ def convert_weight_and_push(
size = 224 if "seer" not in name else 384
# we can use the convnext one
feature_extractor = AutoFeatureExtractor.from_pretrained("facebook/convnext-base-224-22k-1k", size=size)
feature_extractor.push_to_hub(
image_processor = AutoImageProcessor.from_pretrained("facebook/convnext-base-224-22k-1k", size=size)
image_processor.push_to_hub(
repo_path_or_name=save_directory / name,
commit_message="Add feature extractor",
commit_message="Add image processor",
use_temp_dir=True,
)
@@ -449,7 +449,7 @@ if __name__ == "__main__":
default=True,
type=bool,
required=False,
help="If True, push model and feature extractor to the hub.",
help="If True, push model and image processor to the hub.",
)
args = parser.parse_args()
......
@@ -28,7 +28,7 @@ import torch.nn as nn
from huggingface_hub import hf_hub_download
from torch import Tensor
from transformers import AutoFeatureExtractor, ResNetConfig, ResNetForImageClassification
from transformers import AutoImageProcessor, ResNetConfig, ResNetForImageClassification
from transformers.utils import logging
@@ -113,10 +113,10 @@ def convert_weight_and_push(name: str, config: ResNetConfig, save_directory: Pat
)
# we can use the convnext one
feature_extractor = AutoFeatureExtractor.from_pretrained("facebook/convnext-base-224-22k-1k")
feature_extractor.push_to_hub(
image_processor = AutoImageProcessor.from_pretrained("facebook/convnext-base-224-22k-1k")
image_processor.push_to_hub(
repo_path_or_name=save_directory / checkpoint_name,
commit_message="Add feature extractor",
commit_message="Add image processor",
use_temp_dir=True,
)
@@ -191,7 +191,7 @@ if __name__ == "__main__":
default=True,
type=bool,
required=False,
help="If True, push model and feature extractor to the hub.",
help="If True, push model and image processor to the hub.",
)
args = parser.parse_args()
......
@@ -27,9 +27,9 @@ from PIL import Image
from transformers import (
SegformerConfig,
SegformerFeatureExtractor,
SegformerForImageClassification,
SegformerForSemanticSegmentation,
SegformerImageProcessor,
)
from transformers.utils import logging
@@ -179,14 +179,14 @@ def convert_segformer_checkpoint(model_name, checkpoint_path, pytorch_dump_folde
else:
raise ValueError(f"Size {size} not supported")
# load feature extractor (only resize + normalize)
feature_extractor = SegformerFeatureExtractor(
# load image processor (only resize + normalize)
image_processor = SegformerImageProcessor(
image_scale=(512, 512), keep_ratio=False, align=False, do_random_crop=False
)
# prepare image
image = prepare_img()
pixel_values = feature_extractor(images=image, return_tensors="pt").pixel_values
pixel_values = image_processor(images=image, return_tensors="pt").pixel_values
logger.info(f"Converting model {model_name}...")
@@ -362,11 +362,11 @@ def convert_segformer_checkpoint(model_name, checkpoint_path, pytorch_dump_folde
assert logits.shape == expected_shape
assert torch.allclose(logits[0, :3, :3, :3], expected_slice, atol=1e-2)
# finally, save model and feature extractor
logger.info(f"Saving PyTorch model and feature extractor to {pytorch_dump_folder_path}...")
# finally, save model and image processor
logger.info(f"Saving PyTorch model and image processor to {pytorch_dump_folder_path}...")
Path(pytorch_dump_folder_path).mkdir(exist_ok=True)
model.save_pretrained(pytorch_dump_folder_path)
feature_extractor.save_pretrained(pytorch_dump_folder_path)
image_processor.save_pretrained(pytorch_dump_folder_path)
if __name__ == "__main__":
......
@@ -22,7 +22,7 @@ import requests
import torch
from PIL import Image
from transformers import SwinConfig, SwinForMaskedImageModeling, ViTFeatureExtractor
from transformers import SwinConfig, SwinForMaskedImageModeling, ViTImageProcessor
def get_swin_config(model_name):
@@ -132,9 +132,9 @@ def convert_swin_checkpoint(model_name, checkpoint_path, pytorch_dump_folder_pat
url = "http://images.cocodataset.org/val2017/000000039769.jpg"
feature_extractor = ViTFeatureExtractor(size={"height": 192, "width": 192})
image_processor = ViTImageProcessor(size={"height": 192, "width": 192})
image = Image.open(requests.get(url, stream=True).raw)
inputs = feature_extractor(images=image, return_tensors="pt")
inputs = image_processor(images=image, return_tensors="pt")
with torch.no_grad():
outputs = model(**inputs).logits
@@ -146,13 +146,13 @@ def convert_swin_checkpoint(model_name, checkpoint_path, pytorch_dump_folder_pat
print(f"Saving model {model_name} to {pytorch_dump_folder_path}")
model.save_pretrained(pytorch_dump_folder_path)
print(f"Saving feature extractor to {pytorch_dump_folder_path}")
feature_extractor.save_pretrained(pytorch_dump_folder_path)
print(f"Saving image processor to {pytorch_dump_folder_path}")
image_processor.save_pretrained(pytorch_dump_folder_path)
if push_to_hub:
print(f"Pushing model and feature extractor for {model_name} to hub")
print(f"Pushing model and image processor for {model_name} to hub")
model.push_to_hub(f"microsoft/{model_name}")
feature_extractor.push_to_hub(f"microsoft/{model_name}")
image_processor.push_to_hub(f"microsoft/{model_name}")
if __name__ == "__main__":
......
@@ -7,7 +7,7 @@ import torch
from huggingface_hub import hf_hub_download
from PIL import Image
from transformers import AutoFeatureExtractor, SwinConfig, SwinForImageClassification
from transformers import AutoImageProcessor, SwinConfig, SwinForImageClassification
def get_swin_config(swin_name):
@@ -140,9 +140,9 @@ def convert_swin_checkpoint(swin_name, pytorch_dump_folder_path):
url = "http://images.cocodataset.org/val2017/000000039769.jpg"
feature_extractor = AutoFeatureExtractor.from_pretrained("microsoft/{}".format(swin_name.replace("_", "-")))
image_processor = AutoImageProcessor.from_pretrained("microsoft/{}".format(swin_name.replace("_", "-")))
image = Image.open(requests.get(url, stream=True).raw)
inputs = feature_extractor(images=image, return_tensors="pt")
inputs = image_processor(images=image, return_tensors="pt")
timm_outs = timm_model(inputs["pixel_values"])
hf_outs = model(**inputs).logits
@@ -152,8 +152,8 @@ def convert_swin_checkpoint(swin_name, pytorch_dump_folder_path):
print(f"Saving model {swin_name} to {pytorch_dump_folder_path}")
model.save_pretrained(pytorch_dump_folder_path)
print(f"Saving feature extractor to {pytorch_dump_folder_path}")
feature_extractor.save_pretrained(pytorch_dump_folder_path)
print(f"Saving image processor to {pytorch_dump_folder_path}")
image_processor.save_pretrained(pytorch_dump_folder_path)
if __name__ == "__main__":
......
@@ -24,7 +24,7 @@ import torch
from huggingface_hub import hf_hub_download
from PIL import Image
from transformers import AutoFeatureExtractor, Swinv2Config, Swinv2ForImageClassification
from transformers import AutoImageProcessor, Swinv2Config, Swinv2ForImageClassification
def get_swinv2_config(swinv2_name):
@@ -180,9 +180,9 @@ def convert_swinv2_checkpoint(swinv2_name, pytorch_dump_folder_path):
url = "http://images.cocodataset.org/val2017/000000039769.jpg"
feature_extractor = AutoFeatureExtractor.from_pretrained("microsoft/{}".format(swinv2_name.replace("_", "-")))
image_processor = AutoImageProcessor.from_pretrained("microsoft/{}".format(swinv2_name.replace("_", "-")))
image = Image.open(requests.get(url, stream=True).raw)
inputs = feature_extractor(images=image, return_tensors="pt")
inputs = image_processor(images=image, return_tensors="pt")
timm_outs = timm_model(inputs["pixel_values"])
hf_outs = model(**inputs).logits
@@ -192,8 +192,8 @@ def convert_swinv2_checkpoint(swinv2_name, pytorch_dump_folder_path):
print(f"Saving model {swinv2_name} to {pytorch_dump_folder_path}")
model.save_pretrained(pytorch_dump_folder_path)
print(f"Saving feature extractor to {pytorch_dump_folder_path}")
feature_extractor.save_pretrained(pytorch_dump_folder_path)
print(f"Saving image processor to {pytorch_dump_folder_path}")
image_processor.save_pretrained(pytorch_dump_folder_path)
model.push_to_hub(
repo_path_or_name=Path(pytorch_dump_folder_path, swinv2_name),
......
@@ -27,7 +27,7 @@ from huggingface_hub import hf_hub_download
from PIL import Image
from torchvision.transforms import functional as F
from transformers import DetrFeatureExtractor, TableTransformerConfig, TableTransformerForObjectDetection
from transformers import DetrImageProcessor, TableTransformerConfig, TableTransformerForObjectDetection
from transformers.utils import logging
@@ -242,7 +242,7 @@ def convert_table_transformer_checkpoint(checkpoint_url, pytorch_dump_folder_pat
config.id2label = id2label
config.label2id = {v: k for k, v in id2label.items()}
feature_extractor = DetrFeatureExtractor(
image_processor = DetrImageProcessor(
format="coco_detection", max_size=800 if "detection" in checkpoint_url else 1000
)
model = TableTransformerForObjectDetection(config)
@@ -277,11 +277,11 @@ def convert_table_transformer_checkpoint(checkpoint_url, pytorch_dump_folder_pat
print("Looks ok!")
if pytorch_dump_folder_path is not None:
# Save model and feature extractor
logger.info(f"Saving PyTorch model and feature extractor to {pytorch_dump_folder_path}...")
# Save model and image processor
logger.info(f"Saving PyTorch model and image processor to {pytorch_dump_folder_path}...")
Path(pytorch_dump_folder_path).mkdir(exist_ok=True)
model.save_pretrained(pytorch_dump_folder_path)
feature_extractor.save_pretrained(pytorch_dump_folder_path)
image_processor.save_pretrained(pytorch_dump_folder_path)
if push_to_hub:
# Push model to HF hub
@@ -292,7 +292,7 @@ def convert_table_transformer_checkpoint(checkpoint_url, pytorch_dump_folder_pat
else "microsoft/table-transformer-structure-recognition"
)
model.push_to_hub(model_name)
feature_extractor.push_to_hub(model_name)
image_processor.push_to_hub(model_name)
if __name__ == "__main__":
......
@@ -22,7 +22,7 @@ import numpy as np
import torch
from huggingface_hub import hf_hub_download
from transformers import TimesformerConfig, TimesformerForVideoClassification, VideoMAEFeatureExtractor
from transformers import TimesformerConfig, TimesformerForVideoClassification, VideoMAEImageProcessor
def get_timesformer_config(model_name):
@@ -156,9 +156,9 @@ def convert_timesformer_checkpoint(checkpoint_url, pytorch_dump_folder_path, mod
model.eval()
# verify model on basic input
feature_extractor = VideoMAEFeatureExtractor(image_mean=[0.5, 0.5, 0.5], image_std=[0.5, 0.5, 0.5])
image_processor = VideoMAEImageProcessor(image_mean=[0.5, 0.5, 0.5], image_std=[0.5, 0.5, 0.5])
video = prepare_video()
inputs = feature_extractor(video[:8], return_tensors="pt")
inputs = image_processor(video[:8], return_tensors="pt")
outputs = model(**inputs)
logits = outputs.logits
@@ -215,8 +215,8 @@ def convert_timesformer_checkpoint(checkpoint_url, pytorch_dump_folder_path, mod
print("Logits ok!")
if pytorch_dump_folder_path is not None:
print(f"Saving model and feature extractor to {pytorch_dump_folder_path}")
feature_extractor.save_pretrained(pytorch_dump_folder_path)
print(f"Saving model and image processor to {pytorch_dump_folder_path}")
image_processor.save_pretrained(pytorch_dump_folder_path)
model.save_pretrained(pytorch_dump_folder_path)
if push_to_hub:
......