Unverified commit ae454f41 authored by amyeroberts, committed by GitHub

Update old existing feature extractor references (#24552)

* Update old existing feature extractor references

* Typo

* Apply suggestions from code review

* Apply suggestions from code review

* Apply suggestions from code review

* Address comments from review - update 'feature extractor'
Co-authored-by: Yih-Dar <2521628+ydshieh@users.noreply.github.com>
parent 10c2ac7b
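The change is mechanical across all of the touched test files: the deprecated `*FeatureExtractor` classes and `feature_extractor` names are swapped for their `*ImageProcessor` equivalents, which expose the same call API. A minimal before/after sketch of the pattern, not part of the commit itself; the checkpoint and fixture path are taken from the tests below, and running it assumes `transformers` with the vision extras installed:

```python
from PIL import Image

# Before this commit the tests used the deprecated class:
#     from transformers import AutoFeatureExtractor
#     feature_extractor = AutoFeatureExtractor.from_pretrained("facebook/detr-resnet-50")

# After the rename, the same preprocessing object is loaded through the
# image-processor API:
from transformers import AutoImageProcessor

image_processor = AutoImageProcessor.from_pretrained("facebook/detr-resnet-50")
image = Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png")

# The __call__ signature is unchanged, so call sites only need the new name.
inputs = image_processor(images=image, return_tensors="pt")
print(inputs["pixel_values"].shape)
```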
@@ -28,7 +28,7 @@ if is_tf_available():
 if is_vision_available():
     from PIL import Image

-    from transformers import AutoFeatureExtractor
+    from transformers import AutoImageProcessor


 class TFCvtConfigTester(ConfigTester):
@@ -265,16 +265,16 @@ def prepare_img():
 @require_vision
 class TFCvtModelIntegrationTest(unittest.TestCase):
     @cached_property
-    def default_feature_extractor(self):
-        return AutoFeatureExtractor.from_pretrained(TF_CVT_PRETRAINED_MODEL_ARCHIVE_LIST[0])
+    def default_image_processor(self):
+        return AutoImageProcessor.from_pretrained(TF_CVT_PRETRAINED_MODEL_ARCHIVE_LIST[0])

     @slow
     def test_inference_image_classification_head(self):
         model = TFCvtForImageClassification.from_pretrained(TF_CVT_PRETRAINED_MODEL_ARCHIVE_LIST[0])

-        feature_extractor = self.default_feature_extractor
+        image_processor = self.default_image_processor
         image = prepare_img()
-        inputs = feature_extractor(images=image, return_tensors="tf")
+        inputs = image_processor(images=image, return_tensors="tf")

         # forward pass
         outputs = model(**inputs)
...
@@ -44,7 +44,7 @@ if is_torch_available():
 if is_vision_available():
     from PIL import Image

-    from transformers import BeitFeatureExtractor
+    from transformers import BeitImageProcessor


 class Data2VecVisionModelTester:
@@ -327,11 +327,9 @@ def prepare_img():
 @require_vision
 class Data2VecVisionModelIntegrationTest(unittest.TestCase):
     @cached_property
-    def default_feature_extractor(self):
+    def default_image_processor(self):
         return (
-            BeitFeatureExtractor.from_pretrained("facebook/data2vec-vision-base-ft1k")
-            if is_vision_available()
-            else None
+            BeitImageProcessor.from_pretrained("facebook/data2vec-vision-base-ft1k") if is_vision_available() else None
         )

     @slow
@@ -340,9 +338,9 @@ class Data2VecVisionModelIntegrationTest(unittest.TestCase):
             torch_device
         )

-        feature_extractor = self.default_feature_extractor
+        image_processor = self.default_image_processor
         image = prepare_img()
-        inputs = feature_extractor(images=image, return_tensors="pt").to(torch_device)
+        inputs = image_processor(images=image, return_tensors="pt").to(torch_device)

         # forward pass
         with torch.no_grad():
...
@@ -46,7 +46,7 @@ if is_tf_available():
 if is_vision_available():
     from PIL import Image

-    from transformers import BeitFeatureExtractor
+    from transformers import BeitImageProcessor


 class TFData2VecVisionModelTester:
@@ -469,20 +469,18 @@ def prepare_img():
 @require_vision
 class TFData2VecVisionModelIntegrationTest(unittest.TestCase):
     @cached_property
-    def default_feature_extractor(self):
+    def default_image_processor(self):
         return (
-            BeitFeatureExtractor.from_pretrained("facebook/data2vec-vision-base-ft1k")
-            if is_vision_available()
-            else None
+            BeitImageProcessor.from_pretrained("facebook/data2vec-vision-base-ft1k") if is_vision_available() else None
         )

     @slow
     def test_inference_image_classification_head_imagenet_1k(self):
         model = TFData2VecVisionForImageClassification.from_pretrained("facebook/data2vec-vision-base-ft1k")

-        feature_extractor = self.default_feature_extractor
+        image_processor = self.default_image_processor
         image = prepare_img()
-        inputs = feature_extractor(images=image, return_tensors="tf")
+        inputs = image_processor(images=image, return_tensors="tf")

         # forward pass
         outputs = model(**inputs)
...
@@ -39,7 +39,7 @@ if is_timm_available():
 if is_vision_available():
     from PIL import Image

-    from transformers import AutoFeatureExtractor
+    from transformers import AutoImageProcessor


 class DeformableDetrModelTester:
@@ -563,15 +563,15 @@ def prepare_img():
 @slow
 class DeformableDetrModelIntegrationTests(unittest.TestCase):
     @cached_property
-    def default_feature_extractor(self):
-        return AutoFeatureExtractor.from_pretrained("SenseTime/deformable-detr") if is_vision_available() else None
+    def default_image_processor(self):
+        return AutoImageProcessor.from_pretrained("SenseTime/deformable-detr") if is_vision_available() else None

     def test_inference_object_detection_head(self):
         model = DeformableDetrForObjectDetection.from_pretrained("SenseTime/deformable-detr").to(torch_device)

-        feature_extractor = self.default_feature_extractor
+        image_processor = self.default_image_processor
         image = prepare_img()
-        encoding = feature_extractor(images=image, return_tensors="pt").to(torch_device)
+        encoding = image_processor(images=image, return_tensors="pt").to(torch_device)
         pixel_values = encoding["pixel_values"].to(torch_device)
         pixel_mask = encoding["pixel_mask"].to(torch_device)
@@ -595,7 +595,7 @@ class DeformableDetrModelIntegrationTests(unittest.TestCase):
         self.assertTrue(torch.allclose(outputs.pred_boxes[0, :3, :3], expected_boxes, atol=1e-4))

         # verify postprocessing
-        results = feature_extractor.post_process_object_detection(
+        results = image_processor.post_process_object_detection(
             outputs, threshold=0.3, target_sizes=[image.size[::-1]]
         )[0]
         expected_scores = torch.tensor([0.7999, 0.7894, 0.6331, 0.4720, 0.4382]).to(torch_device)
@@ -612,9 +612,9 @@ class DeformableDetrModelIntegrationTests(unittest.TestCase):
             "SenseTime/deformable-detr-with-box-refine-two-stage"
         ).to(torch_device)

-        feature_extractor = self.default_feature_extractor
+        image_processor = self.default_image_processor
         image = prepare_img()
-        encoding = feature_extractor(images=image, return_tensors="pt").to(torch_device)
+        encoding = image_processor(images=image, return_tensors="pt").to(torch_device)
         pixel_values = encoding["pixel_values"].to(torch_device)
         pixel_mask = encoding["pixel_mask"].to(torch_device)
@@ -639,9 +639,9 @@ class DeformableDetrModelIntegrationTests(unittest.TestCase):
     @require_torch_gpu
     def test_inference_object_detection_head_equivalence_cpu_gpu(self):
-        feature_extractor = self.default_feature_extractor
+        image_processor = self.default_image_processor
         image = prepare_img()
-        encoding = feature_extractor(images=image, return_tensors="pt")
+        encoding = image_processor(images=image, return_tensors="pt")
         pixel_values = encoding["pixel_values"]
         pixel_mask = encoding["pixel_mask"]
...
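The post-processing helpers keep their names through the rename; only the receiver changes from `feature_extractor` to `image_processor`. A sketch of the detection flow the tests above exercise, not part of the commit; checkpoint, threshold, and fixture path are the ones the tests use, and `torch` plus the vision extras are assumed:

```python
import torch
from PIL import Image
from transformers import AutoImageProcessor, DeformableDetrForObjectDetection

image_processor = AutoImageProcessor.from_pretrained("SenseTime/deformable-detr")
model = DeformableDetrForObjectDetection.from_pretrained("SenseTime/deformable-detr")

image = Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png")
encoding = image_processor(images=image, return_tensors="pt")

with torch.no_grad():
    outputs = model(**encoding)

# Same helper as before the rename, now called on the image processor.
results = image_processor.post_process_object_detection(
    outputs, threshold=0.3, target_sizes=[image.size[::-1]]
)[0]
for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
    print(f"{model.config.id2label[label.item()]}: {score.item():.3f} at {box.tolist()}")
```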
@@ -55,7 +55,7 @@ if is_torch_available():
 if is_vision_available():
     from PIL import Image

-    from transformers import DeiTFeatureExtractor
+    from transformers import DeiTImageProcessor


 class DeiTModelTester:
@@ -381,9 +381,9 @@ def prepare_img():
 @require_vision
 class DeiTModelIntegrationTest(unittest.TestCase):
     @cached_property
-    def default_feature_extractor(self):
+    def default_image_processor(self):
         return (
-            DeiTFeatureExtractor.from_pretrained("facebook/deit-base-distilled-patch16-224")
+            DeiTImageProcessor.from_pretrained("facebook/deit-base-distilled-patch16-224")
             if is_vision_available()
             else None
         )
@@ -394,9 +394,9 @@ class DeiTModelIntegrationTest(unittest.TestCase):
             torch_device
         )

-        feature_extractor = self.default_feature_extractor
+        image_processor = self.default_image_processor
         image = prepare_img()
-        inputs = feature_extractor(images=image, return_tensors="pt").to(torch_device)
+        inputs = image_processor(images=image, return_tensors="pt").to(torch_device)

         # forward pass
         with torch.no_grad():
@@ -420,10 +420,10 @@ class DeiTModelIntegrationTest(unittest.TestCase):
         model = DeiTModel.from_pretrained(
             "facebook/deit-base-distilled-patch16-224", torch_dtype=torch.float16, device_map="auto"
         )
-        feature_extractor = self.default_feature_extractor
+        image_processor = self.default_image_processor

         image = prepare_img()
-        inputs = feature_extractor(images=image, return_tensors="pt")
+        inputs = image_processor(images=image, return_tensors="pt")
         pixel_values = inputs.pixel_values.to(torch_device)

         # forward pass to make sure inference works in fp16
...
@@ -46,7 +46,7 @@ if is_tf_available():
 if is_vision_available():
     from PIL import Image

-    from transformers import DeiTFeatureExtractor
+    from transformers import DeiTImageProcessor


 class TFDeiTModelTester:
@@ -266,9 +266,9 @@ def prepare_img():
 @require_vision
 class DeiTModelIntegrationTest(unittest.TestCase):
     @cached_property
-    def default_feature_extractor(self):
+    def default_image_processor(self):
         return (
-            DeiTFeatureExtractor.from_pretrained("facebook/deit-base-distilled-patch16-224")
+            DeiTImageProcessor.from_pretrained("facebook/deit-base-distilled-patch16-224")
             if is_vision_available()
             else None
         )
@@ -277,9 +277,9 @@ class DeiTModelIntegrationTest(unittest.TestCase):
     def test_inference_image_classification_head(self):
         model = TFDeiTForImageClassificationWithTeacher.from_pretrained("facebook/deit-base-distilled-patch16-224")

-        feature_extractor = self.default_feature_extractor
+        image_processor = self.default_image_processor
         image = prepare_img()
-        inputs = feature_extractor(images=image, return_tensors="tf")
+        inputs = image_processor(images=image, return_tensors="tf")

         # forward pass
         outputs = model(**inputs)
...
@@ -38,7 +38,7 @@ if is_timm_available():
 if is_vision_available():
     from PIL import Image

-    from transformers import DetrFeatureExtractor
+    from transformers import DetrImageProcessor


 class DetrModelTester:
@@ -512,15 +512,15 @@ def prepare_img():
 @slow
 class DetrModelIntegrationTestsTimmBackbone(unittest.TestCase):
     @cached_property
-    def default_feature_extractor(self):
-        return DetrFeatureExtractor.from_pretrained("facebook/detr-resnet-50") if is_vision_available() else None
+    def default_image_processor(self):
+        return DetrImageProcessor.from_pretrained("facebook/detr-resnet-50") if is_vision_available() else None

     def test_inference_no_head(self):
         model = DetrModel.from_pretrained("facebook/detr-resnet-50").to(torch_device)

-        feature_extractor = self.default_feature_extractor
+        image_processor = self.default_image_processor
         image = prepare_img()
-        encoding = feature_extractor(images=image, return_tensors="pt").to(torch_device)
+        encoding = image_processor(images=image, return_tensors="pt").to(torch_device)

         with torch.no_grad():
             outputs = model(**encoding)
@@ -535,9 +535,9 @@ class DetrModelIntegrationTestsTimmBackbone(unittest.TestCase):
     def test_inference_object_detection_head(self):
         model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50").to(torch_device)

-        feature_extractor = self.default_feature_extractor
+        image_processor = self.default_image_processor
         image = prepare_img()
-        encoding = feature_extractor(images=image, return_tensors="pt").to(torch_device)
+        encoding = image_processor(images=image, return_tensors="pt").to(torch_device)
         pixel_values = encoding["pixel_values"].to(torch_device)
         pixel_mask = encoding["pixel_mask"].to(torch_device)
@@ -560,7 +560,7 @@ class DetrModelIntegrationTestsTimmBackbone(unittest.TestCase):
         self.assertTrue(torch.allclose(outputs.pred_boxes[0, :3, :3], expected_slice_boxes, atol=1e-4))

         # verify postprocessing
-        results = feature_extractor.post_process_object_detection(
+        results = image_processor.post_process_object_detection(
             outputs, threshold=0.3, target_sizes=[image.size[::-1]]
         )[0]
         expected_scores = torch.tensor([0.9982, 0.9960, 0.9955, 0.9988, 0.9987]).to(torch_device)
@@ -575,9 +575,9 @@ class DetrModelIntegrationTestsTimmBackbone(unittest.TestCase):
     def test_inference_panoptic_segmentation_head(self):
         model = DetrForSegmentation.from_pretrained("facebook/detr-resnet-50-panoptic").to(torch_device)

-        feature_extractor = self.default_feature_extractor
+        image_processor = self.default_image_processor
         image = prepare_img()
-        encoding = feature_extractor(images=image, return_tensors="pt").to(torch_device)
+        encoding = image_processor(images=image, return_tensors="pt").to(torch_device)
         pixel_values = encoding["pixel_values"].to(torch_device)
         pixel_mask = encoding["pixel_mask"].to(torch_device)
@@ -607,7 +607,7 @@ class DetrModelIntegrationTestsTimmBackbone(unittest.TestCase):
         self.assertTrue(torch.allclose(outputs.pred_masks[0, 0, :3, :3], expected_slice_masks, atol=1e-3))

         # verify postprocessing
-        results = feature_extractor.post_process_panoptic_segmentation(
+        results = image_processor.post_process_panoptic_segmentation(
             outputs, threshold=0.3, target_sizes=[image.size[::-1]]
         )[0]
@@ -633,9 +633,9 @@ class DetrModelIntegrationTestsTimmBackbone(unittest.TestCase):
 @slow
 class DetrModelIntegrationTests(unittest.TestCase):
     @cached_property
-    def default_feature_extractor(self):
+    def default_image_processor(self):
         return (
-            DetrFeatureExtractor.from_pretrained("facebook/detr-resnet-50", revision="no_timm")
+            DetrImageProcessor.from_pretrained("facebook/detr-resnet-50", revision="no_timm")
             if is_vision_available()
             else None
         )
@@ -643,9 +643,9 @@ class DetrModelIntegrationTests(unittest.TestCase):
     def test_inference_no_head(self):
         model = DetrModel.from_pretrained("facebook/detr-resnet-50", revision="no_timm").to(torch_device)

-        feature_extractor = self.default_feature_extractor
+        image_processor = self.default_image_processor
         image = prepare_img()
-        encoding = feature_extractor(images=image, return_tensors="pt").to(torch_device)
+        encoding = image_processor(images=image, return_tensors="pt").to(torch_device)

         with torch.no_grad():
             outputs = model(**encoding)
...
@@ -367,16 +367,16 @@ class DinatModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
 @require_torch
 class DinatModelIntegrationTest(unittest.TestCase):
     @cached_property
-    def default_feature_extractor(self):
+    def default_image_processor(self):
         return AutoImageProcessor.from_pretrained("shi-labs/dinat-mini-in1k-224") if is_vision_available() else None

     @slow
     def test_inference_image_classification_head(self):
         model = DinatForImageClassification.from_pretrained("shi-labs/dinat-mini-in1k-224").to(torch_device)
-        feature_extractor = self.default_feature_extractor
+        image_processor = self.default_image_processor
         image = Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png")
-        inputs = feature_extractor(images=image, return_tensors="pt").to(torch_device)
+        inputs = image_processor(images=image, return_tensors="pt").to(torch_device)

         # forward pass
         with torch.no_grad():
...
@@ -25,7 +25,7 @@ if is_torch_available():
     from transformers import AutoModelForImageClassification

 if is_vision_available():
-    from transformers import AutoFeatureExtractor
+    from transformers import AutoImageProcessor


 @require_torch
@@ -33,7 +33,7 @@ if is_vision_available():
 class DiTIntegrationTest(unittest.TestCase):
     @slow
     def test_for_image_classification(self):
-        feature_extractor = AutoFeatureExtractor.from_pretrained("microsoft/dit-base-finetuned-rvlcdip")
+        image_processor = AutoImageProcessor.from_pretrained("microsoft/dit-base-finetuned-rvlcdip")
         model = AutoModelForImageClassification.from_pretrained("microsoft/dit-base-finetuned-rvlcdip")
         model.to(torch_device)
@@ -43,7 +43,7 @@ class DiTIntegrationTest(unittest.TestCase):
         image = dataset["train"][0]["image"].convert("RGB")

-        inputs = feature_extractor(image, return_tensors="pt").to(torch_device)
+        inputs = image_processor(image, return_tensors="pt").to(torch_device)

         # forward pass
         with torch.no_grad():
...
@@ -39,7 +39,7 @@ if is_torch_available():
 if is_vision_available():
     from PIL import Image

-    from transformers import DPTFeatureExtractor
+    from transformers import DPTImageProcessor


 class DPTModelTester:
@@ -293,11 +293,11 @@ def prepare_img():
 @slow
 class DPTModelIntegrationTest(unittest.TestCase):
     def test_inference_depth_estimation(self):
-        feature_extractor = DPTFeatureExtractor.from_pretrained("Intel/dpt-large")
+        image_processor = DPTImageProcessor.from_pretrained("Intel/dpt-large")
         model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large").to(torch_device)

         image = prepare_img()
-        inputs = feature_extractor(images=image, return_tensors="pt").to(torch_device)
+        inputs = image_processor(images=image, return_tensors="pt").to(torch_device)

         # forward pass
         with torch.no_grad():
@@ -315,11 +315,11 @@ class DPTModelIntegrationTest(unittest.TestCase):
         self.assertTrue(torch.allclose(outputs.predicted_depth[0, :3, :3], expected_slice, atol=1e-4))

     def test_inference_semantic_segmentation(self):
-        feature_extractor = DPTFeatureExtractor.from_pretrained("Intel/dpt-large-ade")
+        image_processor = DPTImageProcessor.from_pretrained("Intel/dpt-large-ade")
         model = DPTForSemanticSegmentation.from_pretrained("Intel/dpt-large-ade").to(torch_device)

         image = prepare_img()
-        inputs = feature_extractor(images=image, return_tensors="pt").to(torch_device)
+        inputs = image_processor(images=image, return_tensors="pt").to(torch_device)

         # forward pass
         with torch.no_grad():
@@ -336,11 +336,11 @@ class DPTModelIntegrationTest(unittest.TestCase):
         self.assertTrue(torch.allclose(outputs.logits[0, 0, :3, :3], expected_slice, atol=1e-4))

     def test_post_processing_semantic_segmentation(self):
-        feature_extractor = DPTFeatureExtractor.from_pretrained("Intel/dpt-large-ade")
+        image_processor = DPTImageProcessor.from_pretrained("Intel/dpt-large-ade")
         model = DPTForSemanticSegmentation.from_pretrained("Intel/dpt-large-ade").to(torch_device)

         image = prepare_img()
-        inputs = feature_extractor(images=image, return_tensors="pt").to(torch_device)
+        inputs = image_processor(images=image, return_tensors="pt").to(torch_device)

         # forward pass
         with torch.no_grad():
@@ -348,10 +348,10 @@ class DPTModelIntegrationTest(unittest.TestCase):
         outputs.logits = outputs.logits.detach().cpu()

-        segmentation = feature_extractor.post_process_semantic_segmentation(outputs=outputs, target_sizes=[(500, 300)])
+        segmentation = image_processor.post_process_semantic_segmentation(outputs=outputs, target_sizes=[(500, 300)])
         expected_shape = torch.Size((500, 300))
         self.assertEqual(segmentation[0].shape, expected_shape)

-        segmentation = feature_extractor.post_process_semantic_segmentation(outputs=outputs)
+        segmentation = image_processor.post_process_semantic_segmentation(outputs=outputs)
         expected_shape = torch.Size((480, 480))
         self.assertEqual(segmentation[0].shape, expected_shape)
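For segmentation models, the renamed object also carries the resize logic applied after the forward pass. A sketch of the two post-processing modes checked in `test_post_processing_semantic_segmentation` above, not part of the commit; it assumes the same `Intel/dpt-large-ade` checkpoint and the COCO test fixture:

```python
import torch
from PIL import Image
from transformers import DPTForSemanticSegmentation, DPTImageProcessor

image_processor = DPTImageProcessor.from_pretrained("Intel/dpt-large-ade")
model = DPTForSemanticSegmentation.from_pretrained("Intel/dpt-large-ade")

image = Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png")
inputs = image_processor(images=image, return_tensors="pt")
with torch.no_grad():
    outputs = model(**inputs)

# With target_sizes, each predicted map is resized to the requested (height, width)...
segmentation = image_processor.post_process_semantic_segmentation(
    outputs=outputs, target_sizes=[(500, 300)]
)
print(segmentation[0].shape)  # torch.Size([500, 300])

# ...without it, maps stay at the model's native resolution.
segmentation = image_processor.post_process_semantic_segmentation(outputs=outputs)
print(segmentation[0].shape)  # torch.Size([480, 480])
```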
@@ -39,7 +39,7 @@ if is_torch_available():
 if is_vision_available():
     from PIL import Image

-    from transformers import DPTFeatureExtractor
+    from transformers import DPTImageProcessor


 class DPTModelTester:
@@ -314,11 +314,11 @@ def prepare_img():
 @slow
 class DPTModelIntegrationTest(unittest.TestCase):
     def test_inference_depth_estimation(self):
-        feature_extractor = DPTFeatureExtractor.from_pretrained("Intel/dpt-hybrid-midas")
+        image_processor = DPTImageProcessor.from_pretrained("Intel/dpt-hybrid-midas")
         model = DPTForDepthEstimation.from_pretrained("Intel/dpt-hybrid-midas").to(torch_device)

         image = prepare_img()
-        inputs = feature_extractor(images=image, return_tensors="pt").to(torch_device)
+        inputs = image_processor(images=image, return_tensors="pt").to(torch_device)

         # forward pass
         with torch.no_grad():
...
@@ -444,7 +444,7 @@ def prepare_img():
 @require_vision
 class EfficientFormerModelIntegrationTest(unittest.TestCase):
     @cached_property
-    def default_feature_extractor(self):
+    def default_image_processor(self):
         return (
             EfficientFormerImageProcessor.from_pretrained("snap-research/efficientformer-l1-300")
             if is_vision_available()
@@ -457,9 +457,9 @@ class EfficientFormerModelIntegrationTest(unittest.TestCase):
             torch_device
         )

-        feature_extractor = self.default_feature_extractor
+        image_processor = self.default_image_processor
         image = prepare_img()
-        inputs = feature_extractor(images=image, return_tensors="pt").to(torch_device)
+        inputs = image_processor(images=image, return_tensors="pt").to(torch_device)

         # forward pass
         with torch.no_grad():
@@ -478,9 +478,9 @@ class EfficientFormerModelIntegrationTest(unittest.TestCase):
             "snap-research/efficientformer-l1-300"
         ).to(torch_device)

-        feature_extractor = self.default_feature_extractor
+        image_processor = self.default_image_processor
         image = prepare_img()
-        inputs = feature_extractor(images=image, return_tensors="pt").to(torch_device)
+        inputs = image_processor(images=image, return_tensors="pt").to(torch_device)

         # forward pass
         with torch.no_grad():
...
@@ -37,7 +37,7 @@ if is_torch_available():
 if is_vision_available():
     from PIL import Image

-    from transformers import GLPNFeatureExtractor
+    from transformers import GLPNImageProcessor


 class GLPNConfigTester(ConfigTester):
@@ -337,11 +337,11 @@ def prepare_img():
 class GLPNModelIntegrationTest(unittest.TestCase):
     @slow
     def test_inference_depth_estimation(self):
-        feature_extractor = GLPNFeatureExtractor.from_pretrained(GLPN_PRETRAINED_MODEL_ARCHIVE_LIST[0])
+        image_processor = GLPNImageProcessor.from_pretrained(GLPN_PRETRAINED_MODEL_ARCHIVE_LIST[0])
         model = GLPNForDepthEstimation.from_pretrained(GLPN_PRETRAINED_MODEL_ARCHIVE_LIST[0]).to(torch_device)

         image = prepare_img()
-        inputs = feature_extractor(images=image, return_tensors="pt").to(torch_device)
+        inputs = image_processor(images=image, return_tensors="pt").to(torch_device)

         # forward pass
         with torch.no_grad():
...
@@ -49,7 +49,7 @@ if is_torch_available():
 if is_vision_available():
     from PIL import Image

-    from transformers import ImageGPTFeatureExtractor
+    from transformers import ImageGPTImageProcessor


 class ImageGPTModelTester:
@@ -535,16 +535,16 @@ def prepare_img():
 @require_vision
 class ImageGPTModelIntegrationTest(unittest.TestCase):
     @cached_property
-    def default_feature_extractor(self):
-        return ImageGPTFeatureExtractor.from_pretrained("openai/imagegpt-small") if is_vision_available() else None
+    def default_image_processor(self):
+        return ImageGPTImageProcessor.from_pretrained("openai/imagegpt-small") if is_vision_available() else None

     @slow
     def test_inference_causal_lm_head(self):
         model = ImageGPTForCausalImageModeling.from_pretrained("openai/imagegpt-small").to(torch_device)
-        feature_extractor = self.default_feature_extractor
+        image_processor = self.default_image_processor
         image = prepare_img()
-        inputs = feature_extractor(images=image, return_tensors="pt").to(torch_device)
+        inputs = image_processor(images=image, return_tensors="pt").to(torch_device)

         # forward pass
         with torch.no_grad():
...
@@ -45,7 +45,7 @@ if is_torch_available():
 if is_vision_available():
     from PIL import Image

-    from transformers import LayoutLMv3FeatureExtractor
+    from transformers import LayoutLMv3ImageProcessor


 class LayoutLMv3ModelTester:
@@ -382,16 +382,16 @@ def prepare_img():
 @require_torch
 class LayoutLMv3ModelIntegrationTest(unittest.TestCase):
     @cached_property
-    def default_feature_extractor(self):
-        return LayoutLMv3FeatureExtractor(apply_ocr=False) if is_vision_available() else None
+    def default_image_processor(self):
+        return LayoutLMv3ImageProcessor(apply_ocr=False) if is_vision_available() else None

     @slow
     def test_inference_no_head(self):
         model = LayoutLMv3Model.from_pretrained("microsoft/layoutlmv3-base").to(torch_device)

-        feature_extractor = self.default_feature_extractor
+        image_processor = self.default_image_processor
         image = prepare_img()
-        pixel_values = feature_extractor(images=image, return_tensors="pt").pixel_values.to(torch_device)
+        pixel_values = image_processor(images=image, return_tensors="pt").pixel_values.to(torch_device)

         input_ids = torch.tensor([[1, 2]])
         bbox = torch.tensor([[1, 2, 3, 4], [5, 6, 7, 8]]).unsqueeze(0)
...
@@ -51,7 +51,7 @@ if is_tf_available():
 if is_vision_available():
     from PIL import Image

-    from transformers import LayoutLMv3FeatureExtractor
+    from transformers import LayoutLMv3ImageProcessor


 class TFLayoutLMv3ModelTester:
@@ -482,16 +482,16 @@ def prepare_img():
 @require_tf
 class TFLayoutLMv3ModelIntegrationTest(unittest.TestCase):
     @cached_property
-    def default_feature_extractor(self):
-        return LayoutLMv3FeatureExtractor(apply_ocr=False) if is_vision_available() else None
+    def default_image_processor(self):
+        return LayoutLMv3ImageProcessor(apply_ocr=False) if is_vision_available() else None

     @slow
     def test_inference_no_head(self):
         model = TFLayoutLMv3Model.from_pretrained("microsoft/layoutlmv3-base")

-        feature_extractor = self.default_feature_extractor
+        image_processor = self.default_image_processor
         image = prepare_img()
-        pixel_values = feature_extractor(images=image, return_tensors="tf").pixel_values
+        pixel_values = image_processor(images=image, return_tensors="tf").pixel_values

         input_ids = tf.constant([[1, 2]])
         bbox = tf.expand_dims(tf.constant([[1, 2, 3, 4], [5, 6, 7, 8]]), axis=0)
...
@@ -36,7 +36,7 @@ from transformers.utils import FEATURE_EXTRACTOR_NAME, cached_property, is_pytesseract_available
 if is_pytesseract_available():
     from PIL import Image

-    from transformers import LayoutLMv2FeatureExtractor, LayoutXLMProcessor
+    from transformers import LayoutLMv2ImageProcessor, LayoutXLMProcessor


 @require_pytesseract
@@ -47,7 +47,7 @@ class LayoutXLMProcessorTest(unittest.TestCase):
     rust_tokenizer_class = LayoutXLMTokenizerFast

     def setUp(self):
-        feature_extractor_map = {
+        image_processor_map = {
             "do_resize": True,
             "size": 224,
             "apply_ocr": True,
@@ -56,7 +56,7 @@ class LayoutXLMProcessorTest(unittest.TestCase):
         self.tmpdirname = tempfile.mkdtemp()
         self.feature_extraction_file = os.path.join(self.tmpdirname, FEATURE_EXTRACTOR_NAME)
         with open(self.feature_extraction_file, "w", encoding="utf-8") as fp:
-            fp.write(json.dumps(feature_extractor_map) + "\n")
+            fp.write(json.dumps(image_processor_map) + "\n")

         # taken from `test_tokenization_layoutxlm.LayoutXLMTokenizationTest.test_save_pretrained`
         self.tokenizer_pretrained_name = "hf-internal-testing/tiny-random-layoutxlm"
@@ -70,8 +70,8 @@ class LayoutXLMProcessorTest(unittest.TestCase):
     def get_tokenizers(self, **kwargs) -> List[PreTrainedTokenizerBase]:
         return [self.get_tokenizer(**kwargs), self.get_rust_tokenizer(**kwargs)]

-    def get_feature_extractor(self, **kwargs):
-        return LayoutLMv2FeatureExtractor.from_pretrained(self.tmpdirname, **kwargs)
+    def get_image_processor(self, **kwargs):
+        return LayoutLMv2ImageProcessor.from_pretrained(self.tmpdirname, **kwargs)

     def tearDown(self):
         shutil.rmtree(self.tmpdirname)
@@ -88,10 +88,10 @@ class LayoutXLMProcessorTest(unittest.TestCase):
         return image_inputs

     def test_save_load_pretrained_default(self):
-        feature_extractor = self.get_feature_extractor()
+        image_processor = self.get_image_processor()
         tokenizers = self.get_tokenizers()
         for tokenizer in tokenizers:
-            processor = LayoutXLMProcessor(feature_extractor=feature_extractor, tokenizer=tokenizer)
+            processor = LayoutXLMProcessor(image_processor=image_processor, tokenizer=tokenizer)

             processor.save_pretrained(self.tmpdirname)
             processor = LayoutXLMProcessor.from_pretrained(self.tmpdirname)
@@ -99,16 +99,16 @@ class LayoutXLMProcessorTest(unittest.TestCase):
             self.assertEqual(processor.tokenizer.get_vocab(), tokenizer.get_vocab())
             self.assertIsInstance(processor.tokenizer, (LayoutXLMTokenizer, LayoutXLMTokenizerFast))

-            self.assertEqual(processor.feature_extractor.to_json_string(), feature_extractor.to_json_string())
-            self.assertIsInstance(processor.feature_extractor, LayoutLMv2FeatureExtractor)
+            self.assertEqual(processor.image_processor.to_json_string(), image_processor.to_json_string())
+            self.assertIsInstance(processor.image_processor, LayoutLMv2ImageProcessor)

     def test_save_load_pretrained_additional_features(self):
-        processor = LayoutXLMProcessor(feature_extractor=self.get_feature_extractor(), tokenizer=self.get_tokenizer())
+        processor = LayoutXLMProcessor(image_processor=self.get_image_processor(), tokenizer=self.get_tokenizer())
         processor.save_pretrained(self.tmpdirname)

         # slow tokenizer
         tokenizer_add_kwargs = self.get_tokenizer(bos_token="(BOS)", eos_token="(EOS)")
-        feature_extractor_add_kwargs = self.get_feature_extractor(do_resize=False, size=30)
+        image_processor_add_kwargs = self.get_image_processor(do_resize=False, size=30)

         processor = LayoutXLMProcessor.from_pretrained(
             self.tmpdirname,
@@ -122,12 +122,12 @@ class LayoutXLMProcessorTest(unittest.TestCase):
         self.assertEqual(processor.tokenizer.get_vocab(), tokenizer_add_kwargs.get_vocab())
         self.assertIsInstance(processor.tokenizer, LayoutXLMTokenizer)

-        self.assertEqual(processor.feature_extractor.to_json_string(), feature_extractor_add_kwargs.to_json_string())
-        self.assertIsInstance(processor.feature_extractor, LayoutLMv2FeatureExtractor)
+        self.assertEqual(processor.image_processor.to_json_string(), image_processor_add_kwargs.to_json_string())
+        self.assertIsInstance(processor.image_processor, LayoutLMv2ImageProcessor)

         # fast tokenizer
         tokenizer_add_kwargs = self.get_rust_tokenizer(bos_token="(BOS)", eos_token="(EOS)")
-        feature_extractor_add_kwargs = self.get_feature_extractor(do_resize=False, size=30)
+        image_processor_add_kwargs = self.get_image_processor(do_resize=False, size=30)

         processor = LayoutXLMProcessor.from_pretrained(
             self.tmpdirname, use_xlm=True, bos_token="(BOS)", eos_token="(EOS)", do_resize=False, size=30
@@ -136,14 +136,14 @@ class LayoutXLMProcessorTest(unittest.TestCase):
         self.assertEqual(processor.tokenizer.get_vocab(), tokenizer_add_kwargs.get_vocab())
         self.assertIsInstance(processor.tokenizer, LayoutXLMTokenizerFast)

-        self.assertEqual(processor.feature_extractor.to_json_string(), feature_extractor_add_kwargs.to_json_string())
-        self.assertIsInstance(processor.feature_extractor, LayoutLMv2FeatureExtractor)
+        self.assertEqual(processor.image_processor.to_json_string(), image_processor_add_kwargs.to_json_string())
+        self.assertIsInstance(processor.image_processor, LayoutLMv2ImageProcessor)

     def test_model_input_names(self):
-        feature_extractor = self.get_feature_extractor()
+        image_processor = self.get_image_processor()
         tokenizer = self.get_tokenizer()

-        processor = LayoutXLMProcessor(tokenizer=tokenizer, feature_extractor=feature_extractor)
+        processor = LayoutXLMProcessor(tokenizer=tokenizer, image_processor=image_processor)

         input_str = "lower newer"
         image_input = self.prepare_image_inputs()
@@ -215,15 +215,15 @@ class LayoutXLMProcessorIntegrationTests(unittest.TestCase):
     def test_processor_case_1(self):
         # case 1: document image classification (training, inference) + token classification (inference), apply_ocr = True

-        feature_extractor = LayoutLMv2FeatureExtractor()
+        image_processor = LayoutLMv2ImageProcessor()
         tokenizers = self.get_tokenizers
         images = self.get_images

         for tokenizer in tokenizers:
-            processor = LayoutXLMProcessor(feature_extractor=feature_extractor, tokenizer=tokenizer)
+            processor = LayoutXLMProcessor(image_processor=image_processor, tokenizer=tokenizer)

             # not batched
-            input_feat_extract = feature_extractor(images[0], return_tensors="pt")
+            input_feat_extract = image_processor(images[0], return_tensors="pt")
             input_processor = processor(images[0], return_tensors="pt")

             # verify keys
@@ -245,7 +245,7 @@ class LayoutXLMProcessorIntegrationTests(unittest.TestCase):
             self.assertSequenceEqual(decoding, expected_decoding)

             # batched
-            input_feat_extract = feature_extractor(images, return_tensors="pt")
+            input_feat_extract = image_processor(images, return_tensors="pt")
             input_processor = processor(images, padding=True, return_tensors="pt")

             # verify keys
@@ -270,12 +270,12 @@ class LayoutXLMProcessorIntegrationTests(unittest.TestCase):
     def test_processor_case_2(self):
         # case 2: document image classification (training, inference) + token classification (inference), apply_ocr=False

-        feature_extractor = LayoutLMv2FeatureExtractor(apply_ocr=False)
+        image_processor = LayoutLMv2ImageProcessor(apply_ocr=False)
         tokenizers = self.get_tokenizers
         images = self.get_images

         for tokenizer in tokenizers:
-            processor = LayoutXLMProcessor(feature_extractor=feature_extractor, tokenizer=tokenizer)
+            processor = LayoutXLMProcessor(image_processor=image_processor, tokenizer=tokenizer)

             # not batched
             words = ["hello", "world"]
@@ -324,12 +324,12 @@ class LayoutXLMProcessorIntegrationTests(unittest.TestCase):
     def test_processor_case_3(self):
         # case 3: token classification (training), apply_ocr=False

-        feature_extractor = LayoutLMv2FeatureExtractor(apply_ocr=False)
+        image_processor = LayoutLMv2ImageProcessor(apply_ocr=False)
         tokenizers = self.get_tokenizers
         images = self.get_images

         for tokenizer in tokenizers:
-            processor = LayoutXLMProcessor(feature_extractor=feature_extractor, tokenizer=tokenizer)
+            processor = LayoutXLMProcessor(image_processor=image_processor, tokenizer=tokenizer)

             # not batched
             words = ["weirdly", "world"]
@@ -389,12 +389,12 @@ class LayoutXLMProcessorIntegrationTests(unittest.TestCase):
     def test_processor_case_4(self):
         # case 4: visual question answering (inference), apply_ocr=True

-        feature_extractor = LayoutLMv2FeatureExtractor()
+        image_processor = LayoutLMv2ImageProcessor()
         tokenizers = self.get_tokenizers
         images = self.get_images

         for tokenizer in tokenizers:
-            processor = LayoutXLMProcessor(feature_extractor=feature_extractor, tokenizer=tokenizer)
+            processor = LayoutXLMProcessor(image_processor=image_processor, tokenizer=tokenizer)

             # not batched
             question = "What's his name?"
@@ -440,12 +440,12 @@ class LayoutXLMProcessorIntegrationTests(unittest.TestCase):
     def test_processor_case_5(self):
         # case 5: visual question answering (inference), apply_ocr=False

-        feature_extractor = LayoutLMv2FeatureExtractor(apply_ocr=False)
+        image_processor = LayoutLMv2ImageProcessor(apply_ocr=False)
         tokenizers = self.get_tokenizers
         images = self.get_images

         for tokenizer in tokenizers:
-            processor = LayoutXLMProcessor(feature_extractor=feature_extractor, tokenizer=tokenizer)
+            processor = LayoutXLMProcessor(image_processor=image_processor, tokenizer=tokenizer)

             # not batched
             question = "What's his name?"
...
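For composite processors the rename also changes the constructor keyword: `LayoutXLMProcessor` now takes `image_processor=` instead of `feature_extractor=`, and exposes the component as `processor.image_processor`. A minimal sketch under the same assumptions as the tests above; the blank image is a hypothetical stand-in, and real use passes a document scan:

```python
from PIL import Image
from transformers import LayoutLMv2ImageProcessor, LayoutXLMProcessor, LayoutXLMTokenizerFast

image_processor = LayoutLMv2ImageProcessor(apply_ocr=False)
tokenizer = LayoutXLMTokenizerFast.from_pretrained("hf-internal-testing/tiny-random-layoutxlm")

# Keyword renamed from feature_extractor= to image_processor=.
processor = LayoutXLMProcessor(image_processor=image_processor, tokenizer=tokenizer)

# With apply_ocr=False the caller supplies words and boxes, as in case 2 above.
image = Image.new("RGB", (224, 224))  # stand-in image, not a real document
words = ["hello", "world"]
boxes = [[1, 2, 3, 4], [5, 6, 7, 8]]
encoding = processor(image, words, boxes=boxes, return_tensors="pt")
print(sorted(encoding.keys()))  # input ids, attention mask, bbox, and the image tensor
```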
@@ -46,7 +46,7 @@ if is_torch_available():
 if is_vision_available():
     from PIL import Image

-    from transformers import LevitFeatureExtractor
+    from transformers import LevitImageProcessor


 class LevitConfigTester(ConfigTester):
@@ -409,8 +409,8 @@ def prepare_img():
 @require_vision
 class LevitModelIntegrationTest(unittest.TestCase):
     @cached_property
-    def default_feature_extractor(self):
-        return LevitFeatureExtractor.from_pretrained(LEVIT_PRETRAINED_MODEL_ARCHIVE_LIST[0])
+    def default_image_processor(self):
+        return LevitImageProcessor.from_pretrained(LEVIT_PRETRAINED_MODEL_ARCHIVE_LIST[0])

     @slow
     def test_inference_image_classification_head(self):
@@ -418,9 +418,9 @@ class LevitModelIntegrationTest(unittest.TestCase):
             torch_device
         )

-        feature_extractor = self.default_feature_extractor
+        image_processor = self.default_image_processor
         image = prepare_img()
-        inputs = feature_extractor(images=image, return_tensors="pt").to(torch_device)
+        inputs = image_processor(images=image, return_tensors="pt").to(torch_device)

         # forward pass
         with torch.no_grad():
...
@@ -545,9 +545,9 @@ class Mask2FormerImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestCase):
             self.assertEqual(segmentation[0].shape, target_sizes[0])

     def test_post_process_instance_segmentation(self):
-        feature_extractor = self.image_processing_class(num_labels=self.image_processor_tester.num_classes)
+        image_processor = self.image_processing_class(num_labels=self.image_processor_tester.num_classes)
         outputs = self.image_processor_tester.get_fake_mask2former_outputs()
-        segmentation = feature_extractor.post_process_instance_segmentation(outputs, threshold=0)
+        segmentation = image_processor.post_process_instance_segmentation(outputs, threshold=0)

         self.assertTrue(len(segmentation) == self.image_processor_tester.batch_size)
         for el in segmentation:
@@ -556,7 +556,7 @@ class Mask2FormerImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestCase):
             self.assertEqual(type(el["segments_info"]), list)
             self.assertEqual(el["segmentation"].shape, (384, 384))

-        segmentation = feature_extractor.post_process_instance_segmentation(
+        segmentation = image_processor.post_process_instance_segmentation(
             outputs, threshold=0, return_binary_maps=True
         )
...
@@ -325,14 +325,14 @@ class Mask2FormerModelIntegrationTest(unittest.TestCase):
         return "facebook/mask2former-swin-small-coco-instance"

     @cached_property
-    def default_feature_extractor(self):
+    def default_image_processor(self):
         return Mask2FormerImageProcessor.from_pretrained(self.model_checkpoints) if is_vision_available() else None

     def test_inference_no_head(self):
         model = Mask2FormerModel.from_pretrained(self.model_checkpoints).to(torch_device)
-        feature_extractor = self.default_feature_extractor
+        image_processor = self.default_image_processor
         image = prepare_img()
-        inputs = feature_extractor(image, return_tensors="pt").to(torch_device)
+        inputs = image_processor(image, return_tensors="pt").to(torch_device)
         inputs_shape = inputs["pixel_values"].shape
         # check size is divisible by 32
         self.assertTrue((inputs_shape[-1] % 32) == 0 and (inputs_shape[-2] % 32) == 0)
@@ -371,9 +371,9 @@ class Mask2FormerModelIntegrationTest(unittest.TestCase):
     def test_inference_universal_segmentation_head(self):
         model = Mask2FormerForUniversalSegmentation.from_pretrained(self.model_checkpoints).to(torch_device).eval()
-        feature_extractor = self.default_feature_extractor
+        image_processor = self.default_image_processor
         image = prepare_img()
-        inputs = feature_extractor(image, return_tensors="pt").to(torch_device)
+        inputs = image_processor(image, return_tensors="pt").to(torch_device)
         inputs_shape = inputs["pixel_values"].shape
         # check size is divisible by 32
         self.assertTrue((inputs_shape[-1] % 32) == 0 and (inputs_shape[-2] % 32) == 0)
@@ -408,9 +408,9 @@ class Mask2FormerModelIntegrationTest(unittest.TestCase):
     def test_with_segmentation_maps_and_loss(self):
         model = Mask2FormerForUniversalSegmentation.from_pretrained(self.model_checkpoints).to(torch_device).eval()
-        feature_extractor = self.default_feature_extractor
+        image_processor = self.default_image_processor

-        inputs = feature_extractor(
+        inputs = image_processor(
             [np.zeros((3, 800, 1333)), np.zeros((3, 800, 1333))],
             segmentation_maps=[np.zeros((384, 384)).astype(np.float32), np.zeros((384, 384)).astype(np.float32)],
             return_tensors="pt",
...