Detr preprocessor fix (#19007)

* fix in-place preprocessing of inputs

Detr preprocessor fix (#19007)
* fix in-place preprocessing of inputs
fe01ec34 · Alara Dirik · GitHub · 7e84723f · fe01ec34 · fe01ec34
Unverified commit fe01ec34, authored Sep 23, 2022 by Alara Dirik; committed by GitHub on Sep 23, 2022
3 changed files
--- a/src/transformers/models/conditional_detr/feature_extraction_conditional_detr.py
+++ b/src/transformers/models/conditional_detr/feature_extraction_conditional_detr.py
@@ -555,6 +555,12 @@ class ConditionalDetrFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtrac
            if annotations is not None:
                annotations = [annotations]
+        # Shallow-copy the input lists so the caller's lists are not edited in place (elements are not copied)
+        images = [image for image in images]
+        if annotations is not None:
+            annotations = [annotation for annotation in annotations]
        # prepare (COCO annotations as a list of Dict -> ConditionalDETR target as a single Dict per image)
        if annotations is not None:
            for idx, (image, target) in enumerate(zip(images, annotations)):
@@ -587,6 +593,8 @@ class ConditionalDetrFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtrac
                images = [
                    self._normalize(image=image, mean=self.image_mean, std=self.image_std)[0] for image in images
                ]
+        else:
+            images = [np.array(image) for image in images]
        if pad_and_return_pixel_mask:
            # pad images up to largest image in batch and create pixel_mask

--- a/src/transformers/models/deformable_detr/feature_extraction_deformable_detr.py
+++ b/src/transformers/models/deformable_detr/feature_extraction_deformable_detr.py
@@ -555,6 +555,12 @@ class DeformableDetrFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtract
            if annotations is not None:
                annotations = [annotations]
+        # Shallow-copy the input lists so the caller's lists are not edited in place (elements are not copied)
+        images = [image for image in images]
+        if annotations is not None:
+            annotations = [annotation for annotation in annotations]
        # prepare (COCO annotations as a list of Dict -> DETR target as a single Dict per image)
        if annotations is not None:
            for idx, (image, target) in enumerate(zip(images, annotations)):
@@ -587,6 +593,8 @@ class DeformableDetrFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtract
                images = [
                    self._normalize(image=image, mean=self.image_mean, std=self.image_std)[0] for image in images
                ]
+        else:
+            images = [np.array(image) for image in images]
        if pad_and_return_pixel_mask:
            # pad images up to largest image in batch and create pixel_mask

--- a/src/transformers/models/detr/feature_extraction_detr.py
+++ b/src/transformers/models/detr/feature_extraction_detr.py
@@ -547,6 +547,12 @@ class DetrFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin):
            if annotations is not None:
                annotations = [annotations]
+        # Shallow-copy the input lists so the caller's lists are not edited in place (elements are not copied)
+        images = [image for image in images]
+        if annotations is not None:
+            annotations = [annotation for annotation in annotations]
        # prepare (COCO annotations as a list of Dict -> DETR target as a single Dict per image)
        if annotations is not None:
            for idx, (image, target) in enumerate(zip(images, annotations)):
@@ -579,6 +585,8 @@ class DetrFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin):
                images = [
                    self._normalize(image=image, mean=self.image_mean, std=self.image_std)[0] for image in images
                ]
+        else:
+            images = [np.array(image) for image in images]
        if pad_and_return_pixel_mask:
            # pad images up to largest image in batch and create pixel_mask