Unverified commit fe01ec34, authored by Alara Dirik, committed by GitHub
Browse files

Detr preprocessor fix (#19007)

* fix in-place preprocessing of inputs
parent 7e84723f
...@@ -555,6 +555,12 @@ class ConditionalDetrFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtrac ...@@ -555,6 +555,12 @@ class ConditionalDetrFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtrac
if annotations is not None: if annotations is not None:
annotations = [annotations] annotations = [annotations]
# Create shallow copies of the input lists so the caller's lists are not edited in place
images = [image for image in images]
if annotations is not None:
annotations = [annotation for annotation in annotations]
# prepare (COCO annotations as a list of Dict -> ConditionalDETR target as a single Dict per image) # prepare (COCO annotations as a list of Dict -> ConditionalDETR target as a single Dict per image)
if annotations is not None: if annotations is not None:
for idx, (image, target) in enumerate(zip(images, annotations)): for idx, (image, target) in enumerate(zip(images, annotations)):
...@@ -587,6 +593,8 @@ class ConditionalDetrFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtrac ...@@ -587,6 +593,8 @@ class ConditionalDetrFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtrac
images = [ images = [
self._normalize(image=image, mean=self.image_mean, std=self.image_std)[0] for image in images self._normalize(image=image, mean=self.image_mean, std=self.image_std)[0] for image in images
] ]
else:
images = [np.array(image) for image in images]
if pad_and_return_pixel_mask: if pad_and_return_pixel_mask:
# pad images up to largest image in batch and create pixel_mask # pad images up to largest image in batch and create pixel_mask
......
...@@ -555,6 +555,12 @@ class DeformableDetrFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtract ...@@ -555,6 +555,12 @@ class DeformableDetrFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtract
if annotations is not None: if annotations is not None:
annotations = [annotations] annotations = [annotations]
# Create shallow copies of the input lists so the caller's lists are not edited in place
images = [image for image in images]
if annotations is not None:
annotations = [annotation for annotation in annotations]
# prepare (COCO annotations as a list of Dict -> DETR target as a single Dict per image) # prepare (COCO annotations as a list of Dict -> DETR target as a single Dict per image)
if annotations is not None: if annotations is not None:
for idx, (image, target) in enumerate(zip(images, annotations)): for idx, (image, target) in enumerate(zip(images, annotations)):
...@@ -587,6 +593,8 @@ class DeformableDetrFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtract ...@@ -587,6 +593,8 @@ class DeformableDetrFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtract
images = [ images = [
self._normalize(image=image, mean=self.image_mean, std=self.image_std)[0] for image in images self._normalize(image=image, mean=self.image_mean, std=self.image_std)[0] for image in images
] ]
else:
images = [np.array(image) for image in images]
if pad_and_return_pixel_mask: if pad_and_return_pixel_mask:
# pad images up to largest image in batch and create pixel_mask # pad images up to largest image in batch and create pixel_mask
......
...@@ -547,6 +547,12 @@ class DetrFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin): ...@@ -547,6 +547,12 @@ class DetrFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin):
if annotations is not None: if annotations is not None:
annotations = [annotations] annotations = [annotations]
# Create shallow copies of the input lists so the caller's lists are not edited in place
images = [image for image in images]
if annotations is not None:
annotations = [annotation for annotation in annotations]
# prepare (COCO annotations as a list of Dict -> DETR target as a single Dict per image) # prepare (COCO annotations as a list of Dict -> DETR target as a single Dict per image)
if annotations is not None: if annotations is not None:
for idx, (image, target) in enumerate(zip(images, annotations)): for idx, (image, target) in enumerate(zip(images, annotations)):
...@@ -579,6 +585,8 @@ class DetrFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin): ...@@ -579,6 +585,8 @@ class DetrFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin):
images = [ images = [
self._normalize(image=image, mean=self.image_mean, std=self.image_std)[0] for image in images self._normalize(image=image, mean=self.image_mean, std=self.image_std)[0] for image in images
] ]
else:
images = [np.array(image) for image in images]
if pad_and_return_pixel_mask: if pad_and_return_pixel_mask:
# pad images up to largest image in batch and create pixel_mask # pad images up to largest image in batch and create pixel_mask
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment