Unverified Commit 5fd5990d authored by Nicolas Patry's avatar Nicolas Patry Committed by GitHub
Browse files

Factored out some code in the `image-segmentation` pipeline. (#19727)

* Factored out some code in the image-segmentation pipeline

Re-enable `small_model_pt`.

Re-enable `small_model_pt`.

Enabling the current test with the current values.

Debugging the values on the CI.

More logs ? Printing doesn't work ?

Using the CI values instead. Seems to be a Pillow sensitivity.

Added a test showcasing that models not supporting some tasks get a
clear error.

Factored out code.

Further factor out.

Fixup.

Bad rebase.

Put `panoptic` before `instance` as it should be a superset.

* Fixing tests.

* Adding subtasks tests

+ Fixes `instance` segmentation which was broken due to default and
non kwargs arguments.

* Fix bad replace.
parent 24476722
...@@ -1275,12 +1275,13 @@ class DetrFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin): ...@@ -1275,12 +1275,13 @@ class DetrFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin):
# Get segmentation map and segment information of batch item # Get segmentation map and segment information of batch item
target_size = target_sizes[i] if target_sizes is not None else None target_size = target_sizes[i] if target_sizes is not None else None
segmentation, segments = compute_segments( segmentation, segments = compute_segments(
mask_probs_item, mask_probs=mask_probs_item,
pred_scores_item, pred_scores=pred_scores_item,
pred_labels_item, pred_labels=pred_labels_item,
mask_threshold, mask_threshold=mask_threshold,
overlap_mask_area_threshold, overlap_mask_area_threshold=overlap_mask_area_threshold,
target_size, label_ids_to_fuse=[],
target_size=target_size,
) )
# Return segmentation map in run-length encoding (RLE) format # Return segmentation map in run-length encoding (RLE) format
...@@ -1366,13 +1367,13 @@ class DetrFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin): ...@@ -1366,13 +1367,13 @@ class DetrFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin):
# Get segmentation map and segment information of batch item # Get segmentation map and segment information of batch item
target_size = target_sizes[i] if target_sizes is not None else None target_size = target_sizes[i] if target_sizes is not None else None
segmentation, segments = compute_segments( segmentation, segments = compute_segments(
mask_probs_item, mask_probs=mask_probs_item,
pred_scores_item, pred_scores=pred_scores_item,
pred_labels_item, pred_labels=pred_labels_item,
mask_threshold, mask_threshold=mask_threshold,
overlap_mask_area_threshold, overlap_mask_area_threshold=overlap_mask_area_threshold,
label_ids_to_fuse, label_ids_to_fuse=label_ids_to_fuse,
target_size, target_size=target_size,
) )
results.append({"segmentation": segmentation, "segments_info": segments}) results.append({"segmentation": segmentation, "segments_info": segments})
......
...@@ -56,14 +56,15 @@ class ImageSegmentationPipeline(Pipeline): ...@@ -56,14 +56,15 @@ class ImageSegmentationPipeline(Pipeline):
def _sanitize_parameters(self, **kwargs): def _sanitize_parameters(self, **kwargs):
postprocess_kwargs = {} postprocess_kwargs = {}
if "task" in kwargs: if "subtask" in kwargs:
postprocess_kwargs["task"] = kwargs["task"] postprocess_kwargs["subtask"] = kwargs["subtask"]
if "threshold" in kwargs: if "threshold" in kwargs:
postprocess_kwargs["threshold"] = kwargs["threshold"] postprocess_kwargs["threshold"] = kwargs["threshold"]
if "mask_threshold" in kwargs: if "mask_threshold" in kwargs:
postprocess_kwargs["mask_threshold"] = kwargs["mask_threshold"] postprocess_kwargs["mask_threshold"] = kwargs["mask_threshold"]
if "overlap_mask_area_threshold" in kwargs: if "overlap_mask_area_threshold" in kwargs:
postprocess_kwargs["overlap_mask_area_threshold"] = kwargs["overlap_mask_area_threshold"] postprocess_kwargs["overlap_mask_area_threshold"] = kwargs["overlap_mask_area_threshold"]
return {}, {}, postprocess_kwargs return {}, {}, postprocess_kwargs
def __call__(self, images, **kwargs) -> Union[Predictions, List[Prediction]]: def __call__(self, images, **kwargs) -> Union[Predictions, List[Prediction]]:
...@@ -80,9 +81,10 @@ class ImageSegmentationPipeline(Pipeline): ...@@ -80,9 +81,10 @@ class ImageSegmentationPipeline(Pipeline):
The pipeline accepts either a single image or a batch of images. Images in a batch must all be in the The pipeline accepts either a single image or a batch of images. Images in a batch must all be in the
same format: all as HTTP(S) links, all as local paths, or all as PIL images. same format: all as HTTP(S) links, all as local paths, or all as PIL images.
subtask (`str`, defaults to `panoptic`): subtask (`str`, *optional*):
Segmentation task to be performed, choose [`semantic`, `instance` and `panoptic`] depending on model Segmentation task to be performed, choose [`semantic`, `instance` and `panoptic`] depending on model
capabilities. capabilities. If not set, the pipeline will attempt to resolve in the following order:
`panoptic`, `instance`, `semantic`.
threshold (`float`, *optional*, defaults to 0.9): threshold (`float`, *optional*, defaults to 0.9):
Probability threshold to filter out predicted masks. Probability threshold to filter out predicted masks.
mask_threshold (`float`, *optional*, defaults to 0.5): mask_threshold (`float`, *optional*, defaults to 0.5):
...@@ -104,7 +106,6 @@ class ImageSegmentationPipeline(Pipeline): ...@@ -104,7 +106,6 @@ class ImageSegmentationPipeline(Pipeline):
- **score** (*optional* `float`) -- Optionally, when the model is capable of estimating a confidence of the - **score** (*optional* `float`) -- Optionally, when the model is capable of estimating a confidence of the
"object" described by the label and the mask. "object" described by the label and the mask.
""" """
return super().__call__(images, **kwargs) return super().__call__(images, **kwargs)
def preprocess(self, image): def preprocess(self, image):
...@@ -123,35 +124,15 @@ class ImageSegmentationPipeline(Pipeline): ...@@ -123,35 +124,15 @@ class ImageSegmentationPipeline(Pipeline):
def postprocess( def postprocess(
self, model_outputs, subtask=None, threshold=0.9, mask_threshold=0.5, overlap_mask_area_threshold=0.5 self, model_outputs, subtask=None, threshold=0.9, mask_threshold=0.5, overlap_mask_area_threshold=0.5
): ):
if (subtask == "panoptic" or subtask is None) and hasattr(
self.feature_extractor, "post_process_panoptic_segmentation"
):
outputs = self.feature_extractor.post_process_panoptic_segmentation(
model_outputs,
threshold=threshold,
mask_threshold=mask_threshold,
overlap_mask_area_threshold=overlap_mask_area_threshold,
target_sizes=model_outputs["target_size"],
)[0]
annotation = [] fn = None
segmentation = outputs["segmentation"] if subtask in {"panoptic", None} and hasattr(self.feature_extractor, "post_process_panoptic_segmentation"):
fn = self.feature_extractor.post_process_panoptic_segmentation
elif subtask in {"instance", None} and hasattr(self.feature_extractor, "post_process_instance_segmentation"):
fn = self.feature_extractor.post_process_instance_segmentation
if len(outputs["segments_info"]) == 0: if fn is not None:
mask = Image.fromarray(np.zeros(segmentation.shape).astype(np.uint8), mode="L") outputs = fn(
annotation.append({"mask": mask, "label": "NULL", "score": 0.0})
else:
for segment in outputs["segments_info"]:
mask = (segmentation == segment["id"]) * 255
mask = Image.fromarray(mask.numpy().astype(np.uint8), mode="L")
label = self.model.config.id2label[segment["label_id"]]
score = segment["score"]
annotation.append({"score": score, "label": label, "mask": mask})
elif (subtask == "instance" or subtask is None) and hasattr(
self.feature_extractor, "post_process_instance_segmentation"
):
outputs = self.feature_extractor.post_process_instance_segmentation(
model_outputs, model_outputs,
threshold=threshold, threshold=threshold,
mask_threshold=mask_threshold, mask_threshold=mask_threshold,
...@@ -162,20 +143,14 @@ class ImageSegmentationPipeline(Pipeline): ...@@ -162,20 +143,14 @@ class ImageSegmentationPipeline(Pipeline):
annotation = [] annotation = []
segmentation = outputs["segmentation"] segmentation = outputs["segmentation"]
if len(outputs["segments_info"]) == 0: for segment in outputs["segments_info"]:
mask = Image.fromarray(np.zeros(segmentation.shape).astype(np.uint8), mode="L") mask = (segmentation == segment["id"]) * 255
annotation.append({"mask": mask, "label": "NULL", "score": 0.0}) mask = Image.fromarray(mask.numpy().astype(np.uint8), mode="L")
else: label = self.model.config.id2label[segment["label_id"]]
for segment in outputs["segments_info"]: score = segment["score"]
mask = (segmentation == segment["id"]) * 255 annotation.append({"score": score, "label": label, "mask": mask})
mask = Image.fromarray(mask.numpy().astype(np.uint8), mode="L")
label = self.model.config.id2label[segment["label_id"]] elif subtask in {"semantic", None} and hasattr(self.feature_extractor, "post_process_semantic_segmentation"):
score = segment["score"]
annotation.append({"mask": mask, "label": label, "score": score})
elif (subtask == "semantic" or subtask is None) and hasattr(
self.feature_extractor, "post_process_semantic_segmentation"
):
outputs = self.feature_extractor.post_process_semantic_segmentation( outputs = self.feature_extractor.post_process_semantic_segmentation(
model_outputs, target_sizes=model_outputs["target_size"] model_outputs, target_sizes=model_outputs["target_size"]
)[0] )[0]
...@@ -190,5 +165,5 @@ class ImageSegmentationPipeline(Pipeline): ...@@ -190,5 +165,5 @@ class ImageSegmentationPipeline(Pipeline):
label = self.model.config.id2label[label] label = self.model.config.id2label[label]
annotation.append({"score": None, "label": label, "mask": mask}) annotation.append({"score": None, "label": label, "mask": mask})
else: else:
raise ValueError(f"Task {subtask} is not supported for model {self.model}.s") raise ValueError(f"Subtask {subtask} is not supported for model {type(self.model)}")
return annotation return annotation
...@@ -89,8 +89,8 @@ class ImageSegmentationPipelineTests(unittest.TestCase, metaclass=PipelineTestCa ...@@ -89,8 +89,8 @@ class ImageSegmentationPipelineTests(unittest.TestCase, metaclass=PipelineTestCa
) )
self.assertIsInstance(outputs, list) self.assertIsInstance(outputs, list)
n = len(outputs) n = len(outputs)
if isinstance(image_segmenter.model, (MaskFormerForInstanceSegmentation)): if isinstance(image_segmenter.model, (MaskFormerForInstanceSegmentation, DetrForSegmentation)):
# Instance segmentation (maskformer) have a slot for null class # Instance segmentation (maskformer, and detr) have a slot for null class
# and can output nothing even with a low threshold # and can output nothing even with a low threshold
self.assertGreaterEqual(n, 0) self.assertGreaterEqual(n, 0)
else: else:
...@@ -153,26 +153,53 @@ class ImageSegmentationPipelineTests(unittest.TestCase, metaclass=PipelineTestCa ...@@ -153,26 +153,53 @@ class ImageSegmentationPipelineTests(unittest.TestCase, metaclass=PipelineTestCa
def test_small_model_tf(self): def test_small_model_tf(self):
pass pass
@require_torch
def test_small_model_pt_no_panoptic(self):
model_id = "hf-internal-testing/tiny-random-mobilevit"
# The default task is `image-classification`, so we need to override
pipe = pipeline(task="image-segmentation", model=model_id)
# This model supports neither `instance` nor `panoptic`
# We should error out
with self.assertRaises(ValueError) as e:
pipe("http://images.cocodataset.org/val2017/000000039769.jpg", subtask="panoptic")
self.assertEqual(
str(e.exception),
"Subtask panoptic is not supported for model <class"
" 'transformers.models.mobilevit.modeling_mobilevit.MobileViTForSemanticSegmentation'>",
)
with self.assertRaises(ValueError) as e:
pipe("http://images.cocodataset.org/val2017/000000039769.jpg", subtask="instance")
self.assertEqual(
str(e.exception),
"Subtask instance is not supported for model <class"
" 'transformers.models.mobilevit.modeling_mobilevit.MobileViTForSemanticSegmentation'>",
)
@require_torch @require_torch
def test_small_model_pt(self): def test_small_model_pt(self):
model_id = "hf-internal-testing/tiny-detr-mobilenetsv3-panoptic" model_id = "hf-internal-testing/tiny-detr-mobilenetsv3-panoptic"
model = AutoModelForImageSegmentation.from_pretrained(model_id) model = AutoModelForImageSegmentation.from_pretrained(model_id)
feature_extractor = AutoFeatureExtractor.from_pretrained(model_id) feature_extractor = AutoFeatureExtractor.from_pretrained(model_id)
image_segmenter = ImageSegmentationPipeline(model=model, feature_extractor=feature_extractor) image_segmenter = ImageSegmentationPipeline(
model=model,
outputs = image_segmenter( feature_extractor=feature_extractor,
"http://images.cocodataset.org/val2017/000000039769.jpg",
subtask="panoptic", subtask="panoptic",
threshold=0.0, threshold=0.0,
mask_threshold=0.0, mask_threshold=0.0,
overlap_mask_area_threshold=0.0, overlap_mask_area_threshold=0.0,
) )
outputs = image_segmenter(
"http://images.cocodataset.org/val2017/000000039769.jpg",
)
# Shortening by hashing # Shortening by hashing
for o in outputs: for o in outputs:
o["mask"] = mask_to_test_readable(o["mask"]) o["mask"] = mask_to_test_readable(o["mask"])
# This is extremely brittle, and those values are made specific for the CI.
self.assertEqual( self.assertEqual(
nested_simplify(outputs, decimals=4), nested_simplify(outputs, decimals=4),
[ [
...@@ -189,9 +216,6 @@ class ImageSegmentationPipelineTests(unittest.TestCase, metaclass=PipelineTestCa ...@@ -189,9 +216,6 @@ class ImageSegmentationPipelineTests(unittest.TestCase, metaclass=PipelineTestCa
"http://images.cocodataset.org/val2017/000000039769.jpg", "http://images.cocodataset.org/val2017/000000039769.jpg",
"http://images.cocodataset.org/val2017/000000039769.jpg", "http://images.cocodataset.org/val2017/000000039769.jpg",
], ],
threshold=0.0,
mask_threshold=0.0,
overlap_mask_area_threshold=0.0,
) )
for output in outputs: for output in outputs:
for o in output: for o in output:
...@@ -217,6 +241,48 @@ class ImageSegmentationPipelineTests(unittest.TestCase, metaclass=PipelineTestCa ...@@ -217,6 +241,48 @@ class ImageSegmentationPipelineTests(unittest.TestCase, metaclass=PipelineTestCa
], ],
) )
output = image_segmenter("http://images.cocodataset.org/val2017/000000039769.jpg", subtask="instance")
for o in output:
o["mask"] = mask_to_test_readable(o["mask"])
self.assertEqual(
nested_simplify(output, decimals=4),
[
{
"score": 0.004,
"label": "LABEL_215",
"mask": {"hash": "a01498ca7c", "shape": (480, 640), "white_pixels": 307200},
},
],
)
# This might be surprising to the reader.
# The `panoptic` returns only LABEL_215, and this returns 3 labels.
#
output = image_segmenter("http://images.cocodataset.org/val2017/000000039769.jpg", subtask="semantic")
for o in output:
o["mask"] = mask_to_test_readable(o["mask"])
self.maxDiff = None
self.assertEqual(
nested_simplify(output, decimals=4),
[
{
"label": "LABEL_88",
"mask": {"hash": "7f0bf661a4", "shape": (480, 640), "white_pixels": 3},
"score": None,
},
{
"label": "LABEL_101",
"mask": {"hash": "10ab738dc9", "shape": (480, 640), "white_pixels": 8948},
"score": None,
},
{
"label": "LABEL_215",
"mask": {"hash": "b431e0946c", "shape": (480, 640), "white_pixels": 298249},
"score": None,
},
],
)
@require_torch @require_torch
def test_small_model_pt_semantic(self): def test_small_model_pt_semantic(self):
model_id = "hf-internal-testing/tiny-random-beit-pipeline" model_id = "hf-internal-testing/tiny-random-beit-pipeline"
...@@ -246,13 +312,15 @@ class ImageSegmentationPipelineTests(unittest.TestCase, metaclass=PipelineTestCa ...@@ -246,13 +312,15 @@ class ImageSegmentationPipelineTests(unittest.TestCase, metaclass=PipelineTestCa
@slow @slow
def test_integration_torch_image_segmentation(self): def test_integration_torch_image_segmentation(self):
model_id = "facebook/detr-resnet-50-panoptic" model_id = "facebook/detr-resnet-50-panoptic"
image_segmenter = pipeline("image-segmentation", model=model_id) image_segmenter = pipeline(
"image-segmentation",
model=model_id,
threshold=0.0,
overlap_mask_area_threshold=0.0,
)
outputs = image_segmenter( outputs = image_segmenter(
"http://images.cocodataset.org/val2017/000000039769.jpg", "http://images.cocodataset.org/val2017/000000039769.jpg",
subtask="panoptic",
threshold=0,
overlap_mask_area_threshold=0.0,
) )
# Shortening by hashing # Shortening by hashing
...@@ -300,9 +368,6 @@ class ImageSegmentationPipelineTests(unittest.TestCase, metaclass=PipelineTestCa ...@@ -300,9 +368,6 @@ class ImageSegmentationPipelineTests(unittest.TestCase, metaclass=PipelineTestCa
"http://images.cocodataset.org/val2017/000000039769.jpg", "http://images.cocodataset.org/val2017/000000039769.jpg",
"http://images.cocodataset.org/val2017/000000039769.jpg", "http://images.cocodataset.org/val2017/000000039769.jpg",
], ],
subtask="panoptic",
threshold=0.0,
overlap_mask_area_threshold=0.0,
) )
# Shortening by hashing # Shortening by hashing
...@@ -386,9 +451,7 @@ class ImageSegmentationPipelineTests(unittest.TestCase, metaclass=PipelineTestCa ...@@ -386,9 +451,7 @@ class ImageSegmentationPipelineTests(unittest.TestCase, metaclass=PipelineTestCa
model_id = "facebook/detr-resnet-50-panoptic" model_id = "facebook/detr-resnet-50-panoptic"
image_segmenter = pipeline("image-segmentation", model=model_id) image_segmenter = pipeline("image-segmentation", model=model_id)
outputs = image_segmenter( outputs = image_segmenter("http://images.cocodataset.org/val2017/000000039769.jpg", threshold=0.999)
"http://images.cocodataset.org/val2017/000000039769.jpg", subtask="panoptic", threshold=0.999
)
# Shortening by hashing # Shortening by hashing
for o in outputs: for o in outputs:
o["mask"] = mask_to_test_readable(o["mask"]) o["mask"] = mask_to_test_readable(o["mask"])
...@@ -409,9 +472,7 @@ class ImageSegmentationPipelineTests(unittest.TestCase, metaclass=PipelineTestCa ...@@ -409,9 +472,7 @@ class ImageSegmentationPipelineTests(unittest.TestCase, metaclass=PipelineTestCa
], ],
) )
outputs = image_segmenter( outputs = image_segmenter("http://images.cocodataset.org/val2017/000000039769.jpg", threshold=0.5)
"http://images.cocodataset.org/val2017/000000039769.jpg", subtask="panoptic", threshold=0.5
)
for o in outputs: for o in outputs:
o["mask"] = mask_to_test_readable(o["mask"]) o["mask"] = mask_to_test_readable(o["mask"])
...@@ -460,7 +521,7 @@ class ImageSegmentationPipelineTests(unittest.TestCase, metaclass=PipelineTestCa ...@@ -460,7 +521,7 @@ class ImageSegmentationPipelineTests(unittest.TestCase, metaclass=PipelineTestCa
image = load_dataset("hf-internal-testing/fixtures_ade20k", split="test") image = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
file = image[0]["file"] file = image[0]["file"]
outputs = image_segmenter(file, subtask="panoptic", threshold=threshold) outputs = image_segmenter(file, threshold=threshold)
# Shortening by hashing # Shortening by hashing
for o in outputs: for o in outputs:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment