Unverified Commit 7f998612 authored by NielsRogge's avatar NielsRogge Committed by GitHub
Browse files

Add Universal Segmentation class + mapping (#20766)



* Add mapping

* Add mapping to pipeline

* Apply suggestions

* Fix feature extractor tests

* Use ForInstance, add model to universal mapping

* More fixes

* Remove model from deprecated objects
Co-authored-by: Niels Rogge <nielsrogge@Nielss-MacBook-Pro.local>
parent e65445b4
...@@ -254,6 +254,10 @@ The following auto classes are available for the following computer vision tasks ...@@ -254,6 +254,10 @@ The following auto classes are available for the following computer vision tasks
[[autodoc]] AutoModelForInstanceSegmentation [[autodoc]] AutoModelForInstanceSegmentation
### AutoModelForUniversalSegmentation
[[autodoc]] AutoModelForUniversalSegmentation
### AutoModelForZeroShotObjectDetection ### AutoModelForZeroShotObjectDetection
[[autodoc]] AutoModelForZeroShotObjectDetection [[autodoc]] AutoModelForZeroShotObjectDetection
......
...@@ -943,6 +943,7 @@ else: ...@@ -943,6 +943,7 @@ else:
"MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING", "MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING",
"MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING", "MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING",
"MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING", "MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING",
"MODEL_FOR_UNIVERSAL_SEGMENTATION_MAPPING",
"MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING", "MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING",
"MODEL_FOR_VISION_2_SEQ_MAPPING", "MODEL_FOR_VISION_2_SEQ_MAPPING",
"MODEL_FOR_VISUAL_QUESTION_ANSWERING_MAPPING", "MODEL_FOR_VISUAL_QUESTION_ANSWERING_MAPPING",
...@@ -974,6 +975,7 @@ else: ...@@ -974,6 +975,7 @@ else:
"AutoModelForSpeechSeq2Seq", "AutoModelForSpeechSeq2Seq",
"AutoModelForTableQuestionAnswering", "AutoModelForTableQuestionAnswering",
"AutoModelForTokenClassification", "AutoModelForTokenClassification",
"AutoModelForUniversalSegmentation",
"AutoModelForVideoClassification", "AutoModelForVideoClassification",
"AutoModelForVision2Seq", "AutoModelForVision2Seq",
"AutoModelForVisualQuestionAnswering", "AutoModelForVisualQuestionAnswering",
...@@ -4113,6 +4115,7 @@ if TYPE_CHECKING: ...@@ -4113,6 +4115,7 @@ if TYPE_CHECKING:
MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING, MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING,
MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING, MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING,
MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING, MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING,
MODEL_FOR_UNIVERSAL_SEGMENTATION_MAPPING,
MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING, MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING,
MODEL_FOR_VISION_2_SEQ_MAPPING, MODEL_FOR_VISION_2_SEQ_MAPPING,
MODEL_FOR_VISUAL_QUESTION_ANSWERING_MAPPING, MODEL_FOR_VISUAL_QUESTION_ANSWERING_MAPPING,
...@@ -4144,6 +4147,7 @@ if TYPE_CHECKING: ...@@ -4144,6 +4147,7 @@ if TYPE_CHECKING:
AutoModelForSpeechSeq2Seq, AutoModelForSpeechSeq2Seq,
AutoModelForTableQuestionAnswering, AutoModelForTableQuestionAnswering,
AutoModelForTokenClassification, AutoModelForTokenClassification,
AutoModelForUniversalSegmentation,
AutoModelForVideoClassification, AutoModelForVideoClassification,
AutoModelForVision2Seq, AutoModelForVision2Seq,
AutoModelForVisualQuestionAnswering, AutoModelForVisualQuestionAnswering,
......
...@@ -67,6 +67,7 @@ else: ...@@ -67,6 +67,7 @@ else:
"MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING", "MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING",
"MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING", "MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING",
"MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING", "MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING",
"MODEL_FOR_UNIVERSAL_SEGMENTATION_MAPPING",
"MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING", "MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING",
"MODEL_FOR_VISION_2_SEQ_MAPPING", "MODEL_FOR_VISION_2_SEQ_MAPPING",
"MODEL_FOR_VISUAL_QUESTION_ANSWERING_MAPPING", "MODEL_FOR_VISUAL_QUESTION_ANSWERING_MAPPING",
...@@ -97,6 +98,7 @@ else: ...@@ -97,6 +98,7 @@ else:
"AutoModelForSpeechSeq2Seq", "AutoModelForSpeechSeq2Seq",
"AutoModelForTableQuestionAnswering", "AutoModelForTableQuestionAnswering",
"AutoModelForTokenClassification", "AutoModelForTokenClassification",
"AutoModelForUniversalSegmentation",
"AutoModelForVideoClassification", "AutoModelForVideoClassification",
"AutoModelForVision2Seq", "AutoModelForVision2Seq",
"AutoModelForVisualQuestionAnswering", "AutoModelForVisualQuestionAnswering",
...@@ -222,6 +224,7 @@ if TYPE_CHECKING: ...@@ -222,6 +224,7 @@ if TYPE_CHECKING:
MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING, MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING,
MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING, MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING,
MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING, MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING,
MODEL_FOR_UNIVERSAL_SEGMENTATION_MAPPING,
MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING, MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING,
MODEL_FOR_VISION_2_SEQ_MAPPING, MODEL_FOR_VISION_2_SEQ_MAPPING,
MODEL_FOR_VISUAL_QUESTION_ANSWERING_MAPPING, MODEL_FOR_VISUAL_QUESTION_ANSWERING_MAPPING,
...@@ -253,6 +256,7 @@ if TYPE_CHECKING: ...@@ -253,6 +256,7 @@ if TYPE_CHECKING:
AutoModelForSpeechSeq2Seq, AutoModelForSpeechSeq2Seq,
AutoModelForTableQuestionAnswering, AutoModelForTableQuestionAnswering,
AutoModelForTokenClassification, AutoModelForTokenClassification,
AutoModelForUniversalSegmentation,
AutoModelForVideoClassification, AutoModelForVideoClassification,
AutoModelForVision2Seq, AutoModelForVision2Seq,
AutoModelForVisualQuestionAnswering, AutoModelForVisualQuestionAnswering,
......
...@@ -434,6 +434,15 @@ MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING_NAMES = OrderedDict( ...@@ -434,6 +434,15 @@ MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING_NAMES = OrderedDict(
MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING_NAMES = OrderedDict( MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING_NAMES = OrderedDict(
[ [
# Model for Instance Segmentation mapping # Model for Instance Segmentation mapping
# MaskFormerForInstanceSegmentation can be removed from this mapping in v5
("maskformer", "MaskFormerForInstanceSegmentation"),
]
)
MODEL_FOR_UNIVERSAL_SEGMENTATION_MAPPING_NAMES = OrderedDict(
[
# Model for Universal Segmentation mapping
("detr", "DetrForSegmentation"),
("maskformer", "MaskFormerForInstanceSegmentation"), ("maskformer", "MaskFormerForInstanceSegmentation"),
] ]
) )
...@@ -892,6 +901,9 @@ MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING = _LazyAutoMapping( ...@@ -892,6 +901,9 @@ MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING = _LazyAutoMapping(
MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING = _LazyAutoMapping( MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING = _LazyAutoMapping(
CONFIG_MAPPING_NAMES, MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING_NAMES CONFIG_MAPPING_NAMES, MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING_NAMES
) )
MODEL_FOR_UNIVERSAL_SEGMENTATION_MAPPING = _LazyAutoMapping(
CONFIG_MAPPING_NAMES, MODEL_FOR_UNIVERSAL_SEGMENTATION_MAPPING_NAMES
)
MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING = _LazyAutoMapping( MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING = _LazyAutoMapping(
CONFIG_MAPPING_NAMES, MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING_NAMES CONFIG_MAPPING_NAMES, MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING_NAMES
) )
...@@ -1083,6 +1095,15 @@ AutoModelForSemanticSegmentation = auto_class_update( ...@@ -1083,6 +1095,15 @@ AutoModelForSemanticSegmentation = auto_class_update(
) )
class AutoModelForUniversalSegmentation(_BaseAutoModelClass):
_model_mapping = MODEL_FOR_UNIVERSAL_SEGMENTATION_MAPPING
AutoModelForUniversalSegmentation = auto_class_update(
AutoModelForUniversalSegmentation, head_doc="universal image segmentation"
)
class AutoModelForInstanceSegmentation(_BaseAutoModelClass): class AutoModelForInstanceSegmentation(_BaseAutoModelClass):
_model_mapping = MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING _model_mapping = MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING
......
...@@ -16,6 +16,7 @@ if is_torch_available(): ...@@ -16,6 +16,7 @@ if is_torch_available():
MODEL_FOR_IMAGE_SEGMENTATION_MAPPING, MODEL_FOR_IMAGE_SEGMENTATION_MAPPING,
MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING, MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING,
MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING, MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING,
MODEL_FOR_UNIVERSAL_SEGMENTATION_MAPPING,
) )
...@@ -75,6 +76,7 @@ class ImageSegmentationPipeline(Pipeline): ...@@ -75,6 +76,7 @@ class ImageSegmentationPipeline(Pipeline):
MODEL_FOR_IMAGE_SEGMENTATION_MAPPING.items() MODEL_FOR_IMAGE_SEGMENTATION_MAPPING.items()
+ MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING.items() + MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING.items()
+ MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING.items() + MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING.items()
+ MODEL_FOR_UNIVERSAL_SEGMENTATION_MAPPING.items()
) )
) )
......
...@@ -446,6 +446,9 @@ MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING = None ...@@ -446,6 +446,9 @@ MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING = None
MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING = None MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING = None
MODEL_FOR_UNIVERSAL_SEGMENTATION_MAPPING = None
MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING = None MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING = None
...@@ -639,6 +642,13 @@ class AutoModelForTokenClassification(metaclass=DummyObject): ...@@ -639,6 +642,13 @@ class AutoModelForTokenClassification(metaclass=DummyObject):
requires_backends(self, ["torch"]) requires_backends(self, ["torch"])
class AutoModelForUniversalSegmentation(metaclass=DummyObject):
_backends = ["torch"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["torch"])
class AutoModelForVideoClassification(metaclass=DummyObject): class AutoModelForVideoClassification(metaclass=DummyObject):
_backends = ["torch"] _backends = ["torch"]
......
...@@ -239,7 +239,6 @@ IGNORE_NON_AUTO_CONFIGURED = PRIVATE_MODELS.copy() + [ ...@@ -239,7 +239,6 @@ IGNORE_NON_AUTO_CONFIGURED = PRIVATE_MODELS.copy() + [
"VisualBertForMultipleChoice", "VisualBertForMultipleChoice",
"TFWav2Vec2ForCTC", "TFWav2Vec2ForCTC",
"TFHubertForCTC", "TFHubertForCTC",
"MaskFormerForInstanceSegmentation",
"XCLIPVisionModel", "XCLIPVisionModel",
"XCLIPTextModel", "XCLIPTextModel",
] ]
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment