"...git@developer.sourcefind.cn:chenpangpang/transformers.git" did not exist on "b911c1f10ff8b31bdd0658dadaa0b1357fe47004"
Unverified Commit bdf36dcd authored by amyeroberts's avatar amyeroberts Committed by GitHub
Browse files

Enable HF pretrained backbones (#31145)

* Enable loading HF or timm backbone checkpoints

* Fix up

* Fix test - pass in proper out_indices

* Update docs

* Fix tvp tests

* Fix doc examples

* Fix doc examples

* Try to resolve DPT backbone param init

* Don't conditionally set to None

* Add condition based on whether backbone is defined

* Address review comments
parent a3d351c0
...@@ -21,6 +21,7 @@ import numpy as np ...@@ -21,6 +21,7 @@ import numpy as np
from tests.test_modeling_common import floats_tensor from tests.test_modeling_common import floats_tensor
from transformers import Mask2FormerConfig, is_torch_available, is_vision_available from transformers import Mask2FormerConfig, is_torch_available, is_vision_available
from transformers.testing_utils import ( from transformers.testing_utils import (
require_timm,
require_torch, require_torch,
require_torch_accelerator, require_torch_accelerator,
require_torch_fp16, require_torch_fp16,
...@@ -317,6 +318,37 @@ class Mask2FormerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestC ...@@ -317,6 +318,37 @@ class Mask2FormerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestC
self.assertIsNotNone(transformer_decoder_hidden_states.grad) self.assertIsNotNone(transformer_decoder_hidden_states.grad)
self.assertIsNotNone(attentions.grad) self.assertIsNotNone(attentions.grad)
@require_timm
def test_backbone_selection(self):
    """Check that both timm and HF Hub backbones load, with out_indices forwarded."""
    config, _ = self.model_tester.prepare_config_and_inputs_for_common()
    config.backbone_config = None
    config.backbone_kwargs = {"out_indices": [1, 2, 3]}
    config.use_pretrained_backbone = True

    def _assert_out_indices_propagated():
        # Confirm the configured out_indices reached the pixel-level encoder.
        for model_class in self.all_model_classes:
            model = model_class(config).to(torch_device).eval()
            class_name = model.__class__.__name__
            if class_name == "Mask2FormerModel":
                self.assertEqual(model.pixel_level_module.encoder.out_indices, [1, 2, 3])
            elif class_name == "Mask2FormerForUniversalSegmentation":
                self.assertEqual(model.model.pixel_level_module.encoder.out_indices, [1, 2, 3])

    # Load a timm backbone. A transformers checkpoint can't be loaded through
    # timm, since features_only / out_indices can't be specified there.
    config.backbone = "resnet18"
    config.use_timm_backbone = True
    _assert_out_indices_propagated()

    # Load a HF Hub backbone.
    config.backbone = "microsoft/resnet-18"
    config.use_timm_backbone = False
    _assert_out_indices_propagated()
TOLERANCE = 1e-4 TOLERANCE = 1e-4
......
...@@ -22,6 +22,7 @@ import numpy as np ...@@ -22,6 +22,7 @@ import numpy as np
from tests.test_modeling_common import floats_tensor from tests.test_modeling_common import floats_tensor
from transformers import DetrConfig, MaskFormerConfig, SwinConfig, is_torch_available, is_vision_available from transformers import DetrConfig, MaskFormerConfig, SwinConfig, is_torch_available, is_vision_available
from transformers.testing_utils import ( from transformers.testing_utils import (
require_timm,
require_torch, require_torch,
require_torch_accelerator, require_torch_accelerator,
require_torch_fp16, require_torch_fp16,
...@@ -444,6 +445,37 @@ class MaskFormerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCa ...@@ -444,6 +445,37 @@ class MaskFormerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCa
continue continue
recursive_check(model_batched_output[key], model_row_output[key], model_name, key) recursive_check(model_batched_output[key], model_row_output[key], model_name, key)
@require_timm
def test_backbone_selection(self):
    """Check that both timm and HF Hub backbones load, with out_indices forwarded."""
    config, _ = self.model_tester.prepare_config_and_inputs_for_common()
    config.backbone_config = None
    config.backbone_kwargs = {"out_indices": [1, 2, 3]}
    config.use_pretrained_backbone = True

    def _assert_out_indices_propagated():
        # Confirm the configured out_indices reached the pixel-level encoder.
        for model_class in self.all_model_classes:
            model = model_class(config).to(torch_device).eval()
            class_name = model.__class__.__name__
            if class_name == "MaskFormerModel":
                self.assertEqual(model.pixel_level_module.encoder.out_indices, [1, 2, 3])
            elif class_name == "MaskFormerForUniversalSegmentation":
                self.assertEqual(model.model.pixel_level_module.encoder.out_indices, [1, 2, 3])

    # Load a timm backbone. A transformers checkpoint can't be loaded through
    # timm, since features_only / out_indices can't be specified there.
    config.backbone = "resnet18"
    config.use_timm_backbone = True
    _assert_out_indices_propagated()

    # Load a HF Hub backbone.
    config.backbone = "microsoft/resnet-18"
    config.use_timm_backbone = False
    _assert_out_indices_propagated()
TOLERANCE = 1e-4 TOLERANCE = 1e-4
......
...@@ -23,6 +23,7 @@ import numpy as np ...@@ -23,6 +23,7 @@ import numpy as np
from tests.test_modeling_common import floats_tensor from tests.test_modeling_common import floats_tensor
from transformers import OneFormerConfig, is_torch_available, is_vision_available from transformers import OneFormerConfig, is_torch_available, is_vision_available
from transformers.testing_utils import ( from transformers.testing_utils import (
require_timm,
require_torch, require_torch,
require_torch_accelerator, require_torch_accelerator,
require_torch_fp16, require_torch_fp16,
...@@ -446,6 +447,37 @@ class OneFormerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCas ...@@ -446,6 +447,37 @@ class OneFormerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCas
self.assertIsNotNone(transformer_decoder_mask_predictions.grad) self.assertIsNotNone(transformer_decoder_mask_predictions.grad)
self.assertIsNotNone(attentions.grad) self.assertIsNotNone(attentions.grad)
@require_timm
def test_backbone_selection(self):
    """Check that both timm and HF Hub backbones load, with out_indices forwarded."""
    config, _ = self.model_tester.prepare_config_and_inputs_for_common()
    config.backbone_config = None
    config.backbone_kwargs = {"out_indices": [1, 2, 3]}
    config.use_pretrained_backbone = True

    def _assert_out_indices_propagated():
        # Confirm the configured out_indices reached the pixel-level encoder.
        for model_class in self.all_model_classes:
            model = model_class(config).to(torch_device).eval()
            class_name = model.__class__.__name__
            if class_name == "OneFormerModel":
                self.assertEqual(model.pixel_level_module.encoder.out_indices, [1, 2, 3])
            elif class_name == "OneFormerForUniversalSegmentation":
                self.assertEqual(model.model.pixel_level_module.encoder.out_indices, [1, 2, 3])

    # Load a timm backbone. A transformers checkpoint can't be loaded through
    # timm, since features_only / out_indices can't be specified there.
    config.backbone = "resnet18"
    config.use_timm_backbone = True
    _assert_out_indices_propagated()

    # Load a HF Hub backbone.
    config.backbone = "microsoft/resnet-18"
    config.use_timm_backbone = False
    _assert_out_indices_propagated()
TOLERANCE = 1e-4 TOLERANCE = 1e-4
......
...@@ -485,6 +485,38 @@ class TableTransformerModelTest(ModelTesterMixin, GenerationTesterMixin, Pipelin ...@@ -485,6 +485,38 @@ class TableTransformerModelTest(ModelTesterMixin, GenerationTesterMixin, Pipelin
self.assertTrue(outputs) self.assertTrue(outputs)
def test_hf_backbone(self):
    """Load a pretrained HF Hub checkpoint as backbone and run a forward pass."""
    config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

    # Select a pretrained HF checkpoint instead of an inline backbone config.
    config.backbone = "microsoft/resnet-18"
    config.backbone_config = None
    config.use_timm_backbone = False
    config.use_pretrained_backbone = True
    config.backbone_kwargs = {"out_indices": [2, 3, 4]}

    for model_class in self.all_model_classes:
        model = model_class(config)
        model.to(torch_device)
        model.eval()
        with torch.no_grad():
            outputs = model(**self._prepare_for_class(inputs_dict, model_class))

        if model_class.__name__ == "TableTransformerForObjectDetection":
            expected_shape = (
                self.model_tester.batch_size,
                self.model_tester.num_queries,
                self.model_tester.num_labels + 1,
            )
            self.assertEqual(outputs.logits.shape, expected_shape)
            backbone = model.model.backbone
        else:
            backbone = model.backbone
        # Three out_indices should have been propagated to the backbone,
        # yielding three intermediate feature maps.
        self.assertEqual(len(backbone.conv_encoder.intermediate_channel_sizes), 3)
        self.assertTrue(outputs)
def test_greyscale_images(self): def test_greyscale_images(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
......
...@@ -16,8 +16,8 @@ ...@@ -16,8 +16,8 @@
import unittest import unittest
from transformers import ResNetConfig, TvpConfig from transformers import ResNetConfig, TimmBackboneConfig, TvpConfig
from transformers.testing_utils import require_torch, require_vision, torch_device from transformers.testing_utils import require_timm, require_torch, require_vision, torch_device
from transformers.utils import cached_property, is_torch_available, is_vision_available from transformers.utils import cached_property, is_torch_available, is_vision_available
from ...test_modeling_common import ( from ...test_modeling_common import (
...@@ -211,6 +211,39 @@ class TVPModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase): ...@@ -211,6 +211,39 @@ class TVPModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
msg=f"Parameter {name} of model {model_class} seems not properly initialized", msg=f"Parameter {name} of model {model_class} seems not properly initialized",
) )
@require_timm
def test_backbone_selection(self):
    """Check that timm and HF backbones load via backbone_config, with out_indices kept."""
    config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

    # Force the load_backbone path rather than the hybrid path.
    config.is_hybrid = False
    # Backbones are selected through configs here, as the modeling file
    # assumes config.backbone_config is always set.
    config.use_pretrained_backbone = False
    config.backbone_kwargs = None

    def _assert_out_indices_propagated():
        # Confirm both requested out_indices reached the vision backbone.
        for model_class in self.all_model_classes:
            model = model_class(config)
            model.to(torch_device)
            model.eval()
            class_name = model.__class__.__name__
            if class_name == "TvpModel":
                self.assertEqual(len(model.vision_model.backbone.out_indices), 2)
            elif class_name == "TvpForVideoGrounding":
                self.assertEqual(len(model.model.vision_model.backbone.out_indices), 2)

    # Load a timm backbone. hidden_sizes is hacked onto the config purely to
    # exercise the backbone-loading path in this test.
    config.backbone_config = TimmBackboneConfig("resnet18", out_indices=[-2, -1], hidden_sizes=[64, 128])
    _assert_out_indices_propagated()

    # Load a HF backbone.
    config.backbone_config = ResNetConfig.from_pretrained("facebook/dinov2-small", out_indices=[-2, -1])
    _assert_out_indices_propagated()
# We will verify our results on an image of cute cats # We will verify our results on an image of cute cats
def prepare_img(): def prepare_img():
......
...@@ -19,7 +19,14 @@ import unittest ...@@ -19,7 +19,14 @@ import unittest
from huggingface_hub import hf_hub_download from huggingface_hub import hf_hub_download
from transformers import ConvNextConfig, UperNetConfig from transformers import ConvNextConfig, UperNetConfig
from transformers.testing_utils import require_torch, require_torch_multi_gpu, require_vision, slow, torch_device from transformers.testing_utils import (
require_timm,
require_torch,
require_torch_multi_gpu,
require_vision,
slow,
torch_device,
)
from transformers.utils import is_torch_available, is_vision_available from transformers.utils import is_torch_available, is_vision_available
from ...test_configuration_common import ConfigTester from ...test_configuration_common import ConfigTester
...@@ -240,6 +247,33 @@ class UperNetModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase) ...@@ -240,6 +247,33 @@ class UperNetModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase)
msg=f"Parameter {name} of model {model_class} seems not properly initialized", msg=f"Parameter {name} of model {model_class} seems not properly initialized",
) )
@require_timm
def test_backbone_selection(self):
    """Check that both timm and HF Hub backbones load, with out_indices forwarded."""
    config, _ = self.model_tester.prepare_config_and_inputs_for_common()
    config.backbone_config = None
    config.backbone_kwargs = {"out_indices": [1, 2, 3]}
    config.use_pretrained_backbone = True

    def _assert_out_indices_propagated():
        # Confirm the configured out_indices reached the backbone.
        for model_class in self.all_model_classes:
            model = model_class(config).to(torch_device).eval()
            if model.__class__.__name__ == "UperNetForUniversalSegmentation":
                self.assertEqual(model.backbone.out_indices, [1, 2, 3])

    # Load a timm backbone. A transformers checkpoint can't be loaded through
    # timm, since features_only / out_indices can't be specified there.
    config.backbone = "resnet18"
    config.use_timm_backbone = True
    _assert_out_indices_propagated()

    # Load a HF Hub backbone.
    config.backbone = "microsoft/resnet-18"
    config.use_timm_backbone = False
    _assert_out_indices_propagated()
@unittest.skip(reason="UperNet does not have tied weights") @unittest.skip(reason="UperNet does not have tied weights")
def test_tied_model_weights_key_ignore(self): def test_tied_model_weights_key_ignore(self):
pass pass
......
...@@ -20,6 +20,7 @@ from huggingface_hub import hf_hub_download ...@@ -20,6 +20,7 @@ from huggingface_hub import hf_hub_download
from transformers import VitMatteConfig from transformers import VitMatteConfig
from transformers.testing_utils import ( from transformers.testing_utils import (
require_timm,
require_torch, require_torch,
slow, slow,
torch_device, torch_device,
...@@ -236,6 +237,35 @@ class VitMatteModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase ...@@ -236,6 +237,35 @@ class VitMatteModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase
check_hidden_states_output(inputs_dict, config, model_class) check_hidden_states_output(inputs_dict, config, model_class)
@require_timm
def test_backbone_selection(self):
    """Check that both timm and HF Hub backbones load, with out_indices forwarded."""
    config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
    config.use_pretrained_backbone = True
    config.backbone_config = None
    config.backbone_kwargs = {"out_indices": [-2, -1]}
    # Force the load_backbone path.
    config.is_hybrid = False

    def _assert_out_indices_propagated():
        for model_class in self.all_model_classes:
            model = model_class(config)
            model.to(torch_device)
            model.eval()
            if model.__class__.__name__ == "VitMatteForImageMatting":
                # Confirm both requested out_indices reached the backbone.
                self.assertEqual(len(model.backbone.out_indices), 2)

    # Load a timm backbone.
    config.backbone = "resnet18"
    config.use_timm_backbone = True
    _assert_out_indices_propagated()

    # Load a HF backbone.
    config.backbone = "facebook/dinov2-small"
    config.use_timm_backbone = False
    _assert_out_indices_propagated()
@require_torch @require_torch
class VitMatteModelIntegrationTest(unittest.TestCase): class VitMatteModelIntegrationTest(unittest.TestCase):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment