Unverified Commit 952a77b0 authored by Patrick von Platen, committed by GitHub

[Perceiver] Skip multi-gpu tests for now (#14813)

* [Perceiver] Skip multi-gpu tests for now

* Update tests/test_modeling_perceiver.py

* up

* up
parent 8a818c26
@@ -86,6 +86,10 @@ is implemented in the library. Note that the models available in the library only showcase some examples of what you can do
 with the Perceiver. There are many more use cases, including question answering, named-entity recognition, object detection,
 audio classification, video classification, etc.
+
+**Note**:
+
+- Perceiver does **not** work with `torch.nn.DataParallel` due to a bug in PyTorch, see [issue #36035](https://github.com/pytorch/pytorch/issues/36035)

 ## Perceiver specific outputs

 [[autodoc]] models.perceiver.modeling_perceiver.PerceiverModelOutput
@@ -35,6 +35,11 @@ while being much more memory-efficient and much faster on long sequences.*

 This model was contributed by `patrickvonplaten <https://huggingface.co/patrickvonplaten>`__. The Authors' code can be
 found `here <https://github.com/google/trax/tree/master/trax/models/reformer>`__.
+
+**Note**:
+
+- Reformer does **not** work with `torch.nn.DataParallel` due to a bug in PyTorch, see `issue #36035
+  <https://github.com/pytorch/pytorch/issues/36035>`__

 Axial Positional Encodings
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -44,6 +44,11 @@ Tips:

 This model was contributed by `thomwolf <https://huggingface.co/thomwolf>`__. The original code can be found `here
 <https://github.com/kimiyoung/transformer-xl>`__.
+
+**Note**:
+
+- TransformerXL does **not** work with `torch.nn.DataParallel` due to a bug in PyTorch, see `issue #36035
+  <https://github.com/pytorch/pytorch/issues/36035>`__

 TransfoXLConfig
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
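All three doc notes point at the same upstream bug, pytorch/pytorch#36035 in `torch.nn.DataParallel`. For context (this sketch is not part of the commit), the usual way to run such models on multiple GPUs is `torch.nn.parallel.DistributedDataParallel`; the tiny `Linear` model below is a stand-in for any of the affected models:

```python
# Minimal sketch: multi-GPU forward pass with DistributedDataParallel instead
# of nn.DataParallel. The Linear model is a stand-in for e.g. PerceiverModel.
# Launch with: torchrun --nproc_per_node=2 ddp_sketch.py
import os

import torch
import torch.distributed as dist
from torch.nn.parallel import DistributedDataParallel as DDP


def main():
    dist.init_process_group(backend="nccl")
    local_rank = int(os.environ["LOCAL_RANK"])
    torch.cuda.set_device(local_rank)

    model = torch.nn.Linear(16, 4).to(local_rank)  # stand-in model
    model = DDP(model, device_ids=[local_rank])

    # Each process holds its own replica and consumes its own shard of the
    # batch; nothing is scattered/gathered through a single driver process.
    inputs = torch.randn(8, 16, device=local_rank)
    with torch.no_grad():
        outputs = model(inputs)
    print(f"rank {dist.get_rank()}: output shape {tuple(outputs.shape)}")

    dist.destroy_process_group()


if __name__ == "__main__":
    main()
```

Because DDP runs one process per device, the single-process replicate/scatter path that triggers the `DataParallel` bug is never exercised.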
@@ -2128,7 +2128,9 @@ class PerceiverBasicDecoder(PerceiverAbstractDecoder):
             # to get the indices for the unflattened array
             # unravel_index returns a tuple (x_idx, y_idx, ...)
             # stack to get the [n, d] tensor of coordinates
-            indices = list(torch.from_numpy(x) for x in np.unravel_index(subsampled_points, self.output_index_dims))
+            indices = list(
+                torch.from_numpy(x) for x in np.unravel_index(subsampled_points.cpu(), self.output_index_dims)
+            )
             pos = torch.stack(indices, dim=1)
             batch_size = inputs.shape[0]
             # Map these coordinates to [-1, 1]
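For context on the `.cpu()` call added above: `np.unravel_index` converts flat indices into per-dimension coordinates on host memory, so a CUDA tensor of indices must be moved to the CPU first. A self-contained illustration (the grid shape `(4, 5)` and the index values are made up for the example):

```python
import numpy as np
import torch

# Flat indices into a hypothetical grid of output_index_dims = (4, 5).
flat = torch.tensor([0, 7, 19])
if torch.cuda.is_available():
    flat = flat.cuda()  # without .cpu() below, np.unravel_index would raise

# unravel_index returns one index array per dimension: (row_idx, col_idx).
rows, cols = np.unravel_index(flat.cpu(), (4, 5))

# Stack into an [n, d] tensor of coordinates, as in PerceiverBasicDecoder.
pos = torch.stack([torch.from_numpy(rows), torch.from_numpy(cols)], dim=1)
print(pos)  # tensor([[0, 0], [1, 2], [3, 4]])
```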
@@ -758,29 +758,11 @@ class PerceiverModelTest(ModelTesterMixin, unittest.TestCase):
             loss.backward()

     @require_torch_multi_gpu
+    @unittest.skip(
+        reason="Perceiver does not work with data parallel (DP) because of a bug in PyTorch: https://github.com/pytorch/pytorch/issues/36035"
+    )
     def test_multi_gpu_data_parallel_forward(self):
-        for model_class in self.all_model_classes:
-            config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_model_class(model_class)
-
-            # some params shouldn't be scattered by nn.DataParallel
-            # so just remove them if they are present.
-            blacklist_non_batched_params = ["head_mask", "decoder_head_mask", "cross_attn_head_mask"]
-            for k in blacklist_non_batched_params:
-                inputs_dict.pop(k, None)
-
-            # move input tensors to cuda:0
-            for k, v in inputs_dict.items():
-                if torch.is_tensor(v):
-                    inputs_dict[k] = v.to(0)
-
-            model = model_class(config=config)
-            model.to(0)
-            model.eval()
-
-            # Wrap model in nn.DataParallel
-            model = nn.DataParallel(model)
-            with torch.no_grad():
-                _ = model(**self._prepare_for_class(inputs_dict, model_class))
+        pass

     @unittest.skip(reason="Perceiver models don't have a typical head like is the case with BERT")
     def test_save_load_fast_init_from_base(self):
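The change above replaces the test body with an explicit `@unittest.skip(reason=...)`. The difference matters for reporting: an empty body counts as a pass, while a skip shows up in the test summary together with its reason. A standalone sketch of the two behaviors (test names are illustrative):

```python
import unittest


class SkipPatternTest(unittest.TestCase):
    def test_silent_pass(self):
        # Reported as a *pass*, hiding the fact that nothing was checked.
        pass

    @unittest.skip(reason="explicitly skipped: documents why this test cannot run")
    def test_explicit_skip(self):
        self.fail("never executed")


if __name__ == "__main__":
    # verbosity=2 prints "ok" for the first test and
    # "skipped 'explicitly skipped: ...'" for the second.
    unittest.main(verbosity=2)
```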
@@ -573,8 +573,10 @@ class ReformerTesterMixin:
         self.model_tester.create_and_check_reformer_model_fp16_generate(*config_and_inputs)

     @require_torch_multi_gpu
+    @unittest.skip(
+        reason="Reformer does not work with data parallel (DP) because of a bug in PyTorch: https://github.com/pytorch/pytorch/issues/36035"
+    )
     def test_multi_gpu_data_parallel_forward(self):
-        # Opt-out of this test.
         pass

     def test_for_sequence_classification(self):
@@ -232,8 +232,10 @@ class TransfoXLModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestCase):
         return

     @require_torch_multi_gpu
+    @unittest.skip(
+        reason="Transfo-XL does not work with data parallel (DP) because of a bug in PyTorch: https://github.com/pytorch/pytorch/issues/36035"
+    )
     def test_multi_gpu_data_parallel_forward(self):
-        # Opt-out of this test.
         pass

     @slow
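Note that `@require_torch_multi_gpu` stays on each skipped test; it comes from `transformers.testing_utils` and itself skips the test when fewer than two GPUs are available. A rough approximation of such a guard (not the library's actual implementation), built on `unittest.skipUnless`:

```python
import unittest

import torch


def require_torch_multi_gpu(test_case):
    # Skip unless at least two CUDA devices are visible; approximates the
    # behavior of transformers.testing_utils.require_torch_multi_gpu.
    return unittest.skipUnless(torch.cuda.device_count() > 1, "test requires multiple GPUs")(test_case)


class MultiGpuTest(unittest.TestCase):
    @require_torch_multi_gpu
    def test_needs_two_gpus(self):
        self.assertGreater(torch.cuda.device_count(), 1)


if __name__ == "__main__":
    unittest.main()
```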