Unverified Commit 88399476 authored by Bartosz Szmelczynski's avatar Bartosz Szmelczynski Committed by GitHub
Browse files

Fix bigbird random attention (#21023)

* switch np.random.permutation to jax.random.permuation

* remove comments

* remove leftover comment

* skip similarity tests

* modify indices_prng_key usage, add deterministic behaviour

* update style

* remove unused import

* remove copy statement since classes are not identical

* remove numpy import

* revert removing copied from statements

* make style from copied

* remove copied from statement

* update copied from statement to include only np.ndarry

* add deterministic args, unittestskip equivalence tests
parent 27b66bea
......@@ -14,10 +14,8 @@
import unittest
import numpy as np
from transformers import BigBirdConfig, is_flax_available
from transformers.testing_utils import require_flax, slow
from transformers.testing_utils import is_pt_flax_cross_test, require_flax, slow
from ...test_modeling_flax_common import FlaxModelTesterMixin, ids_tensor, random_attention_mask
......@@ -129,7 +127,11 @@ class FlaxBigBirdModelTester(unittest.TestCase):
def prepare_config_and_inputs_for_common(self):
config_and_inputs = self.prepare_config_and_inputs()
config, input_ids, token_type_ids, attention_mask = config_and_inputs
inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": attention_mask}
inputs_dict = {
"input_ids": input_ids,
"token_type_ids": token_type_ids,
"attention_mask": attention_mask,
}
return config, inputs_dict
......@@ -180,8 +182,7 @@ class FlaxBigBirdModelTest(FlaxModelTesterMixin, unittest.TestCase):
def test_model_from_pretrained(self):
for model_class_name in self.all_model_classes:
model = model_class_name.from_pretrained("google/bigbird-roberta-base")
outputs = model(np.ones((1, 1)))
self.assertIsNotNone(outputs)
self.assertIsNotNone(model)
def test_attention_outputs(self):
if self.test_attn_probs:
......@@ -220,3 +221,17 @@ class FlaxBigBirdModelTest(FlaxModelTesterMixin, unittest.TestCase):
return
else:
super().check_pt_flax_outputs(fx_outputs, pt_outputs, model_class, tol, name, attributes)
@is_pt_flax_cross_test
@unittest.skip(
reason="Current Pytorch implementation has bug with random attention -> it always uses it not matter if we are in eval/train mode"
)
def test_equivalence_flax_to_pt(self):
pass
@is_pt_flax_cross_test
@unittest.skip(
reason="Current Pytorch implementation has bug with random attention -> it always uses it not matter if we are in eval/train mode"
)
def test_equivalence_pt_to_flax(self):
pass
......@@ -158,7 +158,7 @@ class FlaxModelTesterMixin:
if "ForMultipleChoice" in model_class.__name__:
inputs_dict = {
k: jnp.broadcast_to(v[:, None], (v.shape[0], self.model_tester.num_choices, v.shape[-1]))
if isinstance(v, (jnp.ndarray, np.ndarray))
if isinstance(v, (jnp.ndarray, np.ndarray)) and k != "indices_prng_key"
else v
for k, v in inputs_dict.items()
}
......@@ -629,7 +629,6 @@ class FlaxModelTesterMixin:
def test_hidden_states_output(self):
def check_hidden_states_output(inputs_dict, config, model_class):
model = model_class(config)
outputs = model(**self._prepare_for_class(inputs_dict, model_class))
hidden_states = outputs.encoder_hidden_states if config.is_encoder_decoder else outputs.hidden_states
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment