Unverified Commit 260b9d21 authored by Matt, committed by GitHub

Even more TF test fixes (#28146)

* Fix vision text dual encoder

* Small cleanup for wav2vec2 (not fixed yet)

* Small fix for vision_encoder_decoder

* Fix SAM builds

* Update TFBertTokenizer test with modern exporting + tokenizer

* Fix DeBERTa

* Fix DeBERTav2

* Try RAG fix but it's impossible to test locally

* Actually fix RAG now that I got FAISS working somehow

* Fix Wav2Vec2, add sermon

* Fix Hubert
parent f9a98c47
@@ -57,7 +57,7 @@ def convert_tf_weight_name_to_pt_weight_name(
         transposed with regards to each other
     """
     if name_scope is not None:
-        if not tf_name.startswith(name_scope):
+        if not tf_name.startswith(name_scope) and "final_logits_bias" not in tf_name:
             raise ValueError(
                 f"Weight name {tf_name} does not start with name_scope {name_scope}. This is an internal error "
                 "in Transformers, so (unless you were doing something really evil) please open an issue to report it!"
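The hunk above relaxes a sanity check in TF-to-PT weight-name conversion: a weight that legitimately lives outside the model's name scope (the seq2seq `final_logits_bias` buffer) no longer trips the error. A minimal sketch of the relaxed guard, with illustrative weight names that are not taken from the library:

```python
from typing import Optional

# Minimal sketch of the relaxed guard; weight names here are illustrative.
def check_name_scope(tf_name: str, name_scope: Optional[str]) -> None:
    if name_scope is None:
        return
    # "final_logits_bias" lives outside the model's name scope, so it is
    # exempted from the prefix check.
    if not tf_name.startswith(name_scope) and "final_logits_bias" not in tf_name:
        raise ValueError(f"Weight name {tf_name} does not start with name_scope {name_scope}.")

check_name_scope("model/encoder/layer_0/kernel", "model")  # ok: correctly prefixed
check_name_scope("final_logits_bias", "model")             # ok after the fix
```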
@@ -638,10 +638,10 @@ class TFDebertaDisentangledSelfAttention(tf.keras.layers.Layer):
                 self.pos_dropout.build(None)
         if getattr(self, "pos_proj", None) is not None:
             with tf.name_scope(self.pos_proj.name):
-                self.pos_proj.build(None)
+                self.pos_proj.build([self.config.hidden_size])
         if getattr(self, "pos_q_proj", None) is not None:
             with tf.name_scope(self.pos_q_proj.name):
-                self.pos_q_proj.build(None)
+                self.pos_q_proj.build([self.config.hidden_size])
 
     def transpose_for_scores(self, tensor: tf.Tensor) -> tf.Tensor:
         shape = shape_list(tensor)[:-1] + [self.num_attention_heads, -1]
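The DeBERTa fix matters because `Dense`-style projections cannot create their kernels from `build(None)`: they need the input's last dimension. A minimal illustration (the hidden size of 768 is an assumption, not from the diff):

```python
import tensorflow as tf

hidden_size = 768  # illustrative value, not from the diff
proj = tf.keras.layers.Dense(hidden_size, name="pos_proj")
# Dense needs the input's final dimension to create its kernel, so build()
# must receive a shape whose last entry is set, as in the fix above.
proj.build([hidden_size])
print(proj.kernel.shape)  # (768, 768)
```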
@@ -362,6 +362,9 @@ class TFDebertaV2ConvLayer(tf.keras.layers.Layer):
         self.config = config
 
     def build(self, input_shape=None):
+        if self.built:
+            return
+        self.built = True
         with tf.name_scope("conv"):
             self.conv_kernel = self.add_weight(
                 name="kernel",
@@ -371,13 +374,9 @@ class TFDebertaV2ConvLayer(tf.keras.layers.Layer):
             self.conv_bias = self.add_weight(
                 name="bias", shape=[self.config.hidden_size], initializer=tf.zeros_initializer()
             )
-        return
-        if self.built:
-            return
-        self.built = True
         if getattr(self, "LayerNorm", None) is not None:
             with tf.name_scope(self.LayerNorm.name):
-                self.LayerNorm.build(None)
+                self.LayerNorm.build([None, None, self.config.hidden_size])
         if getattr(self, "dropout", None) is not None:
             with tf.name_scope(self.dropout.name):
                 self.dropout.build(None)
@@ -453,7 +452,7 @@ class TFDebertaV2Encoder(tf.keras.layers.Layer):
                 self.conv.build(None)
         if getattr(self, "LayerNorm", None) is not None:
             with tf.name_scope(self.LayerNorm.name):
-                self.LayerNorm.build([None, None, self.config.hidden_size])
+                self.LayerNorm.build([None, self.config.hidden_size])
         if getattr(self, "layer", None) is not None:
             for layer in self.layer:
                 with tf.name_scope(layer.name):
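Conversely, the encoder-level `LayerNorm` sees rank-2 input (the relative embedding table), so its build shape drops a dimension. The learned parameters depend only on the last axis either way; a quick check (shape values illustrative):

```python
import tensorflow as tf

ln = tf.keras.layers.LayerNormalization(epsilon=1e-7)
ln.build([None, 768])  # rank-2 input, e.g. a (positions, hidden) table
print(ln.gamma.shape)  # (768,) -- parameters depend only on the last axis
```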
@@ -401,7 +401,6 @@ class TFHubertWeightNormConv1D(tf.keras.layers.Conv1D):
         )
         self.explicit_padding = explicit_padding
         self.filter_axis = 2
-        self.initialized = False
         self.kernel_norm_axes = tf.constant([0, 1])
 
     def _init_norm(self):
@@ -428,13 +427,13 @@ class TFHubertWeightNormConv1D(tf.keras.layers.Conv1D):
             dtype=self.weight_v.dtype,
             trainable=True,
         )
+        self._init_norm()
         self.bias = self.add_weight(name="bias", shape=(self.filters,), initializer="zeros", trainable=True)
 
     def call(self, inputs):
-        if not self.initialized:
-            self._init_norm()
-            self.initialized = True
+        # TODO Matt: Assigning to attributes in call() is deeply sinful in TensorFlow, as it should be idempotent.
+        # This whole layer should be replaced by a layer that doesn't inherit from Conv1D, but instead calls
+        # a functional 1d convolution with normalized weights that it generates (but does not store!)
        self._normalize_kernel()
         padded_inputs = tf.pad(inputs, ((0, 0), (self.explicit_padding, self.explicit_padding), (0, 0)))
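The TODO (the "sermon" from the commit message) points at the eventual fix: a layer that owns `weight_v`/`weight_g` and derives the normalized kernel functionally on every call, instead of mutating the inherited `Conv1D` kernel. A rough, simplified sketch of that idea (ungrouped convolution, illustrative names; not the library's implementation):

```python
import tensorflow as tf

class FunctionalWeightNormConv1D(tf.keras.layers.Layer):
    """Rough sketch of the TODO's proposal: the normalized kernel is derived
    from weight_v / weight_g on every call and never stored as a variable."""

    def __init__(self, filters, kernel_size, explicit_padding=0, **kwargs):
        super().__init__(**kwargs)
        self.filters = filters
        self.kernel_size = kernel_size
        self.explicit_padding = explicit_padding

    def build(self, input_shape):
        in_dim = int(input_shape[-1])
        self.weight_v = self.add_weight(name="weight_v", shape=(self.kernel_size, in_dim, self.filters))
        self.weight_g = self.add_weight(name="weight_g", shape=(1, 1, self.filters), initializer="ones")
        self.bias = self.add_weight(name="bias", shape=(self.filters,), initializer="zeros")

    def call(self, inputs):
        # Idempotent: a fresh normalized kernel is computed on every call and
        # never assigned back to a variable.
        norm = tf.sqrt(tf.reduce_sum(tf.square(self.weight_v), axis=[0, 1], keepdims=True))
        kernel = self.weight_g * self.weight_v / norm
        padded = tf.pad(inputs, ((0, 0), (self.explicit_padding, self.explicit_padding), (0, 0)))
        return tf.nn.conv1d(padded, kernel, stride=1, padding="VALID") + self.bias
```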
@@ -720,6 +720,15 @@ class TFRagModel(TFRagPreTrainedModel):
             generator_dec_attentions=gen_outputs.decoder_attentions,
         )
 
+    def build(self, input_shape=None):
+        if self.built:
+            return
+        self.built = True
+        with tf.name_scope(self.generator.name):
+            self.generator.build(None)
+        with tf.name_scope(self.question_encoder.name):
+            self.question_encoder.build(None)
+
     @add_start_docstrings_to_model_forward(
         """
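RAG composes two full sub-models, so it now gets an explicit `build()` that constructs each one under its own name scope. With tf.keras (Keras 2.x) the enclosing scope becomes part of the weight names, which checkpoint save/load keys on; a small demonstration (layer name illustrative):

```python
import tensorflow as tf

dense = tf.keras.layers.Dense(4, name="generator")  # stand-in for a sub-model
with tf.name_scope(dense.name):
    dense.build([None, 4])
print(dense.kernel.name)  # e.g. "generator/kernel:0" -- scope-prefixed
```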
@@ -604,6 +604,9 @@ class TFSamMaskDecoder(tf.keras.layers.Layer):
         if getattr(self, "iou_prediction_head", None) is not None:
             with tf.name_scope(self.iou_prediction_head.name):
                 self.iou_prediction_head.build(None)
+        for mlp in self.output_hypernetworks_mlps:
+            with tf.name_scope(mlp.name):
+                mlp.build(None)
 
     def call(
         self,
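SAM's hypernetwork MLPs live in a plain Python list, which the `getattr(self, ..., None)` checks above never reach, so they must be built in an explicit loop. A self-contained sketch of the same situation (names and sizes illustrative):

```python
import tensorflow as tf

class HeadStack(tf.keras.layers.Layer):
    def __init__(self, num_heads=3, **kwargs):
        super().__init__(**kwargs)
        # Sublayers in a plain list: attribute-by-attribute build checks miss these.
        self.heads = [tf.keras.layers.Dense(8, name=f"head_{i}") for i in range(num_heads)]

    def build(self, input_shape=None):
        if self.built:
            return
        self.built = True
        for head in self.heads:  # explicit loop, mirroring the SAM fix
            with tf.name_scope(head.name):
                head.build([None, 8])

stack = HeadStack()
stack.build(None)
print(len(stack.weights))  # 6 -- kernel + bias for each of the 3 heads
```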
@@ -247,16 +247,16 @@ class TFVisionTextDualEncoderModel(TFPreTrainedModel):
         # However, the name of that extra layer is the name of the MainLayer in the base model.
         if "vision_model" in tf_weight:
             if tf_weight.count("vision_model") == 1:
-                return re.sub(r"vision_model\..*?\.", "vision_model.", tf_weight)
+                return (re.sub(r"vision_model\..*?\.", "vision_model.", tf_weight),)
             elif tf_weight.count("vision_model") == 2:
-                return re.sub(r"vision_model\..*?\.vision_model", "vision_model.vision_model", tf_weight)
+                return (re.sub(r"vision_model\..*?\.vision_model", "vision_model.vision_model", tf_weight),)
             else:
                 raise ValueError(
                     f"Unexpected weight name {tf_weight}. Please file an issue on the"
                     " Transformers repo to let us know about this error!"
                 )
         elif "text_model" in tf_weight:
-            return re.sub(r"text_model\..*?\.", "text_model.", tf_weight)
+            return (re.sub(r"text_model\..*?\.", "text_model.", tf_weight),)
         else:
             return (tf_weight,)
@@ -598,7 +598,7 @@ class TFVisionTextDualEncoderModel(TFPreTrainedModel):
         if text_model.name != "text_model":
             raise ValueError("text model must be created with the name `text_model`.")
 
-        model.build()  # Ensure model is fully built
+        model.build_in_name_scope()  # Ensure model is fully built
 
         return model
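The dual-encoder rename hook is consumed as an iterable of candidate names, so every branch must return a tuple (note the `else` branch already did); a bare string would be iterated character by character by such a caller. A minimal reproduction of the contract (weight name illustrative):

```python
import re

def rename(tf_weight: str):
    # Every branch returns a tuple of candidates, matching the fix above.
    if tf_weight.count("vision_model") == 1:
        return (re.sub(r"vision_model\..*?\.", "vision_model.", tf_weight),)
    return (tf_weight,)

for candidate in rename("vision_model.encoder.layers.0.kernel"):
    print(candidate)  # vision_model.layers.0.kernel -- one candidate, not characters
```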
@@ -435,7 +435,6 @@ class TFWav2Vec2WeightNormConv1D(tf.keras.layers.Conv1D):
         )
         self.explicit_padding = explicit_padding
         self.filter_axis = 2
-        self.initialized = False
         self.kernel_norm_axes = tf.constant([0, 1])
 
     def _init_norm(self):
@@ -462,13 +461,13 @@ class TFWav2Vec2WeightNormConv1D(tf.keras.layers.Conv1D):
             dtype=self.weight_v.dtype,
             trainable=True,
         )
+        self._init_norm()
         self.bias = self.add_weight(name="bias", shape=(self.filters,), initializer="zeros", trainable=True)
 
     def call(self, inputs):
-        if not self.initialized:
-            self._init_norm()
-            self.initialized = True
+        # TODO Matt: Assigning to attributes in call() is deeply sinful in TensorFlow, as it should be idempotent.
+        # This whole layer should be replaced by a layer that doesn't inherit from Conv1D, but instead calls
+        # a functional 1d convolution with normalized weights that it generates (but does not store!)
         self._normalize_kernel()
         padded_inputs = tf.pad(inputs, ((0, 0), (self.explicit_padding, self.explicit_padding), (0, 0)))
@@ -1208,13 +1207,13 @@ class TFWav2Vec2MainLayer(tf.keras.layers.Layer):
         self.encoder = TFWav2Vec2Encoder(config, name="encoder")
 
     def build(self, input_shape=None):
-        self.masked_spec_embed = self.add_weight(
-            shape=(self.config.hidden_size,), initializer="uniform", trainable=True, name="masked_spec_embed"
-        )
         if self.built:
             return
         self.built = True
+        if self.config.mask_time_prob > 0.0 or self.config.mask_feature_prob > 0.0:
+            self.masked_spec_embed = self.add_weight(
+                shape=(self.config.hidden_size,), initializer="uniform", trainable=True, name="masked_spec_embed"
+            )
         if getattr(self, "feature_extractor", None) is not None:
             with tf.name_scope(self.feature_extractor.name):
                 self.feature_extractor.build(None)
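The Wav2Vec2 main-layer change makes `masked_spec_embed` conditional: the weight is only created when the config enables masking, so configs that never mask don't carry an unused variable in their checkpoints. The pattern in isolation, with config fields mimicked by constructor arguments (illustrative, not the library code):

```python
import tensorflow as tf

class MaskedEmbedOwner(tf.keras.layers.Layer):
    """Mirrors the Wav2Vec2 fix: only create optional weights when the config
    actually uses them."""

    def __init__(self, hidden_size=16, mask_time_prob=0.0, **kwargs):
        super().__init__(**kwargs)
        self.hidden_size = hidden_size
        self.mask_time_prob = mask_time_prob

    def build(self, input_shape=None):
        if self.built:
            return
        self.built = True
        if self.mask_time_prob > 0.0:  # optional weight, gated on the "config"
            self.masked_spec_embed = self.add_weight(
                name="masked_spec_embed", shape=(self.hidden_size,), initializer="uniform"
            )

layer = MaskedEmbedOwner(mask_time_prob=0.0)
layer.build(None)
print(len(layer.weights))  # 0 -- no orphan weight when masking is disabled
```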
@@ -28,7 +28,7 @@ if is_tf_available():
 
     def call(self, inputs):
         tokenized = self.tokenizer(inputs)
-        out = self.bert(**tokenized)
+        out = self.bert(tokenized)
         return out["pooler_output"]
@@ -41,13 +41,8 @@ class BertTokenizationTest(unittest.TestCase):
     def setUp(self):
         super().setUp()
-        self.tokenizers = [
-            BertTokenizer.from_pretrained(checkpoint) for checkpoint in (TOKENIZER_CHECKPOINTS * 2)
-        ]  # repeat for when fast_bert_tokenizer=false
-        self.tf_tokenizers = [TFBertTokenizer.from_pretrained(checkpoint) for checkpoint in TOKENIZER_CHECKPOINTS] + [
-            TFBertTokenizer.from_pretrained(checkpoint, use_fast_bert_tokenizer=False)
-            for checkpoint in TOKENIZER_CHECKPOINTS
-        ]
+        self.tokenizers = [BertTokenizer.from_pretrained(checkpoint) for checkpoint in TOKENIZER_CHECKPOINTS]
+        self.tf_tokenizers = [TFBertTokenizer.from_pretrained(checkpoint) for checkpoint in TOKENIZER_CHECKPOINTS]
         assert len(self.tokenizers) == len(self.tf_tokenizers)
 
         self.test_sentences = [
@@ -94,15 +89,15 @@ class BertTokenizationTest(unittest.TestCase):
                 self.assertTrue(tf.reduce_all(eager_outputs[key] == compiled_outputs[key]))
 
     @slow
-    def test_saved_model(self):
+    def test_export_for_inference(self):
         for tf_tokenizer in self.tf_tokenizers:
             model = ModelToSave(tokenizer=tf_tokenizer)
             test_inputs = tf.convert_to_tensor(self.test_sentences)
             out = model(test_inputs)  # Build model with some sample inputs
             with TemporaryDirectory() as tempdir:
                 save_path = Path(tempdir) / "saved.model"
-                model.save(save_path)
-                loaded_model = tf.keras.models.load_model(save_path)
-                loaded_output = loaded_model(test_inputs)
+                model.export(save_path)
+                loaded_model = tf.saved_model.load(save_path)
+                loaded_output = loaded_model.serve(test_inputs)
             # We may see small differences because the loaded model is compiled, so we need an epsilon for the test
             self.assertLessEqual(tf.reduce_max(tf.abs(out - loaded_output)), 1e-5)
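The test now uses the inference-only export path: `model.export()` writes a SavedModel whose default endpoint is `serve`, reloaded with `tf.saved_model.load` rather than `tf.keras.models.load_model`. A self-contained sketch of that round trip (toy model, not the test's):

```python
import tensorflow as tf
from tempfile import TemporaryDirectory

# Inference-only export: no Keras metadata, just a serving graph.
model = tf.keras.Sequential([tf.keras.layers.Dense(2, input_shape=(4,))])
sample = tf.zeros((1, 4))
expected = model(sample)

with TemporaryDirectory() as tmpdir:
    model.export(tmpdir)                    # writes a SavedModel directory
    reloaded = tf.saved_model.load(tmpdir)  # returns a plain SavedModel object
    assert tf.reduce_max(tf.abs(reloaded.serve(sample) - expected)) < 1e-5
```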
@@ -1005,6 +1005,7 @@ class TFRagModelSaveLoadTests(unittest.TestCase):
                 retriever=rag_retriever,
                 config=rag_config,
             )
+            rag_sequence.build_in_name_scope()
             # check that the from pretrained methods work
             rag_sequence.save_pretrained(tmp_dirname)
             rag_sequence.from_pretrained(tmp_dirname, retriever=rag_retriever)
@@ -1056,6 +1057,7 @@ class TFRagModelSaveLoadTests(unittest.TestCase):
                 retriever=rag_retriever,
                 config=rag_config,
             )
+            rag_token.build_in_name_scope()
             # check that the from pretrained methods work
             rag_token.save_pretrained(tmp_dirname)
             rag_token.from_pretrained(tmp_dirname, retriever=rag_retriever)
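Manually assembled models (RAG composed with a retriever, or an encoder-decoder built from two pretrained halves, as below) have no weights until built, so the tests now call `build_in_name_scope()` before `save_pretrained()`. A rough sketch of what such a helper amounts to, assuming it simply wraps `build()` in the model's own name scope so saved weight names come out fully prefixed:

```python
import tensorflow as tf

class ScopedModel(tf.keras.Model):
    def __init__(self):
        super().__init__(name="scoped_model")
        self.dense = tf.keras.layers.Dense(4, name="dense")

    def build(self, input_shape=None):
        if self.built:
            return
        self.built = True
        with tf.name_scope(self.dense.name):
            self.dense.build([None, 4])

    def build_in_name_scope(self):
        # Assumption: the real helper builds under the model's own scope.
        with tf.name_scope(self.name):
            self.build(None)

m = ScopedModel()
m.build_in_name_scope()
print([w.name for w in m.weights])  # e.g. ['scoped_model/dense/kernel:0', ...]
```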
@@ -858,6 +858,7 @@ class TFVisionEncoderDecoderModelSaveLoadTests(unittest.TestCase):
                 pretrained_encoder_dir,
                 pretrained_decoder_dir,
             )
+            enc_dec_model.build_in_name_scope()
             # check that the from pretrained methods work
             enc_dec_model.save_pretrained(tmp_dirname)
             enc_dec_model = TFVisionEncoderDecoderModel.from_pretrained(tmp_dirname)