Unverified Commit 071529bd authored by Matt, committed by GitHub

Use return_tensors="np" instead of "tf" (#21266)

Return NP instead of TF tensors for our data loading pipeline
parent f0fc7912
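
For context, a minimal sketch of the collator pattern this commit switches to: the collator is built with return_tensors="np", so it hands back NumPy arrays, and the TF data pipeline converts them to tensors only when batches are materialized. The checkpoint name and sample sentences below are illustrative placeholders, not taken from this commit.

from transformers import AutoTokenizer, DataCollatorForLanguageModeling

# Illustrative checkpoint; any fast tokenizer works the same way here.
tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")

# The collator returns NumPy arrays instead of building tf.Tensor objects
# for every batch; downstream tf.data code converts them as needed.
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm_probability=0.15,
    return_tensors="np",
)

# Feeding it a couple of tokenized examples yields a dict of np.ndarray values.
batch = data_collator([tokenizer("Hello world"), tokenizer("Another example")])
print({k: type(v).__name__ for k, v in batch.items()})
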
@@ -499,7 +499,7 @@ def main():
     # region TF Dataset preparation
     num_replicas = training_args.strategy.num_replicas_in_sync
     data_collator = DataCollatorForLanguageModeling(
-        tokenizer=tokenizer, mlm_probability=data_args.mlm_probability, return_tensors="tf"
+        tokenizer=tokenizer, mlm_probability=data_args.mlm_probability, return_tensors="np"
     )
     options = tf.data.Options()
     options.experimental_distribute.auto_shard_policy = tf.data.experimental.AutoShardPolicy.OFF
...
@@ -105,7 +105,7 @@ class DataCollatorForMultipleChoice:
             padding=self.padding,
             max_length=self.max_length,
             pad_to_multiple_of=self.pad_to_multiple_of,
-            return_tensors="tf",
+            return_tensors="np",
         )
         # Un-flatten
@@ -410,7 +410,7 @@ def main():
     )
     if data_args.pad_to_max_length:
-        data_collator = DefaultDataCollator(return_tensors="tf")
+        data_collator = DefaultDataCollator(return_tensors="np")
     else:
         # custom class defined above, as HF has no data collator for multiple choice
         data_collator = DataCollatorForMultipleChoice(tokenizer)
...
@@ -533,7 +533,7 @@ def main():
         model=model,
         label_pad_token_id=label_pad_token_id,
         pad_to_multiple_of=128,  # Reduce the number of unique shapes for XLA, especially for generation
-        return_tensors="tf",
+        return_tensors="np",
     )
     dataset_options = tf.data.Options()
...
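
An aside on the pad_to_multiple_of comment in the hunk above: rounding every batch's sequence length up to a fixed multiple caps how many distinct padded shapes XLA has to compile for. A standalone illustration of that arithmetic (not code from this commit; the helper name is made up):

import math

def pad_to_multiple(seq_len: int, multiple: int = 128) -> int:
    # Round a sequence length up to the next multiple, so batches of nearby
    # lengths share the same padded shape and XLA can reuse compiled graphs.
    return math.ceil(seq_len / multiple) * multiple

print(pad_to_multiple(130))  # 256
print(pad_to_multiple(250))  # 256 -> same compiled shape as above
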
@@ -345,9 +345,9 @@ def main():
     datasets = datasets.map(preprocess_function, batched=True, load_from_cache_file=not data_args.overwrite_cache)
     if data_args.pad_to_max_length:
-        data_collator = DefaultDataCollator(return_tensors="tf")
+        data_collator = DefaultDataCollator(return_tensors="np")
     else:
-        data_collator = DataCollatorWithPadding(tokenizer, return_tensors="tf")
+        data_collator = DataCollatorWithPadding(tokenizer, return_tensors="np")
     # endregion
     # region Metric function
...
@@ -396,7 +396,7 @@ def main():
     # We need the DataCollatorForTokenClassification here, as we need to correctly pad labels as
     # well as inputs.
-    collate_fn = DataCollatorForTokenClassification(tokenizer=tokenizer, return_tensors="tf")
+    collate_fn = DataCollatorForTokenClassification(tokenizer=tokenizer, return_tensors="np")
     num_replicas = training_args.strategy.num_replicas_in_sync
     total_train_batch_size = training_args.per_device_train_batch_size * num_replicas
...
@@ -499,7 +499,7 @@ def main():
         model=model,
         label_pad_token_id=label_pad_token_id,
         pad_to_multiple_of=64,  # Reduce the number of unique shapes for XLA, especially for generation
-        return_tensors="tf",
+        return_tensors="np",
     )
     num_replicas = training_args.strategy.num_replicas_in_sync
     total_train_batch_size = training_args.per_device_train_batch_size * num_replicas
...