"git@developer.sourcefind.cn:chenpangpang/transformers.git" did not exist on "1ad7b0398c02a84fb3dd1f110a99d609e1df6726"
Unverified commit e5dcceb8 authored by Matt, committed by GitHub

Fixes to TF collators (#21143)

* Add num_workers for prepare_tf_dataset

* Bugfix in the default collator and change default tensor type

* Remove the "num_workers" arg and move it to a new PR
parent 2411f0e4
@@ -159,7 +159,7 @@ def tf_default_data_collator(features: List[InputDataClass]) -> Dict[str, Any]:
         label_col_name = None
     if label_col_name is not None:
         if isinstance(first[label_col_name], tf.Tensor):
-            dtype = tf.int64 if first[label_col_name].dtype.is_integer() else tf.float32
+            dtype = tf.int64 if first[label_col_name].dtype.is_integer else tf.float32
         elif isinstance(first[label_col_name], np.ndarray) or isinstance(first[label_col_name], np.generic):
             dtype = tf.int64 if np.issubdtype(first[label_col_name].dtype, np.integer) else tf.float32
         elif isinstance(first[label_col_name], (tuple, list)):
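For reference, tf.DType.is_integer is a property, not a method, which is why the parentheses in the removed line were a bug: the property already returns a bool, and calling that bool raises a TypeError. A minimal standalone sketch of the dtype check the collator performs (illustrative, not taken from the PR):

import numpy as np
import tensorflow as tf

label = tf.constant([0, 1, 1])
# tf.DType.is_integer is a property; writing `label.dtype.is_integer()` would
# raise "TypeError: 'bool' object is not callable".
dtype = tf.int64 if label.dtype.is_integer else tf.float32
print(dtype)  # <dtype: 'int64'>

float_label = np.array([0.5, 1.5])
# The NumPy branch of the collator uses np.issubdtype instead.
dtype = tf.int64 if np.issubdtype(float_label.dtype, np.integer) else tf.float32
print(dtype)  # <dtype: 'float32'>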
@@ -1345,9 +1345,9 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin, TFGenerationMixin, Pu
         if collate_fn is None:
             if tokenizer is None:
-                collate_fn = DefaultDataCollator(return_tensors="tf")
+                collate_fn = DefaultDataCollator(return_tensors="np")
             else:
-                collate_fn = DataCollatorWithPadding(tokenizer=tokenizer, return_tensors="tf")
+                collate_fn = DataCollatorWithPadding(tokenizer=tokenizer, return_tensors="np")
         if collate_fn_args is None:
             collate_fn_args = dict()
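The second change switches the default collators used by prepare_tf_dataset from return_tensors="tf" to return_tensors="np", so batches are assembled as NumPy arrays and converted to tensors by the tf.data pipeline. A minimal sketch of the new default behaviour, using made-up toy features (the keys and values below are illustrative, not from the PR):

import numpy as np
from transformers import DefaultDataCollator

collator = DefaultDataCollator(return_tensors="np")
features = [
    {"input_ids": [101, 2009, 102], "labels": 0},
    {"input_ids": [101, 2129, 102], "labels": 1},
]
batch = collator(features)
# Both entries come back as NumPy arrays: input_ids with shape (2, 3)
# and labels with shape (2,).
print({k: (type(v).__name__, v.shape) for k, v in batch.items()})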