"git@developer.sourcefind.cn:chenpangpang/transformers.git" did not exist on "1750e629006bb6989aef5b4e141f3477f891a098"
Unverified commit 8f20e61c, authored by amyeroberts, committed by GitHub
Browse files

Update feature selection in to_tf_dataset (#21935)

* Update feature selection

* Check compatibility with datasets version

* Checkout from datasets main
parent 345a1371
...@@ -385,12 +385,12 @@ Convert your datasets to the `tf.data.Dataset` format using the [`~datasets.Data ...@@ -385,12 +385,12 @@ Convert your datasets to the `tf.data.Dataset` format using the [`~datasets.Data
```py ```py
>>> # converting our train dataset to tf.data.Dataset >>> # converting our train dataset to tf.data.Dataset
>>> tf_train_dataset = food["train"].to_tf_dataset( >>> tf_train_dataset = food["train"].to_tf_dataset(
... columns=["pixel_values"], label_cols=["label"], shuffle=True, batch_size=batch_size, collate_fn=data_collator ... columns="pixel_values", label_cols="label", shuffle=True, batch_size=batch_size, collate_fn=data_collator
... ) ... )
>>> # converting our test dataset to tf.data.Dataset >>> # converting our test dataset to tf.data.Dataset
>>> tf_eval_dataset = food["test"].to_tf_dataset( >>> tf_eval_dataset = food["test"].to_tf_dataset(
... columns=["pixel_values"], label_cols=["label"], shuffle=True, batch_size=batch_size, collate_fn=data_collator ... columns="pixel_values", label_cols="label", shuffle=True, batch_size=batch_size, collate_fn=data_collator
... ) ... )
``` ```
......
...@@ -173,7 +173,7 @@ A continuación, convierte los datasets tokenizados en datasets de TensorFlow co ...@@ -173,7 +173,7 @@ A continuación, convierte los datasets tokenizados en datasets de TensorFlow co
```py ```py
>>> tf_train_dataset = small_train_dataset.to_tf_dataset( >>> tf_train_dataset = small_train_dataset.to_tf_dataset(
... columns=["attention_mask", "input_ids", "token_type_ids"], ... columns=["attention_mask", "input_ids", "token_type_ids"],
... label_cols=["labels"], ... label_cols="labels",
... shuffle=True, ... shuffle=True,
... collate_fn=data_collator, ... collate_fn=data_collator,
... batch_size=8, ... batch_size=8,
...@@ -181,7 +181,7 @@ A continuación, convierte los datasets tokenizados en datasets de TensorFlow co ...@@ -181,7 +181,7 @@ A continuación, convierte los datasets tokenizados en datasets de TensorFlow co
>>> tf_validation_dataset = small_eval_dataset.to_tf_dataset( >>> tf_validation_dataset = small_eval_dataset.to_tf_dataset(
... columns=["attention_mask", "input_ids", "token_type_ids"], ... columns=["attention_mask", "input_ids", "token_type_ids"],
... label_cols=["labels"], ... label_cols="labels",
... shuffle=False, ... shuffle=False,
... collate_fn=data_collator, ... collate_fn=data_collator,
... batch_size=8, ... batch_size=8,
......
...@@ -205,7 +205,7 @@ Especifique suas entradas em `columns` e seu rótulo em `label_cols`: ...@@ -205,7 +205,7 @@ Especifique suas entradas em `columns` e seu rótulo em `label_cols`:
```py ```py
>>> tf_train_dataset = small_train_dataset.to_tf_dataset( >>> tf_train_dataset = small_train_dataset.to_tf_dataset(
... columns=["attention_mask", "input_ids", "token_type_ids"], ... columns=["attention_mask", "input_ids", "token_type_ids"],
... label_cols=["labels"], ... label_cols="labels",
... shuffle=True, ... shuffle=True,
... collate_fn=data_collator, ... collate_fn=data_collator,
... batch_size=8, ... batch_size=8,
...@@ -213,7 +213,7 @@ Especifique suas entradas em `columns` e seu rótulo em `label_cols`: ...@@ -213,7 +213,7 @@ Especifique suas entradas em `columns` e seu rótulo em `label_cols`:
>>> tf_validation_dataset = small_eval_dataset.to_tf_dataset( >>> tf_validation_dataset = small_eval_dataset.to_tf_dataset(
... columns=["attention_mask", "input_ids", "token_type_ids"], ... columns=["attention_mask", "input_ids", "token_type_ids"],
... label_cols=["labels"], ... label_cols="labels",
... shuffle=False, ... shuffle=False,
... collate_fn=data_collator, ... collate_fn=data_collator,
... batch_size=8, ... batch_size=8,
......
...@@ -1413,6 +1413,12 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin, TFGenerationMixin, Pu ...@@ -1413,6 +1413,12 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin, TFGenerationMixin, Pu
feature_cols = [col for col in output_columns if col in model_inputs and col not in model_labels] feature_cols = [col for col in output_columns if col in model_inputs and col not in model_labels]
label_cols = [col for col in output_columns if col in model_labels] label_cols = [col for col in output_columns if col in model_labels]
# Backwards compatibility for older versions of datasets. Previously, if `columns` or `label_cols`
# were a single element list, the returned element spec would be a single element. Now, passing [feature]
# will return a dict structure {"feature": feature}, and passing a single string will return a single element.
feature_cols = feature_cols[0] if len(feature_cols) == 1 else feature_cols
label_cols = label_cols[0] if len(label_cols) == 1 else label_cols
if drop_remainder is None: if drop_remainder is None:
drop_remainder = shuffle drop_remainder = shuffle
tf_dataset = dataset.to_tf_dataset( tf_dataset = dataset.to_tf_dataset(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment