Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
071529bd
Unverified
Commit 071529bd, authored Jan 24, 2023 by Matt; committed by GitHub on Jan 24, 2023
Browse files
Use return_tensors="np" instead of "tf" (#21266)
Return NP instead of TF tensors for our data loading pipeline
parent
f0fc7912
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing 6 changed files with 8 additions and 8 deletions
+8
-8
examples/tensorflow/language-modeling/run_mlm.py
examples/tensorflow/language-modeling/run_mlm.py
+1
-1
examples/tensorflow/multiple-choice/run_swag.py
examples/tensorflow/multiple-choice/run_swag.py
+2
-2
examples/tensorflow/summarization/run_summarization.py
examples/tensorflow/summarization/run_summarization.py
+1
-1
examples/tensorflow/text-classification/run_glue.py
examples/tensorflow/text-classification/run_glue.py
+2
-2
examples/tensorflow/token-classification/run_ner.py
examples/tensorflow/token-classification/run_ner.py
+1
-1
examples/tensorflow/translation/run_translation.py
examples/tensorflow/translation/run_translation.py
+1
-1
No files found.
examples/tensorflow/language-modeling/run_mlm.py
View file @
071529bd
...
@@ -499,7 +499,7 @@ def main():
...
@@ -499,7 +499,7 @@ def main():
 # region TF Dataset preparation
 num_replicas = training_args.strategy.num_replicas_in_sync
 data_collator = DataCollatorForLanguageModeling(
-    tokenizer=tokenizer, mlm_probability=data_args.mlm_probability, return_tensors="tf"
+    tokenizer=tokenizer, mlm_probability=data_args.mlm_probability, return_tensors="np"
 )
 options = tf.data.Options()
 options.experimental_distribute.auto_shard_policy = tf.data.experimental.AutoShardPolicy.OFF
...
...
examples/tensorflow/multiple-choice/run_swag.py
View file @
071529bd
...
@@ -105,7 +105,7 @@ class DataCollatorForMultipleChoice:
...
@@ -105,7 +105,7 @@ class DataCollatorForMultipleChoice:
     padding=self.padding,
     max_length=self.max_length,
     pad_to_multiple_of=self.pad_to_multiple_of,
-    return_tensors="tf",
+    return_tensors="np",
 )
 # Un-flatten
...
@@ -410,7 +410,7 @@ def main():
...
@@ -410,7 +410,7 @@ def main():
 )
 if data_args.pad_to_max_length:
-    data_collator = DefaultDataCollator(return_tensors="tf")
+    data_collator = DefaultDataCollator(return_tensors="np")
 else:
     # custom class defined above, as HF has no data collator for multiple choice
     data_collator = DataCollatorForMultipleChoice(tokenizer)
...
...
examples/tensorflow/summarization/run_summarization.py
View file @
071529bd
...
@@ -533,7 +533,7 @@ def main():
...
@@ -533,7 +533,7 @@ def main():
     model=model,
     label_pad_token_id=label_pad_token_id,
     pad_to_multiple_of=128,  # Reduce the number of unique shapes for XLA, especially for generation
-    return_tensors="tf",
+    return_tensors="np",
 )
 dataset_options = tf.data.Options()
...
...
examples/tensorflow/text-classification/run_glue.py
View file @
071529bd
...
@@ -345,9 +345,9 @@ def main():
...
@@ -345,9 +345,9 @@ def main():
 datasets = datasets.map(preprocess_function, batched=True, load_from_cache_file=not data_args.overwrite_cache)
 if data_args.pad_to_max_length:
-    data_collator = DefaultDataCollator(return_tensors="tf")
+    data_collator = DefaultDataCollator(return_tensors="np")
 else:
-    data_collator = DataCollatorWithPadding(tokenizer, return_tensors="tf")
+    data_collator = DataCollatorWithPadding(tokenizer, return_tensors="np")
 # endregion
 # region Metric function
...
...
examples/tensorflow/token-classification/run_ner.py
View file @
071529bd
...
@@ -396,7 +396,7 @@ def main():
...
@@ -396,7 +396,7 @@ def main():
 # We need the DataCollatorForTokenClassification here, as we need to correctly pad labels as
 # well as inputs.
-collate_fn = DataCollatorForTokenClassification(tokenizer=tokenizer, return_tensors="tf")
+collate_fn = DataCollatorForTokenClassification(tokenizer=tokenizer, return_tensors="np")
 num_replicas = training_args.strategy.num_replicas_in_sync
 total_train_batch_size = training_args.per_device_train_batch_size * num_replicas
...
...
examples/tensorflow/translation/run_translation.py
View file @
071529bd
...
@@ -499,7 +499,7 @@ def main():
...
@@ -499,7 +499,7 @@ def main():
     model=model,
     label_pad_token_id=label_pad_token_id,
     pad_to_multiple_of=64,  # Reduce the number of unique shapes for XLA, especially for generation
-    return_tensors="tf",
+    return_tensors="np",
 )
 num_replicas = training_args.strategy.num_replicas_in_sync
 total_train_batch_size = training_args.per_device_train_batch_size * num_replicas
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment