chenpangpang/transformers, commit 071529bd (unverified)
Authored Jan 24, 2023 by Matt; committed via GitHub on Jan 24, 2023.
Use return_tensors="np" instead of "tf" (#21266)
Return NP instead of TF tensors for our data loading pipeline
Parent commit: f0fc7912
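
Note on why this works: the example scripts hand these collators to tf.data as a collate_fn (via to_tf_dataset() / model.prepare_tf_dataset()), and the collated batches are converted to tf.Tensor during batching either way, so the collator itself can work in plain NumPy. A minimal sketch, not part of the commit, showing that a collator built with return_tensors="np" yields NumPy batches; the checkpoint name and toy texts are illustrative only:

    import numpy as np
    from transformers import AutoTokenizer, DataCollatorForLanguageModeling

    # Hypothetical setup: any fast tokenizer works; distilroberta-base is just an example.
    tokenizer = AutoTokenizer.from_pretrained("distilroberta-base")
    features = [
        dict(tokenizer(text, truncation=True))
        for text in ["Hello world!", "NumPy in, tensors out."]
    ]

    # Same collator class as in run_mlm.py below, now asked for NumPy output.
    collator = DataCollatorForLanguageModeling(
        tokenizer=tokenizer, mlm_probability=0.15, return_tensors="np"
    )
    batch = collator(features)

    # Every value is a NumPy array rather than an eager tf.Tensor.
    assert all(isinstance(v, np.ndarray) for v in batch.values())
    print({k: v.shape for k, v in batch.items()})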
Showing 6 changed files with 8 additions and 8 deletions:
examples/tensorflow/language-modeling/run_mlm.py (+1, -1)
examples/tensorflow/multiple-choice/run_swag.py (+2, -2)
examples/tensorflow/summarization/run_summarization.py (+1, -1)
examples/tensorflow/text-classification/run_glue.py (+2, -2)
examples/tensorflow/token-classification/run_ner.py (+1, -1)
examples/tensorflow/translation/run_translation.py (+1, -1)
examples/tensorflow/language-modeling/run_mlm.py

@@ -499,7 +499,7 @@ def main():
         # region TF Dataset preparation
         num_replicas = training_args.strategy.num_replicas_in_sync
         data_collator = DataCollatorForLanguageModeling(
-            tokenizer=tokenizer, mlm_probability=data_args.mlm_probability, return_tensors="tf"
+            tokenizer=tokenizer, mlm_probability=data_args.mlm_probability, return_tensors="np"
         )
         options = tf.data.Options()
         options.experimental_distribute.auto_shard_policy = tf.data.experimental.AutoShardPolicy.OFF
examples/tensorflow/multiple-choice/run_swag.py

@@ -105,7 +105,7 @@ class DataCollatorForMultipleChoice:
             padding=self.padding,
             max_length=self.max_length,
             pad_to_multiple_of=self.pad_to_multiple_of,
-            return_tensors="tf",
+            return_tensors="np",
         )

         # Un-flatten
@@ -410,7 +410,7 @@ def main():
     )

     if data_args.pad_to_max_length:
-        data_collator = DefaultDataCollator(return_tensors="tf")
+        data_collator = DefaultDataCollator(return_tensors="np")
     else:
         # custom class defined above, as HF has no data collator for multiple choice
         data_collator = DataCollatorForMultipleChoice(tokenizer)
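
The "custom class defined above" in that second hunk is the DataCollatorForMultipleChoice touched by the first hunk. A condensed sketch, not the verbatim class, of the flatten-pad-unflatten pattern it implements, with the return_tensors="np" change applied; the padding/max_length options from the real class are omitted here:

    from dataclasses import dataclass
    import numpy as np
    from transformers import PreTrainedTokenizerBase

    @dataclass
    class DataCollatorForMultipleChoice:
        tokenizer: PreTrainedTokenizerBase

        def __call__(self, features):
            # Each feature holds num_choices candidate sequences per key.
            labels = [f.pop("label") for f in features] if "label" in features[0] else None
            num_choices = len(features[0]["input_ids"])
            # Flatten: one row per (example, choice) pair, so a single pad() call suffices.
            flattened = [
                {k: v[i] for k, v in f.items()} for f in features for i in range(num_choices)
            ]
            batch = self.tokenizer.pad(flattened, return_tensors="np")
            # Un-flatten back to (batch_size, num_choices, seq_len).
            batch = {k: v.reshape(len(features), num_choices, -1) for k, v in batch.items()}
            if labels is not None:
                batch["labels"] = np.array(labels, dtype=np.int64)
            return batch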
examples/tensorflow/summarization/run_summarization.py

@@ -533,7 +533,7 @@ def main():
             model=model,
             label_pad_token_id=label_pad_token_id,
             pad_to_multiple_of=128,  # Reduce the number of unique shapes for XLA, especially for generation
-            return_tensors="tf",
+            return_tensors="np",
         )

         dataset_options = tf.data.Options()
examples/tensorflow/text-classification/run_glue.py

@@ -345,9 +345,9 @@ def main():
     datasets = datasets.map(preprocess_function, batched=True, load_from_cache_file=not data_args.overwrite_cache)

     if data_args.pad_to_max_length:
-        data_collator = DefaultDataCollator(return_tensors="tf")
+        data_collator = DefaultDataCollator(return_tensors="np")
     else:
-        data_collator = DataCollatorWithPadding(tokenizer, return_tensors="tf")
+        data_collator = DataCollatorWithPadding(tokenizer, return_tensors="np")
     # endregion

     # region Metric function
examples/tensorflow/token-classification/run_ner.py

@@ -396,7 +396,7 @@ def main():
         # We need the DataCollatorForTokenClassification here, as we need to correctly pad labels as
         # well as inputs.
-        collate_fn = DataCollatorForTokenClassification(tokenizer=tokenizer, return_tensors="tf")
+        collate_fn = DataCollatorForTokenClassification(tokenizer=tokenizer, return_tensors="np")
         num_replicas = training_args.strategy.num_replicas_in_sync
         total_train_batch_size = training_args.per_device_train_batch_size * num_replicas
examples/tensorflow/translation/run_translation.py

@@ -499,7 +499,7 @@ def main():
             model=model,
             label_pad_token_id=label_pad_token_id,
             pad_to_multiple_of=64,  # Reduce the number of unique shapes for XLA, especially for generation
-            return_tensors="tf",
+            return_tensors="np",
         )
         num_replicas = training_args.strategy.num_replicas_in_sync
         total_train_batch_size = training_args.per_device_train_batch_size * num_replicas
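
Both seq2seq hunks keep the pad_to_multiple_of argument (128 for summarization, 64 for translation) because, as the inline comment says, XLA compiles a separate program for every distinct input shape it encounters; bucketing sequence lengths to a multiple caps the number of shapes it can ever see. An illustrative back-of-the-envelope check, not from the commit:

    import numpy as np

    def padded_length(raw_len: int, multiple: int = 64) -> int:
        # Round up to the next multiple, as pad_to_multiple_of does.
        return ((raw_len + multiple - 1) // multiple) * multiple

    raw_lengths = np.random.randint(1, 513, size=1000)
    print(len(set(raw_lengths.tolist())))                         # up to 512 distinct lengths
    print(len({padded_length(n) for n in raw_lengths.tolist()}))  # at most 8 buckets (64..512)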