chenpangpang / transformers · Commits · 45e26125

Commit 45e26125 (unverified), authored Jun 28, 2020 by Sam Shleifer, committed via GitHub on Jun 28, 2020
save_pretrained: mkdir(exist_ok=True) (#5258)
* all save_pretrained methods mkdir if not os.path.exists
Parent: 12dfbd4f
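In essence, every `save_pretrained` implementation trades a precondition assert for on-demand directory creation, so callers no longer have to create the output directory themselves. A minimal schematic of the pattern, distilled from the hunks below (the function names here are illustrative; the real methods live on `PretrainedConfig`, `PreTrainedModel`, `TFPreTrainedModel`, `Pipeline`, and `PreTrainedTokenizerBase`):

```python
import os

def save_pretrained_before(save_directory):
    # Old behaviour: the target directory had to exist already.
    assert os.path.isdir(
        save_directory
    ), "Saving path should be a directory where the model and configuration can be saved"
    # ... write config/weights/tokenizer files into save_directory ...

def save_pretrained_after(save_directory):
    # New behaviour: reject file paths, then create the directory (and parents) if needed.
    if os.path.isfile(save_directory):
        raise AssertionError("Provided path ({}) should be a directory, not a file".format(save_directory))
    os.makedirs(save_directory, exist_ok=True)
    # ... write config/weights/tokenizer files into save_directory ...
```

Note that only the configuration variant raises; the model, pipeline, and tokenizer variants log an error and return early instead (see their hunks below).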
Showing 16 changed files with 17 additions and 59 deletions (+17 -59)
  examples/bert-loses-patience/run_glue_with_pabee.py    +0 -6
  examples/contrib/mm-imdb/run_mmimdb.py                  +0 -4
  examples/contrib/run_swag.py                            +0 -6
  examples/distillation/run_squad_w_distillation.py       +0 -4
  examples/movement-pruning/masked_run_glue.py            +0 -4
  examples/movement-pruning/masked_run_squad.py           +0 -4
  examples/question-answering/run_squad.py                +0 -6
  examples/seq2seq/distillation.py                        +0 -2
  examples/text-classification/run_xnli.py                +0 -4
  src/transformers/configuration_utils.py                 +3 -4
  src/transformers/convert_pytorch_checkpoint_to_tf2.py   +0 -1
  src/transformers/modeling_tf_utils.py                   +4 -3
  src/transformers/modeling_utils.py                      +4 -3
  src/transformers/pipelines.py                           +3 -2
  src/transformers/tokenization_utils_base.py             +3 -2
  templates/adding_a_new_example_script/run_xxx.py        +0 -4
examples/bert-loses-patience/run_glue_with_pabee.py

```diff
@@ -226,8 +226,6 @@ def train(args, train_dataset, model, tokenizer):
                 if args.local_rank in [-1, 0] and args.save_steps > 0 and global_step % args.save_steps == 0:
                     # Save model checkpoint
                     output_dir = os.path.join(args.output_dir, "checkpoint-{}".format(global_step))
-                    if not os.path.exists(output_dir):
-                        os.makedirs(output_dir)
                     model_to_save = (
                         model.module if hasattr(model, "module") else model
                     )  # Take care of distributed/parallel training
@@ -649,10 +647,6 @@ def main():
     # Saving best-practices: if you use defaults names for the model, you can reload it using from_pretrained()
     if args.do_train and (args.local_rank == -1 or torch.distributed.get_rank() == 0):
-        # Create output directory if needed
-        if not os.path.exists(args.output_dir) and args.local_rank in [-1, 0]:
-            os.makedirs(args.output_dir)
-
         logger.info("Saving model checkpoint to %s", args.output_dir)
         # Save a trained model, configuration and tokenizer using `save_pretrained()`.
         # They can then be reloaded using `from_pretrained()`
```
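Because `save_pretrained()` now creates its target directory itself, the check-then-create boilerplate removed above (and from the other example scripts below) is redundant. A small sketch of the two idioms, using a hypothetical checkpoint path:

```python
import os

output_dir = os.path.join("outputs", "checkpoint-500")  # hypothetical path, for illustration only

# Old idiom used by the example scripts (check-then-create, racy under multi-process training):
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

# Idiom this commit relies on instead; save_pretrained() now does the equivalent internally:
os.makedirs(output_dir, exist_ok=True)
```

`exist_ok=True` also avoids the race where two processes both pass the `exists()` check and the second `makedirs()` call fails.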
examples/contrib/mm-imdb/run_mmimdb.py

```diff
@@ -521,10 +521,6 @@ def main():
     # Saving best-practices: if you use defaults names for the model, you can reload it using from_pretrained()
     if args.do_train and (args.local_rank == -1 or torch.distributed.get_rank() == 0):
-        # Create output directory if needed
-        if not os.path.exists(args.output_dir) and args.local_rank in [-1, 0]:
-            os.makedirs(args.output_dir)
-
         logger.info("Saving model checkpoint to %s", args.output_dir)
         # Save a trained model, configuration and tokenizer using `save_pretrained()`.
         # They can then be reloaded using `from_pretrained()`
```
examples/contrib/run_swag.py

```diff
@@ -383,8 +383,6 @@ def train(args, train_dataset, model, tokenizer):
                 if args.local_rank in [-1, 0] and args.save_steps > 0 and global_step % args.save_steps == 0:
                     # Save model checkpoint
                     output_dir = os.path.join(args.output_dir, "checkpoint-{}".format(global_step))
-                    if not os.path.exists(output_dir):
-                        os.makedirs(output_dir)
                     model_to_save = (
                         model.module if hasattr(model, "module") else model
                     )  # Take care of distributed/parallel training
@@ -651,10 +649,6 @@ def main():
     # Save the trained model and the tokenizer
     if args.local_rank == -1 or torch.distributed.get_rank() == 0:
-        # Create output directory if needed
-        if not os.path.exists(args.output_dir) and args.local_rank in [-1, 0]:
-            os.makedirs(args.output_dir)
-
         logger.info("Saving model checkpoint to %s", args.output_dir)
         # Save a trained model, configuration and tokenizer using `save_pretrained()`.
         # They can then be reloaded using `from_pretrained()`
```
examples/distillation/run_squad_w_distillation.py

```diff
@@ -809,10 +809,6 @@ def main():
     # Save the trained model and the tokenizer
     if args.do_train and (args.local_rank == -1 or torch.distributed.get_rank() == 0):
-        # Create output directory if needed
-        if not os.path.exists(args.output_dir) and args.local_rank in [-1, 0]:
-            os.makedirs(args.output_dir)
-
         logger.info("Saving model checkpoint to %s", args.output_dir)
         # Save a trained model, configuration and tokenizer using `save_pretrained()`.
         # They can then be reloaded using `from_pretrained()`
```
examples/movement-pruning/masked_run_glue.py

```diff
@@ -875,10 +875,6 @@ def main():
     # Saving best-practices: if you use defaults names for the model, you can reload it using from_pretrained()
     if args.do_train and (args.local_rank == -1 or torch.distributed.get_rank() == 0):
-        # Create output directory if needed
-        if not os.path.exists(args.output_dir) and args.local_rank in [-1, 0]:
-            os.makedirs(args.output_dir)
-
         logger.info("Saving model checkpoint to %s", args.output_dir)
         # Save a trained model, configuration and tokenizer using `save_pretrained()`.
         # They can then be reloaded using `from_pretrained()`
```
examples/movement-pruning/masked_run_squad.py

```diff
@@ -1059,10 +1059,6 @@ def main():
     # Save the trained model and the tokenizer
     if args.do_train and (args.local_rank == -1 or torch.distributed.get_rank() == 0):
-        # Create output directory if needed
-        if not os.path.exists(args.output_dir) and args.local_rank in [-1, 0]:
-            os.makedirs(args.output_dir)
-
         logger.info("Saving model checkpoint to %s", args.output_dir)
         # Save a trained model, configuration and tokenizer using `save_pretrained()`.
         # They can then be reloaded using `from_pretrained()`
```
examples/question-answering/run_squad.py

```diff
@@ -240,8 +240,6 @@ def train(args, train_dataset, model, tokenizer):
                 # Save model checkpoint
                 if args.local_rank in [-1, 0] and args.save_steps > 0 and global_step % args.save_steps == 0:
                     output_dir = os.path.join(args.output_dir, "checkpoint-{}".format(global_step))
-                    if not os.path.exists(output_dir):
-                        os.makedirs(output_dir)
                     # Take care of distributed/parallel training
                     model_to_save = model.module if hasattr(model, "module") else model
                     model_to_save.save_pretrained(output_dir)
@@ -768,10 +766,6 @@ def main():
     # Save the trained model and the tokenizer
     if args.do_train and (args.local_rank == -1 or torch.distributed.get_rank() == 0):
-        # Create output directory if needed
-        if not os.path.exists(args.output_dir) and args.local_rank in [-1, 0]:
-            os.makedirs(args.output_dir)
-
         logger.info("Saving model checkpoint to %s", args.output_dir)
         # Save a trained model, configuration and tokenizer using `save_pretrained()`.
         # They can then be reloaded using `from_pretrained()`
```
examples/seq2seq/distillation.py

```diff
@@ -92,8 +92,6 @@ class BartSummarizationDistiller(SummarizationModule):
         student = BartForConditionalGeneration(student_cfg)
         student, _ = init_student(student, teacher)
         save_dir = self.output_dir.joinpath("student")
-        save_dir.mkdir(exist_ok=True)
-
         self.copy_to_student(d_layers_to_copy, e_layers_to_copy, hparams, student, teacher)
         student.save_pretrained(save_dir)
         hparams.model_name_or_path = str(save_dir)
```
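The explicit `save_dir.mkdir(exist_ok=True)` is dropped because `student.save_pretrained(save_dir)` now creates the directory itself. A short pathlib sketch of what the removed line did versus what `save_pretrained` now does internally (paths are illustrative):

```python
import os
from pathlib import Path

output_dir = Path("distill_run")           # hypothetical output root
save_dir = output_dir.joinpath("student")

# Removed line: created only the leaf directory and assumed output_dir already existed.
# save_dir.mkdir(exist_ok=True)

# What save_pretrained(save_dir) now effectively does before writing any files:
os.makedirs(save_dir, exist_ok=True)       # also creates missing parent directories
```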
examples/text-classification/run_xnli.py

```diff
@@ -573,10 +573,6 @@ def main():
     # Saving best-practices: if you use defaults names for the model, you can reload it using from_pretrained()
     if args.do_train and (args.local_rank == -1 or torch.distributed.get_rank() == 0):
-        # Create output directory if needed
-        if not os.path.exists(args.output_dir) and args.local_rank in [-1, 0]:
-            os.makedirs(args.output_dir)
-
         logger.info("Saving model checkpoint to %s", args.output_dir)
         # Save a trained model, configuration and tokenizer using `save_pretrained()`.
         # They can then be reloaded using `from_pretrained()`
```
src/transformers/configuration_utils.py

```diff
@@ -132,10 +132,9 @@ class PretrainedConfig(object):
             save_directory (:obj:`string`):
                 Directory where the configuration JSON file will be saved.
         """
-        assert os.path.isdir(
-            save_directory
-        ), "Saving path should be a directory where the model and configuration can be saved"
-
+        if os.path.isfile(save_directory):
+            raise AssertionError("Provided path ({}) should be a directory, not a file".format(save_directory))
+        os.makedirs(save_directory, exist_ok=True)
         # If we save using the predefined names, we can load using `from_pretrained`
         output_config_file = os.path.join(save_directory, CONFIG_NAME)
```
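A short usage sketch of the new `PretrainedConfig.save_pretrained` behaviour, assuming a local scratch directory `run1/` (paths are illustrative):

```python
from transformers import BertConfig

config = BertConfig()

# The directory (including parents) no longer needs to exist beforehand.
config.save_pretrained("run1/configs")
config.save_pretrained("run1/configs")   # idempotent thanks to exist_ok=True

# Pointing save_pretrained at an existing *file* now raises AssertionError.
try:
    config.save_pretrained("run1/configs/config.json")
except AssertionError as err:
    print(err)  # Provided path (run1/configs/config.json) should be a directory, not a file
```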
src/transformers/convert_pytorch_checkpoint_to_tf2.py

```diff
@@ -240,7 +240,6 @@ def convert_all_pt_checkpoints_to_tf(
     remove_cached_files=False,
     only_convert_finetuned_models=False,
 ):
-    assert os.path.isdir(args.tf_dump_path), "--tf_dump_path should be a directory"
     if args_model_type is None:
         model_types = list(MODEL_CLASSES.keys())
```
src/transformers/modeling_tf_utils.py

```diff
@@ -315,9 +315,10 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin):
         """ Save a model and its configuration file to a directory, so that it
            can be re-loaded using the :func:`~transformers.PreTrainedModel.from_pretrained` class method.
         """
-        assert os.path.isdir(
-            save_directory
-        ), "Saving path should be a directory where the model and configuration can be saved"
+        if os.path.isfile(save_directory):
+            logger.error("Provided path ({}) should be a directory, not a file".format(save_directory))
+            return
+        os.makedirs(save_directory, exist_ok=True)
 
         # Save configuration file
         self.config.save_pretrained(save_directory)
```
src/transformers/modeling_utils.py

```diff
@@ -477,9 +477,10 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin):
         Arguments:
             save_directory: directory to which to save.
         """
-        assert os.path.isdir(
-            save_directory
-        ), "Saving path should be a directory where the model and configuration can be saved"
+        if os.path.isfile(save_directory):
+            logger.error("Provided path ({}) should be a directory, not a file".format(save_directory))
+            return
+        os.makedirs(save_directory, exist_ok=True)
 
         # Only save the model itself if we are using distributed training
         model_to_save = self.module if hasattr(self, "module") else self
```
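Unlike the configuration change above, the PyTorch and TF model methods do not raise on a bad path: they log an error and return early. A sketch with a deliberately tiny, randomly initialized model (the configuration values are arbitrary, for illustration only):

```python
import os
from transformers import BertConfig, BertModel

config = BertConfig(hidden_size=32, num_hidden_layers=1, num_attention_heads=2, intermediate_size=64)
model = BertModel(config)

# The checkpoint directory is created on demand; training scripts no longer need os.makedirs().
model.save_pretrained("run1/checkpoint-0")
reloaded = BertModel.from_pretrained("run1/checkpoint-0")

# Passing an existing file logs "Provided path (...) should be a directory, not a file"
# and returns without saving, instead of raising.
model.save_pretrained(os.path.join("run1", "checkpoint-0", "pytorch_model.bin"))
```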
src/transformers/pipelines.py

```diff
@@ -405,9 +405,10 @@ class Pipeline(_ScikitCompat):
         """
         Save the pipeline's model and tokenizer to the specified save_directory
         """
-        if not os.path.isdir(save_directory):
-            logger.error("Provided path ({}) should be a directory".format(save_directory))
+        if os.path.isfile(save_directory):
+            logger.error("Provided path ({}) should be a directory, not a file".format(save_directory))
             return
+        os.makedirs(save_directory, exist_ok=True)
 
         self.model.save_pretrained(save_directory)
         self.tokenizer.save_pretrained(save_directory)
```
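`Pipeline.save_pretrained` follows the same pattern: create the directory, then delegate to the model's and tokenizer's own `save_pretrained`. A round-trip sketch (the default sentiment-analysis checkpoint is downloaded on first use; the save path is illustrative):

```python
from transformers import pipeline

nlp = pipeline("sentiment-analysis")

# "saved/sentiment" does not need to exist; it is created with os.makedirs(..., exist_ok=True).
nlp.save_pretrained("saved/sentiment")

# Reload the saved model and tokenizer from disk.
nlp2 = pipeline("sentiment-analysis", model="saved/sentiment", tokenizer="saved/sentiment")
print(nlp2("save_pretrained no longer requires the directory to exist"))
```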
src/transformers/tokenization_utils_base.py

```diff
@@ -1343,9 +1343,10 @@ class PreTrainedTokenizerBase(SpecialTokensMixin):
         This method make sure the full tokenizer can then be re-loaded using the
         :func:`~transformers.PreTrainedTokenizer.from_pretrained` class method.
         """
-        if not os.path.isdir(save_directory):
-            logger.error("Saving directory ({}) should be a directory".format(save_directory))
+        if os.path.isfile(save_directory):
+            logger.error("Provided path ({}) should be a directory, not a file".format(save_directory))
             return
+        os.makedirs(save_directory, exist_ok=True)
 
         special_tokens_map_file = os.path.join(save_directory, SPECIAL_TOKENS_MAP_FILE)
         added_tokens_file = os.path.join(save_directory, ADDED_TOKENS_FILE)
```
templates/adding_a_new_example_script/run_xxx.py

```diff
@@ -653,10 +653,6 @@ def main():
     # Save the trained model and the tokenizer
     if args.do_train and (args.local_rank == -1 or torch.distributed.get_rank() == 0):
-        # Create output directory if needed
-        if not os.path.exists(args.output_dir) and args.local_rank in [-1, 0]:
-            os.makedirs(args.output_dir)
-
         logger.info("Saving model checkpoint to %s", args.output_dir)
         # Save a trained model, configuration and tokenizer using `save_pretrained()`.
         # They can then be reloaded using `from_pretrained()`
```