Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
08a5f575
Unverified
Commit
08a5f575
authored
Nov 05, 2021
by
Sylvain Gugger
Committed by
GitHub
Nov 05, 2021
Browse files
Add new LFS prune API (#14294)
parent
4be78c22
Changes
11
Hide whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
51 additions
and
23 deletions
+51
-23
examples/pytorch/language-modeling/run_clm_no_trainer.py
examples/pytorch/language-modeling/run_clm_no_trainer.py
+4
-2
examples/pytorch/language-modeling/run_mlm_no_trainer.py
examples/pytorch/language-modeling/run_mlm_no_trainer.py
+4
-2
examples/pytorch/multiple-choice/run_swag_no_trainer.py
examples/pytorch/multiple-choice/run_swag_no_trainer.py
+4
-2
examples/pytorch/question-answering/run_qa_beam_search_no_trainer.py
...torch/question-answering/run_qa_beam_search_no_trainer.py
+4
-2
examples/pytorch/question-answering/run_qa_no_trainer.py
examples/pytorch/question-answering/run_qa_no_trainer.py
+4
-2
examples/pytorch/speech-pretraining/run_wav2vec2_pretraining_no_trainer.py
...speech-pretraining/run_wav2vec2_pretraining_no_trainer.py
+6
-2
examples/pytorch/summarization/run_summarization_no_trainer.py
...les/pytorch/summarization/run_summarization_no_trainer.py
+4
-2
examples/pytorch/text-classification/run_glue_no_trainer.py
examples/pytorch/text-classification/run_glue_no_trainer.py
+4
-2
examples/pytorch/token-classification/run_ner_no_trainer.py
examples/pytorch/token-classification/run_ner_no_trainer.py
+4
-2
examples/pytorch/translation/run_translation_no_trainer.py
examples/pytorch/translation/run_translation_no_trainer.py
+4
-2
src/transformers/trainer.py
src/transformers/trainer.py
+9
-3
No files found.
examples/pytorch/language-modeling/run_clm_no_trainer.py
View file @
08a5f575
...
...
@@ -507,7 +507,9 @@ def main():
unwrapped_model
.
save_pretrained
(
args
.
output_dir
,
save_function
=
accelerator
.
save
)
if
accelerator
.
is_main_process
:
tokenizer
.
save_pretrained
(
args
.
output_dir
)
repo
.
push_to_hub
(
commit_message
=
f
"Training in progress epoch
{
epoch
}
"
,
blocking
=
False
)
repo
.
push_to_hub
(
commit_message
=
f
"Training in progress epoch
{
epoch
}
"
,
blocking
=
False
,
auto_lfs_prune
=
True
)
if
args
.
output_dir
is
not
None
:
accelerator
.
wait_for_everyone
()
...
...
@@ -516,7 +518,7 @@ def main():
if
accelerator
.
is_main_process
:
tokenizer
.
save_pretrained
(
args
.
output_dir
)
if
args
.
push_to_hub
:
repo
.
push_to_hub
(
commit_message
=
"End of training"
)
repo
.
push_to_hub
(
commit_message
=
"End of training"
,
auto_lfs_prune
=
True
)
if
__name__
==
"__main__"
:
...
...
examples/pytorch/language-modeling/run_mlm_no_trainer.py
View file @
08a5f575
...
...
@@ -548,7 +548,9 @@ def main():
unwrapped_model
.
save_pretrained
(
args
.
output_dir
,
save_function
=
accelerator
.
save
)
if
accelerator
.
is_main_process
:
tokenizer
.
save_pretrained
(
args
.
output_dir
)
repo
.
push_to_hub
(
commit_message
=
f
"Training in progress epoch
{
epoch
}
"
,
blocking
=
False
)
repo
.
push_to_hub
(
commit_message
=
f
"Training in progress epoch
{
epoch
}
"
,
blocking
=
False
,
auto_lfs_prune
=
True
)
if
args
.
output_dir
is
not
None
:
accelerator
.
wait_for_everyone
()
...
...
@@ -557,7 +559,7 @@ def main():
if
accelerator
.
is_main_process
:
tokenizer
.
save_pretrained
(
args
.
output_dir
)
if
args
.
push_to_hub
:
repo
.
push_to_hub
(
commit_message
=
"End of training"
)
repo
.
push_to_hub
(
commit_message
=
"End of training"
,
auto_lfs_prune
=
True
)
if
__name__
==
"__main__"
:
...
...
examples/pytorch/multiple-choice/run_swag_no_trainer.py
View file @
08a5f575
...
...
@@ -505,7 +505,9 @@ def main():
unwrapped_model
.
save_pretrained
(
args
.
output_dir
,
save_function
=
accelerator
.
save
)
if
accelerator
.
is_main_process
:
tokenizer
.
save_pretrained
(
args
.
output_dir
)
repo
.
push_to_hub
(
commit_message
=
f
"Training in progress epoch
{
epoch
}
"
,
blocking
=
False
)
repo
.
push_to_hub
(
commit_message
=
f
"Training in progress epoch
{
epoch
}
"
,
blocking
=
False
,
auto_lfs_prune
=
True
)
if
args
.
output_dir
is
not
None
:
accelerator
.
wait_for_everyone
()
...
...
@@ -514,7 +516,7 @@ def main():
if
accelerator
.
is_main_process
:
tokenizer
.
save_pretrained
(
args
.
output_dir
)
if
args
.
push_to_hub
:
repo
.
push_to_hub
(
commit_message
=
"End of training"
)
repo
.
push_to_hub
(
commit_message
=
"End of training"
,
auto_lfs_prune
=
True
)
if
__name__
==
"__main__"
:
...
...
examples/pytorch/question-answering/run_qa_beam_search_no_trainer.py
View file @
08a5f575
...
...
@@ -731,7 +731,9 @@ def main():
unwrapped_model
.
save_pretrained
(
args
.
output_dir
,
save_function
=
accelerator
.
save
)
if
accelerator
.
is_main_process
:
tokenizer
.
save_pretrained
(
args
.
output_dir
)
repo
.
push_to_hub
(
commit_message
=
f
"Training in progress epoch
{
epoch
}
"
,
blocking
=
False
)
repo
.
push_to_hub
(
commit_message
=
f
"Training in progress epoch
{
epoch
}
"
,
blocking
=
False
,
auto_lfs_prune
=
True
)
# initialize all lists to collect the batches
all_start_top_log_probs
=
[]
...
...
@@ -853,7 +855,7 @@ def main():
if
accelerator
.
is_main_process
:
tokenizer
.
save_pretrained
(
args
.
output_dir
)
if
args
.
push_to_hub
:
repo
.
push_to_hub
(
commit_message
=
"End of training"
)
repo
.
push_to_hub
(
commit_message
=
"End of training"
,
auto_lfs_prune
=
True
)
if
__name__
==
"__main__"
:
...
...
examples/pytorch/question-answering/run_qa_no_trainer.py
View file @
08a5f575
...
...
@@ -737,7 +737,9 @@ def main():
unwrapped_model
.
save_pretrained
(
args
.
output_dir
,
save_function
=
accelerator
.
save
)
if
accelerator
.
is_main_process
:
tokenizer
.
save_pretrained
(
args
.
output_dir
)
repo
.
push_to_hub
(
commit_message
=
f
"Training in progress epoch
{
epoch
}
"
,
blocking
=
False
)
repo
.
push_to_hub
(
commit_message
=
f
"Training in progress epoch
{
epoch
}
"
,
blocking
=
False
,
auto_lfs_prune
=
True
)
# Evaluation
logger
.
info
(
"***** Running Evaluation *****"
)
...
...
@@ -816,7 +818,7 @@ def main():
if
accelerator
.
is_main_process
:
tokenizer
.
save_pretrained
(
args
.
output_dir
)
if
args
.
push_to_hub
:
repo
.
push_to_hub
(
commit_message
=
"End of training"
)
repo
.
push_to_hub
(
commit_message
=
"End of training"
,
auto_lfs_prune
=
True
)
if
__name__
==
"__main__"
:
...
...
examples/pytorch/speech-pretraining/run_wav2vec2_pretraining_no_trainer.py
View file @
08a5f575
...
...
@@ -667,7 +667,11 @@ def main():
unwrapped_model
.
save_pretrained
(
args
.
output_dir
,
save_function
=
accelerator
.
save
)
if
(
args
.
push_to_hub
and
epoch
<
args
.
num_train_epochs
-
1
)
and
accelerator
.
is_main_process
:
repo
.
push_to_hub
(
commit_message
=
f
"Training in progress step
{
completed_steps
}
"
,
blocking
=
False
)
repo
.
push_to_hub
(
commit_message
=
f
"Training in progress step
{
completed_steps
}
"
,
blocking
=
False
,
auto_lfs_prune
=
True
,
)
# if completed steps > `args.max_train_steps` stop
if
completed_steps
>=
args
.
max_train_steps
:
...
...
@@ -714,7 +718,7 @@ def main():
unwrapped_model
.
save_pretrained
(
args
.
output_dir
,
save_function
=
accelerator
.
save
)
if
accelerator
.
is_main_process
:
if
args
.
push_to_hub
:
repo
.
push_to_hub
(
commit_message
=
"End of training"
)
repo
.
push_to_hub
(
commit_message
=
"End of training"
,
auto_lfs_prune
=
True
)
if
__name__
==
"__main__"
:
...
...
examples/pytorch/summarization/run_summarization_no_trainer.py
View file @
08a5f575
...
...
@@ -601,7 +601,9 @@ def main():
unwrapped_model
.
save_pretrained
(
args
.
output_dir
,
save_function
=
accelerator
.
save
)
if
accelerator
.
is_main_process
:
tokenizer
.
save_pretrained
(
args
.
output_dir
)
repo
.
push_to_hub
(
commit_message
=
f
"Training in progress epoch
{
epoch
}
"
,
blocking
=
False
)
repo
.
push_to_hub
(
commit_message
=
f
"Training in progress epoch
{
epoch
}
"
,
blocking
=
False
,
auto_lfs_prune
=
True
)
if
args
.
output_dir
is
not
None
:
accelerator
.
wait_for_everyone
()
...
...
@@ -610,7 +612,7 @@ def main():
if
accelerator
.
is_main_process
:
tokenizer
.
save_pretrained
(
args
.
output_dir
)
if
args
.
push_to_hub
:
repo
.
push_to_hub
(
commit_message
=
"End of training"
)
repo
.
push_to_hub
(
commit_message
=
"End of training"
,
auto_lfs_prune
=
True
)
if
__name__
==
"__main__"
:
...
...
examples/pytorch/text-classification/run_glue_no_trainer.py
View file @
08a5f575
...
...
@@ -453,7 +453,9 @@ def main():
unwrapped_model
.
save_pretrained
(
args
.
output_dir
,
save_function
=
accelerator
.
save
)
if
accelerator
.
is_main_process
:
tokenizer
.
save_pretrained
(
args
.
output_dir
)
repo
.
push_to_hub
(
commit_message
=
f
"Training in progress epoch
{
epoch
}
"
,
blocking
=
False
)
repo
.
push_to_hub
(
commit_message
=
f
"Training in progress epoch
{
epoch
}
"
,
blocking
=
False
,
auto_lfs_prune
=
True
)
if
args
.
output_dir
is
not
None
:
accelerator
.
wait_for_everyone
()
...
...
@@ -462,7 +464,7 @@ def main():
if
accelerator
.
is_main_process
:
tokenizer
.
save_pretrained
(
args
.
output_dir
)
if
args
.
push_to_hub
:
repo
.
push_to_hub
(
commit_message
=
"End of training"
)
repo
.
push_to_hub
(
commit_message
=
"End of training"
,
auto_lfs_prune
=
True
)
if
args
.
task_name
==
"mnli"
:
# Final evaluation on mismatched validation set
...
...
examples/pytorch/token-classification/run_ner_no_trainer.py
View file @
08a5f575
...
...
@@ -590,7 +590,9 @@ def main():
unwrapped_model
.
save_pretrained
(
args
.
output_dir
,
save_function
=
accelerator
.
save
)
if
accelerator
.
is_main_process
:
tokenizer
.
save_pretrained
(
args
.
output_dir
)
repo
.
push_to_hub
(
commit_message
=
f
"Training in progress epoch
{
epoch
}
"
,
blocking
=
False
)
repo
.
push_to_hub
(
commit_message
=
f
"Training in progress epoch
{
epoch
}
"
,
blocking
=
False
,
auto_lfs_prune
=
True
)
if
args
.
output_dir
is
not
None
:
accelerator
.
wait_for_everyone
()
...
...
@@ -599,7 +601,7 @@ def main():
if
accelerator
.
is_main_process
:
tokenizer
.
save_pretrained
(
args
.
output_dir
)
if
args
.
push_to_hub
:
repo
.
push_to_hub
(
commit_message
=
"End of training"
)
repo
.
push_to_hub
(
commit_message
=
"End of training"
,
auto_lfs_prune
=
True
)
if
__name__
==
"__main__"
:
...
...
examples/pytorch/translation/run_translation_no_trainer.py
View file @
08a5f575
...
...
@@ -580,7 +580,9 @@ def main():
unwrapped_model
.
save_pretrained
(
args
.
output_dir
,
save_function
=
accelerator
.
save
)
if
accelerator
.
is_main_process
:
tokenizer
.
save_pretrained
(
args
.
output_dir
)
repo
.
push_to_hub
(
commit_message
=
f
"Training in progress epoch
{
epoch
}
"
,
blocking
=
False
)
repo
.
push_to_hub
(
commit_message
=
f
"Training in progress epoch
{
epoch
}
"
,
blocking
=
False
,
auto_lfs_prune
=
True
)
if
args
.
output_dir
is
not
None
:
accelerator
.
wait_for_everyone
()
...
...
@@ -589,7 +591,7 @@ def main():
if
accelerator
.
is_main_process
:
tokenizer
.
save_pretrained
(
args
.
output_dir
)
if
args
.
push_to_hub
:
repo
.
push_to_hub
(
commit_message
=
"End of training"
)
repo
.
push_to_hub
(
commit_message
=
"End of training"
,
auto_lfs_prune
=
True
)
if
__name__
==
"__main__"
:
...
...
src/transformers/trainer.py
View file @
08a5f575
...
...
@@ -2644,7 +2644,9 @@ class Trainer:
commit_message
=
f
"Training in progress, step
{
self
.
state
.
global_step
}
"
else
:
commit_message
=
f
"Training in progress, epoch
{
int
(
self
.
state
.
epoch
)
}
"
_
,
self
.
push_in_progress
=
self
.
repo
.
push_to_hub
(
commit_message
=
commit_message
,
blocking
=
False
)
_
,
self
.
push_in_progress
=
self
.
repo
.
push_to_hub
(
commit_message
=
commit_message
,
blocking
=
False
,
auto_lfs_prune
=
True
)
finally
:
if
self
.
args
.
hub_strategy
==
HubStrategy
.
CHECKPOINT
:
# Move back the checkpoint to its place
...
...
@@ -2680,12 +2682,16 @@ class Trainer:
if
not
self
.
is_world_process_zero
():
return
git_head_commit_url
=
self
.
repo
.
push_to_hub
(
commit_message
=
commit_message
,
blocking
=
blocking
)
git_head_commit_url
=
self
.
repo
.
push_to_hub
(
commit_message
=
commit_message
,
blocking
=
blocking
,
auto_lfs_prune
=
True
)
# push the model card separately so it is independent from the rest of the model
if
self
.
args
.
should_save
:
self
.
create_model_card
(
model_name
=
model_name
,
**
kwargs
)
try
:
self
.
repo
.
push_to_hub
(
commit_message
=
"update model card README.md"
,
blocking
=
blocking
)
self
.
repo
.
push_to_hub
(
commit_message
=
"update model card README.md"
,
blocking
=
blocking
,
auto_lfs_prune
=
True
)
except
EnvironmentError
as
exc
:
logger
.
error
(
f
"Error pushing update to the model card. Please read logs and retry.
\n
$
{
exc
}
"
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment