Unverified commit ab177588, authored by Sylvain Gugger and committed by GitHub
Browse files

Add speed metrics to all example scripts + template (#9260)

parent 5b5f7dd0
...@@ -341,9 +341,20 @@ def main(): ...@@ -341,9 +341,20 @@ def main():
if (model_args.model_name_or_path is not None and os.path.isdir(model_args.model_name_or_path)) if (model_args.model_name_or_path is not None and os.path.isdir(model_args.model_name_or_path))
else None else None
) )
trainer.train(model_path=model_path) train_result = trainer.train(model_path=model_path)
trainer.save_model() # Saves the tokenizer too for easy upload trainer.save_model() # Saves the tokenizer too for easy upload
output_train_file = os.path.join(training_args.output_dir, "train_results.txt")
if trainer.is_world_process_zero():
with open(output_train_file, "w") as writer:
logger.info("***** Train results *****")
for key, value in sorted(train_result.metrics.items()):
logger.info(f" {key} = {value}")
writer.write(f"{key} = {value}\n")
# Need to save the state, since Trainer.save_model saves only the tokenizer with the model
trainer.state.save_to_json(os.path.join(training_args.output_dir, "trainer_state.json"))
# Evaluation # Evaluation
results = {} results = {}
if training_args.do_eval: if training_args.do_eval:
...@@ -358,7 +369,7 @@ def main(): ...@@ -358,7 +369,7 @@ def main():
if trainer.is_world_process_zero(): if trainer.is_world_process_zero():
with open(output_eval_file, "w") as writer: with open(output_eval_file, "w") as writer:
logger.info("***** Eval results *****") logger.info("***** Eval results *****")
for key, value in results.items(): for key, value in sorted(results.items()):
logger.info(f" {key} = {value}") logger.info(f" {key} = {value}")
writer.write(f"{key} = {value}\n") writer.write(f"{key} = {value}\n")
......
...@@ -376,9 +376,20 @@ def main(): ...@@ -376,9 +376,20 @@ def main():
if (model_args.model_name_or_path is not None and os.path.isdir(model_args.model_name_or_path)) if (model_args.model_name_or_path is not None and os.path.isdir(model_args.model_name_or_path))
else None else None
) )
trainer.train(model_path=model_path) train_result = trainer.train(model_path=model_path)
trainer.save_model() # Saves the tokenizer too for easy upload trainer.save_model() # Saves the tokenizer too for easy upload
output_train_file = os.path.join(training_args.output_dir, "train_results.txt")
if trainer.is_world_process_zero():
with open(output_train_file, "w") as writer:
logger.info("***** Train results *****")
for key, value in sorted(train_result.metrics.items()):
logger.info(f" {key} = {value}")
writer.write(f"{key} = {value}\n")
# Need to save the state, since Trainer.save_model saves only the tokenizer with the model
trainer.state.save_to_json(os.path.join(training_args.output_dir, "trainer_state.json"))
# Evaluation # Evaluation
results = {} results = {}
if training_args.do_eval: if training_args.do_eval:
...@@ -393,7 +404,7 @@ def main(): ...@@ -393,7 +404,7 @@ def main():
if trainer.is_world_process_zero(): if trainer.is_world_process_zero():
with open(output_eval_file, "w") as writer: with open(output_eval_file, "w") as writer:
logger.info("***** Eval results *****") logger.info("***** Eval results *****")
for key, value in results.items(): for key, value in sorted(results.items()):
logger.info(f" {key} = {value}") logger.info(f" {key} = {value}")
writer.write(f"{key} = {value}\n") writer.write(f"{key} = {value}\n")
......
...@@ -334,9 +334,20 @@ def main(): ...@@ -334,9 +334,20 @@ def main():
if (model_args.model_name_or_path is not None and os.path.isdir(model_args.model_name_or_path)) if (model_args.model_name_or_path is not None and os.path.isdir(model_args.model_name_or_path))
else None else None
) )
trainer.train(model_path=model_path) train_result = trainer.train(model_path=model_path)
trainer.save_model() # Saves the tokenizer too for easy upload trainer.save_model() # Saves the tokenizer too for easy upload
output_train_file = os.path.join(training_args.output_dir, "train_results.txt")
if trainer.is_world_process_zero():
with open(output_train_file, "w") as writer:
logger.info("***** Train results *****")
for key, value in sorted(train_result.metrics.items()):
logger.info(f" {key} = {value}")
writer.write(f"{key} = {value}\n")
# Need to save the state, since Trainer.save_model saves only the tokenizer with the model
trainer.state.save_to_json(os.path.join(training_args.output_dir, "trainer_state.json"))
# Evaluation # Evaluation
results = {} results = {}
if training_args.do_eval: if training_args.do_eval:
...@@ -351,7 +362,7 @@ def main(): ...@@ -351,7 +362,7 @@ def main():
if trainer.is_world_process_zero(): if trainer.is_world_process_zero():
with open(output_eval_file, "w") as writer: with open(output_eval_file, "w") as writer:
logger.info("***** Eval results *****") logger.info("***** Eval results *****")
for key, value in results.items(): for key, value in sorted(results.items()):
logger.info(f" {key} = {value}") logger.info(f" {key} = {value}")
writer.write(f"{key} = {value}\n") writer.write(f"{key} = {value}\n")
......
...@@ -363,9 +363,20 @@ def main(): ...@@ -363,9 +363,20 @@ def main():
if (model_args.model_name_or_path is not None and os.path.isdir(model_args.model_name_or_path)) if (model_args.model_name_or_path is not None and os.path.isdir(model_args.model_name_or_path))
else None else None
) )
trainer.train(model_path=model_path) train_result = trainer.train(model_path=model_path)
trainer.save_model() # Saves the tokenizer too for easy upload trainer.save_model() # Saves the tokenizer too for easy upload
output_train_file = os.path.join(training_args.output_dir, "train_results.txt")
if trainer.is_world_process_zero():
with open(output_train_file, "w") as writer:
logger.info("***** Train results *****")
for key, value in sorted(train_result.metrics.items()):
logger.info(f" {key} = {value}")
writer.write(f"{key} = {value}\n")
# Need to save the state, since Trainer.save_model saves only the tokenizer with the model
trainer.state.save_to_json(os.path.join(training_args.output_dir, "trainer_state.json"))
# Evaluation # Evaluation
results = {} results = {}
if training_args.do_eval: if training_args.do_eval:
...@@ -380,7 +391,7 @@ def main(): ...@@ -380,7 +391,7 @@ def main():
if trainer.is_world_process_zero(): if trainer.is_world_process_zero():
with open(output_eval_file, "w") as writer: with open(output_eval_file, "w") as writer:
logger.info("***** Eval results *****") logger.info("***** Eval results *****")
for key, value in results.items(): for key, value in sorted(results.items()):
logger.info(f" {key} = {value}") logger.info(f" {key} = {value}")
writer.write(f"{key} = {value}\n") writer.write(f"{key} = {value}\n")
......
...@@ -317,11 +317,22 @@ def main(): ...@@ -317,11 +317,22 @@ def main():
# Training # Training
if training_args.do_train: if training_args.do_train:
trainer.train( train_result = trainer.train(
model_path=model_args.model_name_or_path if os.path.isdir(model_args.model_name_or_path) else None model_path=model_args.model_name_or_path if os.path.isdir(model_args.model_name_or_path) else None
) )
trainer.save_model() # Saves the tokenizer too for easy upload trainer.save_model() # Saves the tokenizer too for easy upload
output_train_file = os.path.join(training_args.output_dir, "train_results.txt")
if trainer.is_world_process_zero():
with open(output_train_file, "w") as writer:
logger.info("***** Train results *****")
for key, value in sorted(train_result.metrics.items()):
logger.info(f" {key} = {value}")
writer.write(f"{key} = {value}\n")
# Need to save the state, since Trainer.save_model saves only the tokenizer with the model
trainer.state.save_to_json(os.path.join(training_args.output_dir, "trainer_state.json"))
# Evaluation # Evaluation
results = {} results = {}
if training_args.do_eval: if training_args.do_eval:
...@@ -333,7 +344,7 @@ def main(): ...@@ -333,7 +344,7 @@ def main():
if trainer.is_world_process_zero(): if trainer.is_world_process_zero():
with open(output_eval_file, "w") as writer: with open(output_eval_file, "w") as writer:
logger.info("***** Eval results *****") logger.info("***** Eval results *****")
for key, value in results.items(): for key, value in sorted(results.items()):
logger.info(f" {key} = {value}") logger.info(f" {key} = {value}")
writer.write(f"{key} = {value}\n") writer.write(f"{key} = {value}\n")
......
...@@ -438,11 +438,22 @@ def main(): ...@@ -438,11 +438,22 @@ def main():
# Training # Training
if training_args.do_train: if training_args.do_train:
trainer.train( train_result = trainer.train(
model_path=model_args.model_name_or_path if os.path.isdir(model_args.model_name_or_path) else None model_path=model_args.model_name_or_path if os.path.isdir(model_args.model_name_or_path) else None
) )
trainer.save_model() # Saves the tokenizer too for easy upload trainer.save_model() # Saves the tokenizer too for easy upload
output_train_file = os.path.join(training_args.output_dir, "train_results.txt")
if trainer.is_world_process_zero():
with open(output_train_file, "w") as writer:
logger.info("***** Train results *****")
for key, value in sorted(train_result.metrics.items()):
logger.info(f" {key} = {value}")
writer.write(f"{key} = {value}\n")
# Need to save the state, since Trainer.save_model saves only the tokenizer with the model
trainer.state.save_to_json(os.path.join(training_args.output_dir, "trainer_state.json"))
# Evaluation # Evaluation
results = {} results = {}
if training_args.do_eval: if training_args.do_eval:
...@@ -453,7 +464,7 @@ def main(): ...@@ -453,7 +464,7 @@ def main():
if trainer.is_world_process_zero(): if trainer.is_world_process_zero():
with open(output_eval_file, "w") as writer: with open(output_eval_file, "w") as writer:
logger.info("***** Eval results *****") logger.info("***** Eval results *****")
for key, value in results.items(): for key, value in sorted(results.items()):
logger.info(f" {key} = {value}") logger.info(f" {key} = {value}")
writer.write(f"{key} = {value}\n") writer.write(f"{key} = {value}\n")
......
...@@ -481,11 +481,22 @@ def main(): ...@@ -481,11 +481,22 @@ def main():
# Training # Training
if training_args.do_train: if training_args.do_train:
trainer.train( train_result = trainer.train(
model_path=model_args.model_name_or_path if os.path.isdir(model_args.model_name_or_path) else None model_path=model_args.model_name_or_path if os.path.isdir(model_args.model_name_or_path) else None
) )
trainer.save_model() # Saves the tokenizer too for easy upload trainer.save_model() # Saves the tokenizer too for easy upload
output_train_file = os.path.join(training_args.output_dir, "train_results.txt")
if trainer.is_world_process_zero():
with open(output_train_file, "w") as writer:
logger.info("***** Train results *****")
for key, value in sorted(train_result.metrics.items()):
logger.info(f" {key} = {value}")
writer.write(f"{key} = {value}\n")
# Need to save the state, since Trainer.save_model saves only the tokenizer with the model
trainer.state.save_to_json(os.path.join(training_args.output_dir, "trainer_state.json"))
# Evaluation # Evaluation
results = {} results = {}
if training_args.do_eval: if training_args.do_eval:
...@@ -496,7 +507,7 @@ def main(): ...@@ -496,7 +507,7 @@ def main():
if trainer.is_world_process_zero(): if trainer.is_world_process_zero():
with open(output_eval_file, "w") as writer: with open(output_eval_file, "w") as writer:
logger.info("***** Eval results *****") logger.info("***** Eval results *****")
for key, value in results.items(): for key, value in sorted(results.items()):
logger.info(f" {key} = {value}") logger.info(f" {key} = {value}")
writer.write(f"{key} = {value}\n") writer.write(f"{key} = {value}\n")
......
...@@ -340,11 +340,22 @@ def main(): ...@@ -340,11 +340,22 @@ def main():
# Training # Training
if training_args.do_train: if training_args.do_train:
trainer.train( train_result = trainer.train(
model_path=model_args.model_name_or_path if os.path.isdir(model_args.model_name_or_path) else None model_path=model_args.model_name_or_path if os.path.isdir(model_args.model_name_or_path) else None
) )
trainer.save_model() # Saves the tokenizer too for easy upload trainer.save_model() # Saves the tokenizer too for easy upload
output_train_file = os.path.join(training_args.output_dir, "train_results.txt")
if trainer.is_world_process_zero():
with open(output_train_file, "w") as writer:
logger.info("***** Train results *****")
for key, value in sorted(train_result.metrics.items()):
logger.info(f" {key} = {value}")
writer.write(f"{key} = {value}\n")
# Need to save the state, since Trainer.save_model saves only the tokenizer with the model
trainer.state.save_to_json(os.path.join(training_args.output_dir, "trainer_state.json"))
# Evaluation # Evaluation
results = {} results = {}
if training_args.do_eval: if training_args.do_eval:
...@@ -377,7 +388,7 @@ def main(): ...@@ -377,7 +388,7 @@ def main():
output_test_results_file = os.path.join(training_args.output_dir, "test_results.txt") output_test_results_file = os.path.join(training_args.output_dir, "test_results.txt")
if trainer.is_world_process_zero(): if trainer.is_world_process_zero():
with open(output_test_results_file, "w") as writer: with open(output_test_results_file, "w") as writer:
for key, value in metrics.items(): for key, value in sorted(metrics.items()):
logger.info(f" {key} = {value}") logger.info(f" {key} = {value}")
writer.write(f"{key} = {value}\n") writer.write(f"{key} = {value}\n")
......
...@@ -308,7 +308,7 @@ def main(): ...@@ -308,7 +308,7 @@ def main():
# Training # Training
if training_args.do_train: if training_args.do_train:
{%- if cookiecutter.can_train_from_scratch == "False" %} {%- if cookiecutter.can_train_from_scratch == "False" %}
trainer.train( train_result = trainer.train(
model_path=model_args.model_name_or_path if os.path.isdir(model_args.model_name_or_path) else None model_path=model_args.model_name_or_path if os.path.isdir(model_args.model_name_or_path) else None
) )
{%- elif cookiecutter.can_train_from_scratch == "True" %} {%- elif cookiecutter.can_train_from_scratch == "True" %}
...@@ -317,10 +317,21 @@ def main(): ...@@ -317,10 +317,21 @@ def main():
if (model_args.model_name_or_path is not None and os.path.isdir(model_args.model_name_or_path)) if (model_args.model_name_or_path is not None and os.path.isdir(model_args.model_name_or_path))
else None else None
) )
trainer.train(model_path=model_path) train_result = trainer.train(model_path=model_path)
{% endif %} {% endif %}
trainer.save_model() # Saves the tokenizer too for easy upload trainer.save_model() # Saves the tokenizer too for easy upload
output_train_file = os.path.join(training_args.output_dir, "train_results.txt")
if trainer.is_world_process_zero():
with open(output_train_file, "w") as writer:
logger.info("***** Train results *****")
for key, value in sorted(train_result.metrics.items()):
logger.info(f" {key} = {value}")
writer.write(f"{key} = {value}\n")
# Need to save the state, since Trainer.save_model saves only the tokenizer with the model
trainer.state.save_to_json(os.path.join(training_args.output_dir, "trainer_state.json"))
# Evaluation # Evaluation
results = {} results = {}
if training_args.do_eval: if training_args.do_eval:
...@@ -332,7 +343,7 @@ def main(): ...@@ -332,7 +343,7 @@ def main():
if trainer.is_world_process_zero(): if trainer.is_world_process_zero():
with open(output_eval_file, "w") as writer: with open(output_eval_file, "w") as writer:
logger.info("***** Eval results *****") logger.info("***** Eval results *****")
for key, value in results.items(): for key, value in sorted(results.items()):
logger.info(f" {key} = {value}") logger.info(f" {key} = {value}")
writer.write(f"{key} = {value}\n") writer.write(f"{key} = {value}\n")
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment