Add multi-node conditions in trainer_qa.py and trainer_seq2seq.py (#19502)

* Add multi-node conditions in trainer_qa.py and trainer_seq2seq.py
* Code improvement

Add multi-node conditions in trainer_qa.py and trainer_seq2seq.py (#19502)
* Add multi-node conditions in trainer_qa.py and trainer_seq2seq.py
* Code improvement
bb2cfd18 · regisss · GitHub · 69b81c0a · bb2cfd18 · bb2cfd18
Unverified Commit bb2cfd18 authored Oct 12, 2022 by regisss Committed by GitHub Oct 11, 2022
2 changed files
--- a/examples/pytorch/question-answering/trainer_qa.py
+++ b/examples/pytorch/question-answering/trainer_qa.py
@@ -52,7 +52,8 @@ class QuestionAnsweringTrainer(Trainer):
        finally:
            self.compute_metrics = compute_metrics
-        if self.post_process_function is not None and self.compute_metrics is not None:
+        if self.post_process_function is not None and self.compute_metrics is not None and self.args.should_save:
+            # Only the main node writes the results by default
            eval_preds = self.post_process_function(eval_examples, eval_dataset, output.predictions)
            metrics = self.compute_metrics(eval_preds)
@@ -60,11 +61,13 @@ class QuestionAnsweringTrainer(Trainer):
            for key in list(metrics.keys()):
                if not key.startswith(f"{metric_key_prefix}_"):
                    metrics[f"{metric_key_prefix}_{key}"] = metrics.pop(key)
-            self.log(metrics)
        else:
            metrics = {}
+        if self.args.should_log:
+            # Only the main node logs the results by default
+            self.log(metrics)
        if self.args.tpu_metrics_debug or self.args.debug:
            # tpu-comment: Logging debug metrics for PyTorch/XLA (compile, execute times, ops, etc.)
            xm.master_print(met.metrics_report())

--- a/examples/pytorch/question-answering/trainer_seq2seq_qa.py
+++ b/examples/pytorch/question-answering/trainer_seq2seq_qa.py
@@ -84,7 +84,8 @@ class QuestionAnsweringSeq2SeqTrainer(Seq2SeqTrainer):
            )
        )
-        if self.post_process_function is not None and self.compute_metrics is not None:
+        if self.post_process_function is not None and self.compute_metrics is not None and self.args.should_save:
+            # Only the main node writes the results by default
            eval_preds = self.post_process_function(eval_examples, eval_dataset, output)
            metrics = self.compute_metrics(eval_preds)
@@ -94,7 +95,11 @@ class QuestionAnsweringSeq2SeqTrainer(Seq2SeqTrainer):
                    metrics[f"{metric_key_prefix}_{key}"] = metrics.pop(key)
            output.metrics.update(metrics)
+        else:
+            metrics = {}
+        if self.args.should_log:
+            # Only the main node logs the results by default
            self.log(metrics)
        if self.args.tpu_metrics_debug or self.args.debug: