chenpangpang / transformers · Commits

Commit c164064e (unverified)
Authored Jul 29, 2021 by chutaklee; committed via GitHub on Jul 29, 2021
Parent: 1da782cb

Fix distiller.py (#12910)

* fix distiller
* fix style
Showing 1 changed file with 6 additions and 8 deletions:

examples/research_projects/distillation/distiller.py (+6, -8)
--- a/examples/research_projects/distillation/distiller.py
+++ b/examples/research_projects/distillation/distiller.py
...
@@ -380,21 +380,19 @@ class Distiller:
         lm_labels: `torch.tensor(bs, seq_length)` - The language modeling labels (mlm labels for MLM and clm labels for CLM).
         """
         if self.mlm:
-            s_logits, s_hidden_states = self.student(
+            student_outputs = self.student(
                 input_ids=input_ids, attention_mask=attention_mask
             )  # (bs, seq_length, voc_size)
             with torch.no_grad():
-                t_logits, t_hidden_states = self.teacher(
+                teacher_outputs = self.teacher(
                     input_ids=input_ids, attention_mask=attention_mask
                 )  # (bs, seq_length, voc_size)
         else:
-            s_logits, _, s_hidden_states = self.student(
-                input_ids=input_ids, attention_mask=None
-            )  # (bs, seq_length, voc_size)
+            student_outputs = self.student(input_ids=input_ids, attention_mask=None)  # (bs, seq_length, voc_size)
             with torch.no_grad():
-                t_logits, _, t_hidden_states = self.teacher(
-                    input_ids=input_ids, attention_mask=None
-                )  # (bs, seq_length, voc_size)
+                teacher_outputs = self.teacher(input_ids=input_ids, attention_mask=None)  # (bs, seq_length, voc_size)
+        s_logits, s_hidden_states = student_outputs["logits"], student_outputs["hidden_states"]
+        t_logits, t_hidden_states = teacher_outputs["logits"], teacher_outputs["hidden_states"]
         assert s_logits.size() == t_logits.size()

         # https://github.com/peterliht/knowledge-distillation-pytorch/blob/master/model/net.py#L100
...
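
For context on the fix: in the transformers versions this example now targets, models return ModelOutput objects by default (return_dict=True) rather than plain tuples, and iterating such an object yields its keys, so the old tuple unpacking of self.student(...) / self.teacher(...) stopped producing logits and hidden states. Below is a minimal sketch of the failure mode and of the key-based access the commit adopts; the toy config and input ids are illustrative assumptions, not part of the commit:

import torch
from transformers import DistilBertConfig, DistilBertForMaskedLM

# The distillation example enables hidden states on both the student and
# teacher configs; without this, outputs["hidden_states"] would be absent.
config = DistilBertConfig(output_hidden_states=True)
model = DistilBertForMaskedLM(config)  # randomly initialized, illustration only

input_ids = torch.tensor([[101, 2054, 2003, 102]])  # hypothetical toy ids
outputs = model(input_ids=input_ids, attention_mask=None)

# Old pattern (pre-fix): tuple unpacking. A ModelOutput iterates over its
# keys, so this would bind the strings "logits" and "hidden_states" (or
# raise ValueError on a key-count mismatch), not the tensors themselves:
#   s_logits, s_hidden_states = model(input_ids=input_ids, attention_mask=None)

# New pattern (the commit): index by key.
logits = outputs["logits"]                # (bs, seq_length, voc_size)
hidden_states = outputs["hidden_states"]  # tuple: embedding output + one per block
print(logits.shape, len(hidden_states))

Key-based access is also robust to optional fields such as loss or attentions, whose presence depends on the call arguments; that variability is exactly what made positional unpacking fragile here.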