Only keep the active part of the loss for token classification

f3bda235 · Thibault Fevry · 8f8bbd4a · f3bda235
Commit f3bda235 authored Feb 04, 2019 by Thibault Fevry
Hide whitespace changes
Inline Side-by-side

Showing with 8 additions and 1 deletion

pytorch_pretrained_bert/modeling.py pytorch_pretrained_bert/modeling.py +8 -1

No files found.
--- a/pytorch_pretrained_bert/modeling.py
+++ b/pytorch_pretrained_bert/modeling.py
@@ -1025,7 +1025,14 @@ class BertForTokenClassification(PreTrainedBertModel):

        if labels is not None:
            loss_fct = CrossEntropyLoss()
-            loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
+            # Only keep active parts of the loss
+            if attention_mask is not None:
+                active_loss = attention_mask.view(-1) == 1
+                active_logits = logits.view(-1, self.num_labels)[active_loss]
+                active_labels = labels.view(-1)[active_loss]
+                loss = loss_fct(active_logits, active_labels)
+            else:
+                loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
            return loss
        else:
            return logits