Commit e6decf02 authored by Leo Gao

gpt2: extract model call

parent 7a39d68b
@@ -155,7 +155,7 @@ class GPT2LM(LM):
                 contlens.append(cont)
                 inplens.append(inplen)
-            multi_logits = F.log_softmax(self.gpt2(torch.cat(inps, dim=0))[0][:, :, :50257], dim=-1).cpu()  # [batch, seq, vocab]
+            multi_logits = F.log_softmax(self._model_call(torch.cat(inps, dim=0)), dim=-1).cpu()  # [batch, seq, vocab]
             for (cache_key, _, _), logits, inp, inplen, cont_toks in zip(chunk, multi_logits, inps, inplens, contlens):
                 contlen = len(cont_toks)
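
For context, the loop this hunk touches scores each continuation by reading its token log-probabilities back out of `multi_logits`. A minimal standalone sketch of that pattern follows; the function name and arguments are illustrative, and it assumes the model input was the full sequence with its last token dropped, so position `i` of the logits predicts token `i + 1`:

```python
import torch
import torch.nn.functional as F

def score_continuation(logits, cont_toks, inplen):
    """Sum the log-probabilities a model assigns to one continuation.

    logits:    [seq, vocab] raw logits for a single input sequence
    cont_toks: token ids of the continuation being scored
    inplen:    length of the tensor that was fed to the model
    """
    logprobs = F.log_softmax(logits, dim=-1)              # [seq, vocab]
    contlen = len(cont_toks)
    # The last `contlen` input positions are the ones whose logits
    # predict the continuation tokens.
    cont_logprobs = logprobs[inplen - contlen : inplen]   # [contlen, vocab]
    targets = torch.tensor(cont_toks).unsqueeze(-1)       # [contlen, 1]
    return cont_logprobs.gather(1, targets).sum().item()
```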
@@ -183,6 +183,16 @@ class GPT2LM(LM):
 
         return reord.get_original(res)
 
+    def _model_call(self, inps):
+        """
+        inps: a torch tensor of shape [batch, sequence]
+        the size of sequence may vary from call to call
+
+        returns: a torch tensor of shape [batch, sequence, vocab] with the
+        logits returned from the model
+        """
+        return self.gpt2(inps)[0][:, :, :50257]
+
     def greedy_until(self, requests):
         # TODO: implement fully general `until` that handles untils that are
         # multiple tokens or that span multiple tokens correctly
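The payoff of extracting `_model_call` is that a subclass can swap in a different forward pass without touching the batching and scoring logic above. A hedged sketch of such an override; the subclass name and `device` attribute are assumptions, not part of this commit:

```python
import torch

class GPT2LMOnDevice(GPT2LM):
    """Hypothetical subclass that runs the forward pass on a chosen device."""

    def _model_call(self, inps):
        # Same contract as the base class: [batch, sequence] in,
        # [batch, sequence, vocab] out. The [:, :, :50257] slice keeps
        # only GPT-2's real vocabulary entries.
        with torch.no_grad():
            return self.gpt2(inps.to(self.device))[0][:, :, :50257]
```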