Unverified Commit ba5cdf0f authored by Anjor Kanekar, committed by GitHub

Add TemplateLM boilerplate LM class (#1279)

* loglikelihood refactor using template lm

* linter

* fix whitespace in target + prompt for CoT gsm8k (#1275)

* Make `parallelize=True` vs. `accelerate launch` distinction clearer in docs (#1261)

* Make parallelize=True distinction clearer in documentation.

* run linter

* Allow parameter edits for registered tasks when listed in a benchmark (#1273)

* benchmark yamls allow minor edits of already registered tasks

* add documentation

* removed print

* Fix data-parallel evaluation with quantized models (#1270)

* add WIP device_map overrides

* update handling outside of accelerate launcher

* change .to(device) log to debug level

* run linter

* Rework documentation for explaining local dataset (#1284)

* rework documentation for explaining local dataset

* fix typo

* Update new_task_guide.md

* Re-add citation

It looks like Google Scholar has [already noticed](https://scholar.google.com/scholar?hl=en&as_sdt=0%2C9...
parent c26a6ac7
`docs/model_guide.md`:

```diff
@@ -66,7 +66,7 @@ All three request types take as input `requests` of type `list[Instance]` that h
 - It should return `(ll,) : Tuple[float]` , a.k.a. solely the *loglikelihood* of producing each piece of text given no starting input.
 
-To allow a model to be evaluated on all types of tasks, you will need to implement these three types of measurements (note that `loglikelihood_rolling` is a special case of `loglikelihood`). For a reference implementation, check out `lm_eval/models/huggingface.py` !
+To allow a model to be evaluated on all types of tasks, you will need to implement these three types of measurements (note that `loglikelihood_rolling` is a special case of `loglikelihood`). For a reference implementation, check out `lm_eval/models/huggingface.py` ! Additionally, check out `lm_eval.api.model.TemplateLM` for a class that abstracts away some commonly used functions across LM subclasses, or see if your model would lend itself well to subclassing the `lm_eval.models.huggingface.HFLM` class and overriding just the initialization or a couple methods!
 
 **Tip: be careful of indexing in loglikelihood!**
```
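The documentation change above points new backend authors at `TemplateLM`. To make the division of labor concrete, here is a minimal sketch of what a subclass must supply; the shared `loglikelihood` plumbing then comes from `TemplateLM` itself. `MyBackendLM` and its tokenizer attribute are hypothetical illustrations, not part of this commit:

```python
# Hypothetical sketch of the minimal surface a TemplateLM subclass covers.
from typing import List, Tuple

from lm_eval.api.model import TemplateLM


class MyBackendLM(TemplateLM):
    def __init__(self, tokenizer):
        super().__init__()
        # assumed to expose .encode / .eos_token_id, like an HF tokenizer
        self.tokenizer = tokenizer

    @property
    def eot_token_id(self):
        return self.tokenizer.eos_token_id

    def tok_encode(self, string: str, **kwargs) -> List[int]:
        # **kwargs absorbs add_special_tokens=False passed by _encode_pair
        return self.tokenizer.encode(string)

    def _loglikelihood_tokens(self, requests, **kwargs) -> List[Tuple[float, bool]]:
        # requests are ((context, continuation), context_enc, continuation_enc)
        # tuples prepared by TemplateLM.loglikelihood; score them here.
        raise NotImplementedError("backend-specific scoring goes here")

    def loglikelihood_rolling(self, requests):
        raise NotImplementedError

    def generate_until(self, requests) -> List[str]:
        raise NotImplementedError
```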
`lm_eval/api/model.py`:

```diff
@@ -247,3 +247,61 @@ class CachingLM:
     def get_cache_hook(self):
         return CacheHook(self)
+
+
+class TemplateLM(LM):
+    """
+    A class acting as intermediary between the LM base class
+    and boilerplate often included in other LM subclasses.
+    """
+
+    @property
+    @abc.abstractmethod
+    def eot_token_id(self):
+        pass
+
+    @abc.abstractmethod
+    def tok_encode(self, string: str, **kwargs):
+        pass
+
+    @abc.abstractmethod
+    def _loglikelihood_tokens(self, requests, **kwargs):
+        pass
+
+    def _encode_pair(self, context, continuation):
+        n_spaces = len(context) - len(context.rstrip())
+        if n_spaces > 0:
+            continuation = context[-n_spaces:] + continuation
+            context = context[:-n_spaces]
+
+        whole_enc = self.tok_encode(context + continuation, add_special_tokens=False)
+        context_enc = self.tok_encode(context, add_special_tokens=False)
+
+        context_enc_len = len(context_enc)
+        continuation_enc = whole_enc[context_enc_len:]
+
+        return context_enc, continuation_enc
+
+    def loglikelihood(self, requests) -> List[Tuple[float, bool]]:
+        new_reqs = []
+        for context, continuation in [req.args for req in requests]:
+            if context == "":
+                # end of text as context
+                context_enc, continuation_enc = (
+                    [self.eot_token_id],
+                    self.tok_encode(continuation),
+                )
+            else:
+                context_enc, continuation_enc = self._encode_pair(context, continuation)
+
+            new_reqs.append(((context, continuation), context_enc, continuation_enc))
+
+        return self._loglikelihood_tokens(new_reqs)
+
+    @abc.abstractmethod
+    def loglikelihood_rolling(self, requests) -> List[Tuple[float, bool]]:
+        pass
+
+    @abc.abstractmethod
+    def generate_until(self, requests) -> List[str]:
+        pass
```
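A note on the whitespace shuffle inside `_encode_pair`, since it recurs in every backend this commit touches: BPE-style tokenizers typically fold a leading space into the token that follows it, so if the context ends in whitespace, encoding context and continuation separately can disagree with encoding the concatenated string. Moving trailing context whitespace onto the continuation, then slicing the whole-string encoding at the context length, keeps the split consistent. A small illustration (not part of the diff; assumes the `transformers` GPT-2 tokenizer is available locally):

```python
# Illustrative only: shows the invariant _encode_pair relies on.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("gpt2")

context, continuation = "Question: 2+2=", " 4"

whole_enc = tok.encode(context + continuation)  # ground-truth token stream
context_enc = tok.encode(context)
# Continuation tokens are whatever the whole encoding adds past the context:
continuation_enc = whole_enc[len(context_enc):]

# For a context with no trailing whitespace, the context encoding is a
# prefix of the whole encoding, so the split round-trips:
print(whole_enc[: len(context_enc)] == context_enc)  # True here

# With a trailing space ("Question: 2+2= " + "4"), tok.encode(context) would
# end in a standalone space token that never appears in whole_enc, which is
# exactly the case _encode_pair normalizes away before tokenizing.
```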
`lm_eval/models/huggingface.py`:

```diff
@@ -24,7 +24,7 @@ from transformers.models.auto.modeling_auto import (
 from lm_eval import utils
 from lm_eval.api.instance import Instance
-from lm_eval.api.model import LM
+from lm_eval.api.model import TemplateLM
 from lm_eval.api.registry import register_model
 from lm_eval.models.utils import (
     Collator,
@@ -64,7 +64,7 @@ def _get_accelerate_args(
 @register_model("hf-auto", "hf", "huggingface")
-class HFLM(LM):
+class HFLM(TemplateLM):
     """
     An abstracted Huggingface model class. Enables usage with both models of
     `transformers.AutoModelForCausalLM` and `transformers.AutoModelForSeq2SeqLM` classes.
@@ -780,39 +780,6 @@ class HFLM(LM):
         return logits
 
-    def _encode_pair(
-        self, context: str, continuation: str
-    ) -> Tuple[List[int], List[int]]:
-        n_spaces = len(context) - len(context.rstrip())
-        if n_spaces > 0:
-            continuation = context[-n_spaces:] + continuation
-            context = context[:-n_spaces]
-
-        whole_enc = self.tok_encode(context + continuation, add_special_tokens=False)
-        context_enc = self.tok_encode(context, add_special_tokens=False)
-
-        # whole_enc = self.tok_encode(context + continuation)
-        # context_enc = self.tok_encode(context, add_special_tokens=False)
-
-        context_enc_len = len(context_enc)
-        continuation_enc = whole_enc[context_enc_len:]
-
-        return context_enc, continuation_enc
-
-    def loglikelihood(self, requests: List[Instance]) -> List[Tuple[float, bool]]:
-        new_reqs = []
-        for context, continuation in [req.args for req in requests]:
-            if context == "":
-                # end of text as context
-                context_enc, continuation_enc = (
-                    [self.eot_token_id],
-                    self.tok_encode(continuation),
-                )
-            else:
-                context_enc, continuation_enc = self._encode_pair(context, continuation)
-
-            new_reqs.append(((context, continuation), context_enc, continuation_enc))
-
-        return self._loglikelihood_tokens(requests=new_reqs)
-
     def loglikelihood_rolling(self, requests: List[Instance]) -> List[float]:
         loglikelihoods = []
```
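With the duplicated methods above deleted, `HFLM` should pick up `loglikelihood` purely through inheritance. A quick, hypothetical sanity check of that claim (assumes `lm-eval` and its `transformers` dependency are installed):

```python
# Hypothetical post-refactor check: HFLM no longer defines its own
# loglikelihood; it resolves to TemplateLM's implementation via the MRO.
from lm_eval.api.model import TemplateLM
from lm_eval.models.huggingface import HFLM

assert issubclass(HFLM, TemplateLM)
assert "loglikelihood" not in HFLM.__dict__   # the copy above is gone
print(HFLM.loglikelihood is TemplateLM.loglikelihood)  # True
```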
`lm_eval/models/neuron_optimum.py`:

```diff
@@ -15,7 +15,7 @@ from transformers.generation import StoppingCriteriaList
 import lm_eval.models.utils
 from lm_eval import utils
-from lm_eval.api.model import LM
+from lm_eval.api.model import TemplateLM
 from lm_eval.api.registry import register_model
 from lm_eval.models.utils import stop_sequences_criteria
@@ -172,7 +172,7 @@ class CustomNeuronModelForCausalLM(NeuronModelForCausalLM):
 @register_model("neuronx")
-class NEURON_HF(LM):
+class NEURON_HF(TemplateLM):
     """
     Enables usage on AWS Neuron
     using the HuggingFace Transformers + Transformers neuronx library.
@@ -447,37 +447,6 @@ class NEURON_HF(LM):
         return logits
 
-    def _encode_pair(self, context, continuation):
-        n_spaces = len(context) - len(context.rstrip())
-        if n_spaces > 0:
-            continuation = context[-n_spaces:] + continuation
-            context = context[:-n_spaces]
-
-        whole_enc = self.tok_encode(context + continuation, add_special_tokens=False)
-        context_enc = self.tok_encode(context, add_special_tokens=False)
-
-        # whole_enc = self.tok_encode(context + continuation)
-        # context_enc = self.tok_encode(context, add_special_tokens=False)
-
-        context_enc_len = len(context_enc)
-        continuation_enc = whole_enc[context_enc_len:]
-
-        return context_enc, continuation_enc
-
-    def loglikelihood(self, requests):
-        new_reqs = []
-        for context, continuation in [req.args for req in requests]:
-            if context == "":
-                # end of text as context
-                context_enc, continuation_enc = (
-                    [self.eot_token_id],
-                    self.tok_encode(continuation),
-                )
-            else:
-                context_enc, continuation_enc = self._encode_pair(context, continuation)
-
-            new_reqs.append(((context, continuation), context_enc, continuation_enc))
-
-        return self._loglikelihood_tokens(new_reqs)
-
     def loglikelihood_rolling(self, requests):
         loglikelihoods = []
```
`lm_eval/models/openai_completions.py`:

```diff
@@ -8,7 +8,7 @@ from tqdm import tqdm
 import lm_eval.models.utils
 from lm_eval import utils
-from lm_eval.api.model import LM
+from lm_eval.api.model import LM, TemplateLM
 from lm_eval.api.registry import register_model
 from lm_eval.models.utils import retry_on_specific_exceptions
 from lm_eval.utils import eval_logger
@@ -75,7 +75,7 @@ def oa_completion(client, chat: bool = False, **kwargs):
 @register_model("openai-completions", "local-completions")
-class OpenaiCompletionsLM(LM):
+class OpenaiCompletionsLM(TemplateLM):
     _DEFAULT_MAX_LENGTH = 2048
 
     def __init__(
@@ -171,41 +171,12 @@ class OpenaiCompletionsLM(LM):
         # Isn't used because we override _loglikelihood_tokens
         raise NotImplementedError()
 
-    def tok_encode(self, string: str) -> List[int]:
+    def tok_encode(self, string: str, **kwargs) -> List[int]:
         return self.tokenizer.encode(string)
 
     def tok_decode(self, tokens: List[int]) -> str:
         return self.tokenizer.decode(tokens)
 
-    def _encode_pair(
-        self, context: str, continuation: str
-    ) -> Tuple[List[int], List[int]]:
-        n_spaces = len(context) - len(context.rstrip())
-        if n_spaces > 0:
-            continuation = context[-n_spaces:] + continuation
-            context = context[:-n_spaces]
-
-        whole_enc = self.tok_encode(context + continuation)
-        context_enc = self.tok_encode(context)
-
-        context_enc_len = len(context_enc)
-        continuation_enc = whole_enc[context_enc_len:]
-
-        return context_enc, continuation_enc
-
-    def loglikelihood(self, requests) -> List[Tuple[float, bool]]:
-        new_reqs = []
-        for context, continuation in [req.args for req in requests]:
-            if context == "":
-                # end of text as context
-                context_enc, continuation_enc = (
-                    [self.eot_token_id],
-                    self.tok_encode(continuation),
-                )
-            else:
-                context_enc, continuation_enc = self._encode_pair(context, continuation)
-
-            new_reqs.append(((context, continuation), context_enc, continuation_enc))
-
-        return self._loglikelihood_tokens(new_reqs)
-
     def _loglikelihood_tokens(
         self, requests, disable_tqdm: bool = False
     ) -> List[Tuple[float, bool]]:
```
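The `**kwargs` added to `tok_encode` above is load-bearing: `TemplateLM._encode_pair` always passes `add_special_tokens=False`, a flag this backend's tokenizer does not take, so the signature must absorb and ignore it. A hedged sketch of the pattern, where `ToyTokenizer` and `KwargsAbsorbingLM` are hypothetical stand-ins, not from this diff:

```python
from typing import List


class ToyTokenizer:
    """Stand-in for a tokenizer whose encode() takes no add_special_tokens flag."""

    def encode(self, string: str) -> List[int]:
        return [ord(c) for c in string]  # toy "tokenization"


class KwargsAbsorbingLM:
    def __init__(self) -> None:
        self.tokenizer = ToyTokenizer()

    def tok_encode(self, string: str, **kwargs) -> List[int]:
        # add_special_tokens=False from TemplateLM._encode_pair lands in
        # kwargs and is deliberately dropped, since this tokenizer has no
        # notion of special tokens.
        return self.tokenizer.encode(string)


lm = KwargsAbsorbingLM()
print(lm.tok_encode("hi", add_special_tokens=False))  # no TypeError: [104, 105]
```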
`lm_eval/models/vllm_causallms.py`:

```diff
@@ -5,7 +5,7 @@ from typing import List, Literal, Optional, Tuple, Union
 from tqdm import tqdm
 
 from lm_eval.api.instance import Instance
-from lm_eval.api.model import LM
+from lm_eval.api.model import TemplateLM
 from lm_eval.api.registry import register_model
 from lm_eval.models.utils import Collator, divide
 from lm_eval.utils import (
@@ -35,7 +35,7 @@ def run_inference_one_model(
 @register_model("vllm")
-class VLLM(LM):
+class VLLM(TemplateLM):
     _DEFAULT_MAX_LENGTH = 2048
 
     def __init__(
@@ -194,37 +194,6 @@ class VLLM(LM):
         )
         return outputs
 
-    def _encode_pair(
-        self, context: str, continuation: str
-    ) -> Tuple[List[int], List[int]]:
-        n_spaces = len(context) - len(context.rstrip())
-        if n_spaces > 0:
-            continuation = context[-n_spaces:] + continuation
-            context = context[:-n_spaces]
-
-        whole_enc = self.tok_encode(context + continuation, add_special_tokens=False)
-        context_enc = self.tok_encode(context, add_special_tokens=False)
-
-        context_enc_len = len(context_enc)
-        continuation_enc = whole_enc[context_enc_len:]
-
-        return context_enc, continuation_enc
-
-    def loglikelihood(self, requests: List[Instance]) -> List[Tuple[float, bool]]:
-        new_reqs = []
-        for context, continuation in [req.args for req in requests]:
-            if context == "":
-                # end of text as context
-                context_enc, continuation_enc = (
-                    [self.eot_token_id],
-                    self.tok_encode(continuation),
-                )
-            else:
-                context_enc, continuation_enc = self._encode_pair(context, continuation)
-
-            new_reqs.append(((context, continuation), context_enc, continuation_enc))
-
-        return self._loglikelihood_tokens(new_reqs)
-
     def loglikelihood_rolling(self, requests: List[Instance]) -> List[float]:
         loglikelihoods = []
```