Commit 810fd2e5 authored by Alexander

Integrated Optimum models

parent b21c8f3d
@@ -23,6 +23,7 @@ def simple_evaluate(
     description_dict=None,
     check_integrity=False,
     decontamination_ngrams_path=None,
+    tokenizer=None,
     write_out=False,
     output_base_path=None,
 ):
@@ -67,7 +68,7 @@ def simple_evaluate(
         if model_args is None:
             model_args = ""
         lm = lm_eval.models.get_model(model).create_from_arg_string(
-            model_args, {"batch_size": batch_size, "device": device}
+            model_args, {"batch_size": batch_size, "device": device, "tokenizer": tokenizer, "trust_remote_code": True}
         )
     else:
         assert isinstance(model, lm_eval.base.LM)
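For context, create_from_arg_string (a classmethod on LM in lm_eval.base) parses the comma-separated key=value pairs from --model_args and merges in the extra keyword arguments supplied above, which is how tokenizer and trust_remote_code reach the model constructor. A minimal sketch of that merging behavior, assuming it mirrors the upstream helper:

    def create_from_arg_string(cls, arg_string, additional_config=None):
        """Sketch only; assumes the semantics of lm_eval.base.LM.create_from_arg_string."""
        # "pretrained=gpt2,revision=main" -> {"pretrained": "gpt2", "revision": "main"}
        args = dict(pair.split("=") for pair in arg_string.split(",") if pair)
        extras = {k: v for k, v in (additional_config or {}).items() if v is not None}
        return cls(**args, **extras)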
@@ -15,6 +15,7 @@ MODEL_REGISTRY = {
     "anthropic": anthropic_llms.AnthropicLM,
     "textsynth": textsynth.TextSynthLM,
     "dummy": dummy.DummyLM,
+    "optimum-causal": gpt2.OPTIMUMLM,
 }
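With the registry entry in place, the OpenVINO backend is selectable by name. A hypothetical lookup (the model path is a placeholder), assuming get_model is the usual registry accessor in lm_eval.models:

    import lm_eval.models

    lm_class = lm_eval.models.get_model("optimum-causal")  # -> gpt2.OPTIMUMLM
    lm = lm_class.create_from_arg_string(
        "pretrained=./gpt2-openvino",  # placeholder: a locally exported OpenVINO model
        {"batch_size": 1, "device": "cpu"},
    )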
@@ -2,6 +2,8 @@ import torch
 import transformers
 from typing import Optional, Union
 from lm_eval.base import BaseLM
+import optimum
+from optimum.intel.openvino import OVModelForCausalLM
 
 
 def _get_dtype(
@@ -142,3 +144,125 @@ class HFLM(BaseLM):
 
 # for backwards compatibility
 GPT2LM = HFLM
+
+
+class OPTIMUMLM(BaseLM):
+    def __init__(
+        self,
+        device="cpu",
+        pretrained="gpt2",
+        revision="main",
+        low_cpu_mem_usage=None,
+        subfolder=None,
+        tokenizer=None,
+        batch_size=1,
+        load_in_8bit: Optional[bool] = False,
+        trust_remote_code: Optional[bool] = False,
+    ):
+        super().__init__()
+
+        assert isinstance(device, str)
+        assert isinstance(pretrained, str)
+        assert isinstance(batch_size, (int, str))
+
+        device_list = set(
+            ["cuda", "cpu"] + [f"cuda:{i}" for i in range(torch.cuda.device_count())]
+        )
+        if device and device in device_list:
+            self._device = torch.device(device)
+            print(f"Using device '{device}'")
+        else:
+            print("Device not specified")
+            print(f"Cuda Available? {torch.cuda.is_available()}")
+            self._device = (
+                torch.device("cuda")
+                if torch.cuda.is_available()
+                else torch.device("cpu")
+            )
+
+        # TODO: update this to be less of a hack once subfolder is fixed in HF
+        revision = revision + ("/" + subfolder if subfolder is not None else "")
+
+        self.gpt2 = OVModelForCausalLM.from_pretrained(
+            pretrained,
+            # load_in_8bit and low_cpu_mem_usage are HF-specific options kept
+            # disabled here; they are not accepted by OVModelForCausalLM
+            # load_in_8bit=load_in_8bit,
+            # low_cpu_mem_usage=low_cpu_mem_usage,
+            revision=revision,
+            trust_remote_code=trust_remote_code,
+            use_cache=True,
+        )
+        # self.gpt2.eval()  # not needed: the OpenVINO model is inference-only
+
+        self.tokenizer = transformers.AutoTokenizer.from_pretrained(
+            pretrained if tokenizer is None else tokenizer,
+            revision=revision,
+            trust_remote_code=trust_remote_code,
+        )
+
+        self.vocab_size = self.tokenizer.vocab_size
+
+        # if isinstance(
+        #     self.tokenizer, (transformers.GPT2Tokenizer, transformers.GPT2TokenizerFast)
+        # ):
+        #     assert self.tokenizer.encode("hello\n\nhello") == [
+        #         31373,
+        #         198,
+        #         198,
+        #         31373,
+        #     ], self.tokenizer.encode("hello\n\nhello")
+
+        # setup for automatic batch size detection
+        if batch_size == "auto":
+            self.batch_size_per_gpu = batch_size
+        else:
+            self.batch_size_per_gpu = int(batch_size)
+
+    @property
+    def eot_token_id(self):
+        # we use EOT because end of *text* is more accurate for what we're doing than end of *sentence*
+        return self.tokenizer.eos_token_id
+
+    @property
+    def max_length(self):
+        try:
+            return self.gpt2.config.n_ctx
+        except AttributeError:
+            # gptneoconfig doesn't have n_ctx apparently
+            return self.gpt2.config.max_position_embeddings
+
+    @property
+    def max_gen_toks(self):
+        return 256
+
+    @property
+    def batch_size(self):
+        # TODO: fix multi-gpu
+        return self.batch_size_per_gpu  # * gpus
+
+    @property
+    def device(self):
+        # TODO: fix multi-gpu
+        return self._device
+
+    def tok_encode(self, string: str):
+        return self.tokenizer.encode(string, add_special_tokens=False)
+
+    def tok_decode(self, tokens):
+        return self.tokenizer.decode(tokens)
+
+    def _model_call(self, inps):
+        """
+        inps: a torch tensor of shape [batch, sequence]
+        the size of sequence may vary from call to call
+
+        returns: a torch tensor of shape [batch, sequence, vocab] with the
+        logits returned from the model
+        """
+        # no torch.no_grad() needed: the OpenVINO model does not track gradients
+        attention_mask = torch.ones_like(inps)
+        return self.gpt2(inps, attention_mask)[0]
+
+    def _model_generate(self, context, max_length, eos_token_id):
+        generation_kwargs = {"do_sample": False, "max_length": max_length}
+        if eos_token_id is not None:
+            generation_kwargs["eos_token_id"] = eos_token_id
+            generation_kwargs["pad_token_id"] = eos_token_id  # setting eos_token_id as pad token
+        return self.gpt2.generate(context, **generation_kwargs)
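OVModelForCausalLM.from_pretrained loads an OpenVINO IR model, so a plain Transformers checkpoint has to be converted first. A hedged sketch of preparing such a model, assuming an optimum-intel version where export=True triggers the conversion:

    from optimum.intel.openvino import OVModelForCausalLM
    from transformers import AutoTokenizer

    # Convert the PyTorch checkpoint to OpenVINO IR and save it locally
    # ("./gpt2-openvino" is a placeholder path used in the examples here).
    model = OVModelForCausalLM.from_pretrained("gpt2", export=True)
    model.save_pretrained("./gpt2-openvino")
    AutoTokenizer.from_pretrained("gpt2").save_pretrained("./gpt2-openvino")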
@@ -10,6 +10,7 @@ import functools
 import numpy as np
 import re
 import importlib.resources
+import importlib_resources
 
 from lm_eval.base import rf, Task
 from lm_eval.metrics import mean
@@ -229,7 +230,10 @@ def create_task_from_path(json_path):
 
 def create_all_tasks():
-    resources_dir = importlib.resources.files("lm_eval.datasets") / "bigbench_resources"
+    try:
+        resources_dir = importlib.resources.files("lm_eval.datasets") / "bigbench_resources"
+    except AttributeError:  # importlib.resources.files() requires Python 3.9+
+        resources_dir = importlib_resources.files("lm_eval.datasets") / "bigbench_resources"
     supported_tasks = [os.path.splitext(x)[0] for x in os.listdir(resources_dir)]
     res = {}
     for task_name in supported_tasks:
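importlib.resources.files() only exists on Python 3.9+, hence the fallback to the importlib_resources backport. An equivalent pattern gates on the interpreter version once at import time instead of catching the error on every call:

    import sys

    if sys.version_info >= (3, 9):
        from importlib.resources import files
    else:
        from importlib_resources import files  # backport package for Python 3.8

    resources_dir = files("lm_eval.datasets") / "bigbench_resources"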
@@ -12,6 +12,7 @@ def parse_args():
     parser = argparse.ArgumentParser()
     parser.add_argument("--model", required=True)
     parser.add_argument("--model_args", default="")
+    parser.add_argument("--tokenizer", default=None)
     parser.add_argument("--tasks", default=None, choices=utils.MultiChoice(tasks.ALL_TASKS))
     parser.add_argument("--provide_description", action="store_true")
     parser.add_argument("--num_fewshot", type=int, default=0)
@@ -66,6 +67,7 @@ def main():
         description_dict=description_dict,
         decontamination_ngrams_path=args.decontamination_ngrams_path,
         check_integrity=args.check_integrity,
+        tokenizer=args.tokenizer,
         write_out=args.write_out,
         output_base_path=args.output_base_path,
     )
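End to end, the new --tokenizer flag travels from main.py into simple_evaluate and on to the model constructor. A hypothetical programmatic equivalent (the model path and task name are placeholders):

    import lm_eval.evaluator

    results = lm_eval.evaluator.simple_evaluate(
        model="optimum-causal",                   # resolved through MODEL_REGISTRY
        model_args="pretrained=./gpt2-openvino",  # placeholder: exported OpenVINO model
        tokenizer="gpt2",                         # forwarded to AutoTokenizer.from_pretrained
        tasks=["lambada_openai"],                 # placeholder task
        batch_size=1,
        device="cpu",
    )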
@@ -19,7 +19,7 @@ setuptools.setup(
         "License :: OSI Approved :: MIT License",
         "Operating System :: OS Independent",
     ],
-    python_requires=">=3.9",
+    python_requires=">=3.8",
     install_requires=[
         "datasets>=2.0.0",
         "jsonlines",
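Lowering python_requires to 3.8 pairs with the importlib_resources fallback above. The commit does not show it, but the backport would presumably also need to be declared as a conditional dependency; a hedged sketch using an environment marker:

    # Assumption: not part of this commit; shows how the 3.8-only backport
    # could be declared in setup.py's install_requires.
    install_requires = [
        "datasets>=2.0.0",
        "jsonlines",
        'importlib_resources; python_version < "3.9"',
    ]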