Commit 810fd2e5 authored by Alexander

Integrated Optimum models

parent b21c8f3d
@@ -23,6 +23,7 @@ def simple_evaluate(
     description_dict=None,
     check_integrity=False,
     decontamination_ngrams_path=None,
+    tokenizer=None,
     write_out=False,
     output_base_path=None,
 ):
@@ -67,7 +68,7 @@ def simple_evaluate(
         if model_args is None:
             model_args = ""
         lm = lm_eval.models.get_model(model).create_from_arg_string(
-            model_args, {"batch_size": batch_size, "device": device}
+            model_args, {"batch_size": batch_size, "device": device, "tokenizer": tokenizer, "trust_remote_code": True}
         )
     else:
         assert isinstance(model, lm_eval.base.LM)
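For context, create_from_arg_string (a classmethod on LM in lm_eval.base) parses the comma-separated key=value pairs from --model_args and merges in the extra keyword arguments supplied above, which is how tokenizer and trust_remote_code reach the model constructor. A minimal sketch of that merging behavior, assuming it mirrors the upstream helper:

    def create_from_arg_string(cls, arg_string, additional_config=None):
        """Sketch only; assumes the semantics of lm_eval.base.LM.create_from_arg_string."""
        # "pretrained=gpt2,revision=main" -> {"pretrained": "gpt2", "revision": "main"}
        args = dict(pair.split("=") for pair in arg_string.split(",") if pair)
        extras = {k: v for k, v in (additional_config or {}).items() if v is not None}
        return cls(**args, **extras)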
@@ -15,6 +15,7 @@ MODEL_REGISTRY = {
     "anthropic": anthropic_llms.AnthropicLM,
     "textsynth": textsynth.TextSynthLM,
     "dummy": dummy.DummyLM,
+    "optimum-causal": gpt2.OPTIMUMLM,
 }
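With the registry entry in place, the OpenVINO backend is selectable by name. A hypothetical lookup (the model path is a placeholder), assuming get_model is the usual registry accessor in lm_eval.models:

    import lm_eval.models

    lm_class = lm_eval.models.get_model("optimum-causal")  # -> gpt2.OPTIMUMLM
    lm = lm_class.create_from_arg_string(
        "pretrained=./gpt2-openvino",  # placeholder: a locally exported OpenVINO model
        {"batch_size": 1, "device": "cpu"},
    )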
@@ -2,6 +2,8 @@ import torch
 import transformers
 from typing import Optional, Union
 from lm_eval.base import BaseLM
+import optimum
+from optimum.intel.openvino import OVModelForCausalLM
 
 
 def _get_dtype(
@@ -142,3 +144,125 @@ class HFLM(BaseLM):
 
 # for backwards compatibility
 GPT2LM = HFLM
+
+
+class OPTIMUMLM(BaseLM):
+    def __init__(
+        self,
+        device="cpu",
+        pretrained="gpt2",
+        revision="main",
+        low_cpu_mem_usage=None,
+        subfolder=None,
+        tokenizer=None,
+        batch_size=1,
+        load_in_8bit: Optional[bool] = False,
+        trust_remote_code: Optional[bool] = False,
+    ):
+        super().__init__()
+
+        assert isinstance(device, str)
+        assert isinstance(pretrained, str)
+        assert isinstance(batch_size, (int, str))
+
+        device_list = set(
+            ["cuda", "cpu"] + [f"cuda:{i}" for i in range(torch.cuda.device_count())]
+        )
+        if device and device in device_list:
+            self._device = torch.device(device)
+            print(f"Using device '{device}'")
+        else:
+            print("Device not specified")
+            print(f"Cuda Available? {torch.cuda.is_available()}")
+            self._device = (
+                torch.device("cuda")
+                if torch.cuda.is_available()
+                else torch.device("cpu")
+            )
+
+        # TODO: update this to be less of a hack once subfolder is fixed in HF
+        revision = revision + ("/" + subfolder if subfolder is not None else "")
+
+        self.gpt2 = OVModelForCausalLM.from_pretrained(
+            pretrained,
+            # load_in_8bit and low_cpu_mem_usage are HF-specific options kept
+            # disabled here; they are not accepted by OVModelForCausalLM
+            # load_in_8bit=load_in_8bit,
+            # low_cpu_mem_usage=low_cpu_mem_usage,
+            revision=revision,
+            trust_remote_code=trust_remote_code,
+            use_cache=True,
+        )
+        # self.gpt2.eval()  # not needed: the OpenVINO model is inference-only
+
+        self.tokenizer = transformers.AutoTokenizer.from_pretrained(
+            pretrained if tokenizer is None else tokenizer,
+            revision=revision,
+            trust_remote_code=trust_remote_code,
+        )
+
+        self.vocab_size = self.tokenizer.vocab_size
+
+        # if isinstance(
+        #     self.tokenizer, (transformers.GPT2Tokenizer, transformers.GPT2TokenizerFast)
+        # ):
+        #     assert self.tokenizer.encode("hello\n\nhello") == [
+        #         31373,
+        #         198,
+        #         198,
+        #         31373,
+        #     ], self.tokenizer.encode("hello\n\nhello")
+
+        # setup for automatic batch size detection
+        if batch_size == "auto":
+            self.batch_size_per_gpu = batch_size
+        else:
+            self.batch_size_per_gpu = int(batch_size)
+
+    @property
+    def eot_token_id(self):
+        # we use EOT because end of *text* is more accurate for what we're doing than end of *sentence*
+        return self.tokenizer.eos_token_id
+
+    @property
+    def max_length(self):
+        try:
+            return self.gpt2.config.n_ctx
+        except AttributeError:
+            # gptneoconfig doesn't have n_ctx apparently
+            return self.gpt2.config.max_position_embeddings
+
+    @property
+    def max_gen_toks(self):
+        return 256
+
+    @property
+    def batch_size(self):
+        # TODO: fix multi-gpu
+        return self.batch_size_per_gpu  # * gpus
+
+    @property
+    def device(self):
+        # TODO: fix multi-gpu
+        return self._device
+
+    def tok_encode(self, string: str):
+        return self.tokenizer.encode(string, add_special_tokens=False)
+
+    def tok_decode(self, tokens):
+        return self.tokenizer.decode(tokens)
+
+    def _model_call(self, inps):
+        """
+        inps: a torch tensor of shape [batch, sequence]
+        the size of sequence may vary from call to call
+
+        returns: a torch tensor of shape [batch, sequence, vocab] with the
+        logits returned from the model
+        """
+        # no torch.no_grad() needed: the OpenVINO model does not track gradients
+        attention_mask = torch.ones_like(inps)
+        return self.gpt2(inps, attention_mask)[0]
+
+    def _model_generate(self, context, max_length, eos_token_id):
+        generation_kwargs = {"do_sample": False, "max_length": max_length}
+        if eos_token_id is not None:
+            generation_kwargs["eos_token_id"] = eos_token_id
+            generation_kwargs["pad_token_id"] = eos_token_id  # setting eos_token_id as pad token
+        return self.gpt2.generate(context, **generation_kwargs)
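OVModelForCausalLM.from_pretrained loads an OpenVINO IR model, so a plain Transformers checkpoint has to be converted first. A hedged sketch of preparing such a model, assuming an optimum-intel version where export=True triggers the conversion:

    from optimum.intel.openvino import OVModelForCausalLM
    from transformers import AutoTokenizer

    # Convert the PyTorch checkpoint to OpenVINO IR and save it locally
    # ("./gpt2-openvino" is a placeholder path used in the examples here).
    model = OVModelForCausalLM.from_pretrained("gpt2", export=True)
    model.save_pretrained("./gpt2-openvino")
    AutoTokenizer.from_pretrained("gpt2").save_pretrained("./gpt2-openvino")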
@@ -10,6 +10,7 @@ import functools
 import numpy as np
 import re
 import importlib.resources
+import importlib_resources
 
 from lm_eval.base import rf, Task
 from lm_eval.metrics import mean
@@ -229,7 +230,10 @@ def create_task_from_path(json_path):
 
 def create_all_tasks():
-    resources_dir = importlib.resources.files("lm_eval.datasets") / "bigbench_resources"
+    try:
+        resources_dir = importlib.resources.files("lm_eval.datasets") / "bigbench_resources"
+    except AttributeError:  # importlib.resources.files() requires Python 3.9+
+        resources_dir = importlib_resources.files("lm_eval.datasets") / "bigbench_resources"
     supported_tasks = [os.path.splitext(x)[0] for x in os.listdir(resources_dir)]
     res = {}
     for task_name in supported_tasks:
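importlib.resources.files() only exists on Python 3.9+, hence the fallback to the importlib_resources backport. An equivalent pattern gates on the interpreter version once at import time instead of catching the error on every call:

    import sys

    if sys.version_info >= (3, 9):
        from importlib.resources import files
    else:
        from importlib_resources import files  # backport package for Python 3.8

    resources_dir = files("lm_eval.datasets") / "bigbench_resources"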
@@ -12,6 +12,7 @@ def parse_args():
     parser = argparse.ArgumentParser()
     parser.add_argument("--model", required=True)
     parser.add_argument("--model_args", default="")
+    parser.add_argument("--tokenizer", default=None)
     parser.add_argument("--tasks", default=None, choices=utils.MultiChoice(tasks.ALL_TASKS))
     parser.add_argument("--provide_description", action="store_true")
     parser.add_argument("--num_fewshot", type=int, default=0)
@@ -66,6 +67,7 @@ def main():
         description_dict=description_dict,
         decontamination_ngrams_path=args.decontamination_ngrams_path,
         check_integrity=args.check_integrity,
+        tokenizer=args.tokenizer,
         write_out=args.write_out,
         output_base_path=args.output_base_path,
     )
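End to end, the new --tokenizer flag travels from main.py into simple_evaluate and on to the model constructor. A hypothetical programmatic equivalent (the model path and task name are placeholders):

    import lm_eval.evaluator

    results = lm_eval.evaluator.simple_evaluate(
        model="optimum-causal",                   # resolved through MODEL_REGISTRY
        model_args="pretrained=./gpt2-openvino",  # placeholder: exported OpenVINO model
        tokenizer="gpt2",                         # forwarded to AutoTokenizer.from_pretrained
        tasks=["lambada_openai"],                 # placeholder task
        batch_size=1,
        device="cpu",
    )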
@@ -19,7 +19,7 @@ setuptools.setup(
         "License :: OSI Approved :: MIT License",
         "Operating System :: OS Independent",
     ],
-    python_requires=">=3.9",
+    python_requires=">=3.8",
     install_requires=[
         "datasets>=2.0.0",
         "jsonlines",
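Lowering python_requires to 3.8 pairs with the importlib_resources fallback above. The commit does not show it, but the backport would presumably also need to be declared as a conditional dependency; a hedged sketch using an environment marker:

    # Assumption: not part of this commit; shows how the 3.8-only backport
    # could be declared in setup.py's install_requires.
    install_requires = [
        "datasets>=2.0.0",
        "jsonlines",
        'importlib_resources; python_version < "3.9"',
    ]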