Unverified Commit bf783d60 authored by Hailey Schoelkopf, committed by GitHub

Merge branch 'master' into deepsparselm

parents d1139741 33a215c7
@@ -81,6 +81,7 @@ To evaluate models that are loaded via `AutoSeq2SeqLM` in Huggingface, you inste
> **Warning**: Choosing the wrong model may result in erroneous outputs despite not erroring.
### Neural Magic `deepsparse`
Models from [SparseZoo](https://sparsezoo.neuralmagic.com/) can be evaluated directly in lm-evaluation-harness using [DeepSparse](https://github.com/neuralmagic/deepsparse):
@@ -95,6 +96,11 @@ python main.py --model deepsparse --model_args pretrained=hf:mgoin/TinyLlama-1.1
python main.py --model deepsparse --model_args pretrained=hf:neuralmagic/mpt-7b-gsm8k-pruned60-quant-ds --tasks gsm8k
```
### OpenVINO models converted via HuggingFace Optimum
Models exported to OpenVINO IR with [HuggingFace Optimum Intel](https://github.com/huggingface/optimum-intel) can be evaluated with the `optimum-causal` model type:
```bash
python main.py --model optimum-causal --model_args pretrained=<model_path_or_name> --tasks lambada_openai
```
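
If a model is not already in OpenVINO IR format, it can first be exported from a regular Hugging Face checkpoint with `optimum-intel`. The snippet below is a minimal sketch (the model name and output directory are placeholders), mirroring the export performed in this change's tests:

```python
from optimum.intel.openvino import OVModelForCausalLM
from transformers import AutoTokenizer

model_id = "gpt2"              # placeholder: any causal LM on the Hugging Face Hub
save_dir = "./gpt2-openvino"   # placeholder output directory

# Export the checkpoint to OpenVINO IR and save it together with its tokenizer,
# since the tokenizer is expected to live in the same folder as the model.
ov_model = OVModelForCausalLM.from_pretrained(model_id, export=True, use_cache=True)
ov_model.save_pretrained(save_dir)
AutoTokenizer.from_pretrained(model_id).save_pretrained(save_dir)
```

The saved directory can then be passed as `pretrained=<model_path_or_name>` in the command above.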
### Commercial APIs
Our library also supports language models served via the OpenAI API:
...
@@ -18,7 +18,8 @@ MODEL_REGISTRY = {
"textsynth": textsynth.TextSynthLM, "textsynth": textsynth.TextSynthLM,
"deepsparse": deepsparse.DeepSparseLM, "deepsparse": deepsparse.DeepSparseLM,
"dummy": dummy.DummyLM, "dummy": dummy.DummyLM,
"gguf": gguf.GGUFLM "gguf": gguf.GGUFLM,
"optimum-causal": gpt2.OPTIMUMLM,
} }
...
@@ -175,3 +175,116 @@ class HFLM(BaseLM):
# for backwards compatibility
GPT2LM = HFLM

class OPTIMUMLM(BaseLM):
    """Causal LM run on the OpenVINO runtime, loaded via HuggingFace Optimum
    Intel's `OVModelForCausalLM`."""

    def __init__(
        self,
        device="cpu",
        pretrained="gpt2",
        revision="main",
        low_cpu_mem_usage=None,
        subfolder=None,
        tokenizer=None,
        batch_size=1,
        load_in_8bit: Optional[bool] = False,
        trust_remote_code: Optional[bool] = False,
    ):
        super().__init__()

        import optimum
        from optimum.intel.openvino import OVModelForCausalLM

        assert isinstance(device, str)
        assert isinstance(pretrained, str)
        assert isinstance(batch_size, (int, str))

        device_list = set(
            ["cuda", "cpu"] + [f'cuda:{i}' for i in range(torch.cuda.device_count())]
        )
        if device and device in device_list:
            self._device = torch.device(device)
            print(f"Using device '{device}'")
        else:
            print("Device not specified")
            print(f"Cuda Available? {torch.cuda.is_available()}")
            self._device = (
                torch.device("cuda")
                if torch.cuda.is_available()
                else torch.device("cpu")
            )

        # TODO: update this to be less of a hack once subfolder is fixed in HF
        revision = revision + ("/" + subfolder if subfolder is not None else "")

        self.gpt2 = OVModelForCausalLM.from_pretrained(
            pretrained,
            load_in_8bit=load_in_8bit,
            revision=revision,
            trust_remote_code=trust_remote_code,
            use_cache=True,
        )

        try:
            self.tokenizer = transformers.AutoTokenizer.from_pretrained(
                pretrained if tokenizer is None else tokenizer,
                revision=revision,
                trust_remote_code=trust_remote_code,
            )
        except Exception:
            print("Tokenizer is missing. Please save it in the same folder as the model.")

        self.vocab_size = self.tokenizer.vocab_size

        # setup for automatic batch size detection
        if batch_size == 'auto':
            self.batch_size_per_gpu = batch_size
        else:
            self.batch_size_per_gpu = int(batch_size)

    @property
    def eot_token_id(self):
        # we use EOT because end of *text* is more accurate for what we're doing than end of *sentence*
        return self.tokenizer.eos_token_id

    @property
    def max_length(self):
        try:
            return self.gpt2.config.n_ctx
        except AttributeError:
            # gptneoconfig doesn't have n_ctx apparently
            return self.gpt2.config.max_position_embeddings

    @property
    def max_gen_toks(self):
        return 256

    @property
    def batch_size(self):
        # TODO: fix multi-gpu
        return self.batch_size_per_gpu  # * gpus

    @property
    def device(self):
        # TODO: fix multi-gpu
        return self._device

    def tok_encode(self, string: str):
        return self.tokenizer.encode(string, add_special_tokens=False)

    def tok_decode(self, tokens):
        return self.tokenizer.decode(tokens)

    def _model_call(self, inps):
        """
        inps: a torch tensor of shape [batch, sequence]
        the size of sequence may vary from call to call

        returns: a torch tensor of shape [batch, sequence, vocab] with the
        logits returned from the model
        """
        return self.gpt2(inps)[0]

    def _model_generate(self, context, max_length, eos_token_id):
        generation_kwargs = {'do_sample': False, 'max_length': max_length}
        if eos_token_id is not None:
            generation_kwargs['eos_token_id'] = eos_token_id
            # setting eos_token_id as pad token
            generation_kwargs['pad_token_id'] = eos_token_id
        return self.gpt2.generate(context, **generation_kwargs)
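
For reference, a minimal sketch of driving the new model type programmatically rather than through `main.py` (the model directory is a placeholder; the calls mirror the test added in this change):

```python
import lm_eval.models as models
import lm_eval.tasks as tasks
import lm_eval.evaluator as evaluator

# "optimum-causal" resolves to gpt2.OPTIMUMLM via MODEL_REGISTRY; the
# --model_args string is parsed into constructor kwargs by create_from_arg_string.
lm = models.get_model("optimum-causal").create_from_arg_string(
    "pretrained=./gpt2-openvino",  # placeholder path to an exported OpenVINO model
    {"batch_size": 1, "device": "cpu"},
)

results = evaluator.evaluate(
    lm=lm,
    task_dict=tasks.get_task_dict(["lambada_openai"]),
    num_fewshot=0,
    limit=10,
    bootstrap_iters=10,
    description_dict=None,
)
```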
@@ -10,6 +10,7 @@ import functools
import numpy as np
import re
import importlib.resources
import importlib_resources
from lm_eval.base import rf, Task
from lm_eval.metrics import mean
...
@@ -51,5 +51,6 @@ setuptools.setup(
"auto-gptq": ["auto-gptq[triton] @ git+https://github.com/PanQiWei/AutoGPTQ"], "auto-gptq": ["auto-gptq[triton] @ git+https://github.com/PanQiWei/AutoGPTQ"],
"anthropic": ["anthropic"], "anthropic": ["anthropic"],
"deepsparse": ["deepsparse-nightly[llm]"], "deepsparse": ["deepsparse-nightly[llm]"],
"openvino": ["openvino", "nncf", "onnx", "optimum-intel @ git+https://github.com/huggingface/optimum-intel.git"],
}, },
) )
import os
import tempfile
import lm_eval.base as base
import lm_eval.tasks as tasks
import lm_eval.models as models
import lm_eval.evaluator as evaluator
import random
import pytest
from transformers import AutoTokenizer
from optimum.intel import OVModelForCausalLM
SUPPORTED_ARCHITECTURES_TASKS = {
    "facebook/opt-125m": "lambada_openai",
    "hf-internal-testing/tiny-random-gpt2": "wikitext"
}


@pytest.mark.parametrize("model_id,task", SUPPORTED_ARCHITECTURES_TASKS.items())
def test_evaluator(model_id, task):
    with tempfile.TemporaryDirectory() as tmpdirname:
        model = OVModelForCausalLM.from_pretrained(model_id, export=True, use_cache=True)
        model.save_pretrained(tmpdirname)
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        tokenizer.save_pretrained(tmpdirname)

        lm = models.get_model("optimum-causal").create_from_arg_string(
            f"pretrained={tmpdirname}",
            {
                "batch_size": 1,
                "device": "cpu",
            },
        )

        task_dict = tasks.get_task_dict([task])

        def ll_fn(reqs):
            for ctx, cont in reqs:
                if len(ctx) == 0:
                    continue
                # space convention
                assert ctx[-1] != " "
                assert cont[0] == " " or ctx[-1] == "\n"

            res = []
            random.seed(42)
            for _ in reqs:
                res.append((-random.random(), False))

            return res

        def ll_perp_fn(reqs):
            for (string,) in reqs:
                assert isinstance(string, str)

            res = []
            random.seed(42)
            for _ in reqs:
                res.append(-random.random())

            return res

        lm.loglikelihood = ll_fn
        lm.loglikelihood_rolling = ll_perp_fn

        limit = 10
        evaluator.evaluate(
            lm=lm,
            task_dict=task_dict,
            num_fewshot=0,
            limit=limit,
            bootstrap_iters=10,
            description_dict=None,
        )
\ No newline at end of file