Unverified Commit 4f8e479e authored by Qubitium-ModelCloud's avatar Qubitium-ModelCloud Committed by GitHub
Browse files

Add GPTQModel support for evaluating GPTQ models (#2217)



* support gptqmodel

* code opt

* add gptqmodel option

* Update huggingface.py

* Update pyproject.toml

* gptqmodel version upgraded to 1.0.6

* GPTQModel version upgraded to 1.0.8

* Update pyproject.toml

* fix ruff-format error

* add gptqmodel test

* Update gptqmodel test model

* skip cuda

* python3.8 compatible

* Update README.md

* Update README.md

---------
Co-authored-by: default avatarCL-ModelCloud <cl@modelcloud.ai>
parent 57272b63
......@@ -39,7 +39,7 @@ This project provides a unified framework to test generative language models on
**Features:**
- Over 60 standard academic benchmarks for LLMs, with hundreds of subtasks and variants implemented.
- Support for models loaded via [transformers](https://github.com/huggingface/transformers/) (including quantization via [AutoGPTQ](https://github.com/PanQiWei/AutoGPTQ)), [GPT-NeoX](https://github.com/EleutherAI/gpt-neox), and [Megatron-DeepSpeed](https://github.com/microsoft/Megatron-DeepSpeed/), with a flexible tokenization-agnostic interface.
- Support for models loaded via [transformers](https://github.com/huggingface/transformers/) (including quantization via [GPTQModel](https://github.com/ModelCloud/GPTQModel) and [AutoGPTQ](https://github.com/PanQiWei/AutoGPTQ)), [GPT-NeoX](https://github.com/EleutherAI/gpt-neox), and [Megatron-DeepSpeed](https://github.com/microsoft/Megatron-DeepSpeed/), with a flexible tokenization-agnostic interface.
- Support for fast and memory-efficient inference with [vLLM](https://github.com/vllm-project/vllm).
- Support for commercial APIs including [OpenAI](https://openai.com), and [TextSynth](https://textsynth.com/).
- Support for evaluation on adapters (e.g. LoRA) supported in [HuggingFace's PEFT library](https://github.com/huggingface/peft).
......@@ -319,8 +319,16 @@ lm_eval --model hf \
--tasks hellaswag
```
[GPTQ](https://github.com/PanQiWei/AutoGPTQ) quantized models can be loaded by specifying their file names in `,autogptq=NAME` (or `,autogptq=True` for default names) in the `model_args` argument:
GPTQ quantized models can be loaded using [GPTQModel](https://github.com/ModelCloud/GPTQModel) (faster) or [AutoGPTQ](https://github.com/PanQiWei/AutoGPTQ).
GPTQModel: add `,gptqmodel=True` to `model_args`:
```bash
lm_eval --model hf \
--model_args pretrained=model-name-or-path,gptqmodel=True \
--tasks hellaswag
```
AutoGPTQ: add `,autogptq=True` to `model_args`:
```bash
lm_eval --model hf \
--model_args pretrained=model-name-or-path,autogptq=model.safetensors,gptq_use_triton=True \
......
......@@ -87,6 +87,7 @@ class HFLM(TemplateLM):
peft: Optional[str] = None,
delta: Optional[str] = None,
autogptq: Optional[Union[bool, str]] = False,
gptqmodel: Optional[bool] = False,
**kwargs,
) -> None:
super().__init__()
......@@ -192,6 +193,7 @@ class HFLM(TemplateLM):
peft=peft,
delta=delta,
autogptq=autogptq,
gptqmodel=gptqmodel,
**kwargs,
)
......@@ -530,6 +532,7 @@ class HFLM(TemplateLM):
peft: Optional[str] = None,
delta: Optional[str] = None,
autogptq: Optional[Union[bool, str]] = False,
gptqmodel: Optional[bool] = False,
**kwargs,
) -> None:
"""
......@@ -557,7 +560,7 @@ class HFLM(TemplateLM):
)
)
if not autogptq:
if not autogptq and not gptqmodel:
if model_kwargs.get("load_in_4bit", None):
assert (
transformers.__version__ >= "4.30.0"
......@@ -577,23 +580,42 @@ class HFLM(TemplateLM):
**model_kwargs,
)
else:
try:
from auto_gptq import AutoGPTQForCausalLM
except ModuleNotFoundError as exception:
raise type(exception)(
"Tried to load auto_gptq, but auto-gptq is not installed ",
"please install auto-gptq via pip install lm-eval[gptq] or pip install -e .[gptq]",
if autogptq and gptqmodel:
raise ValueError(
"Cannot use both 'autogptq' and 'gptqmodel' options at the same time."
)
self._model = AutoGPTQForCausalLM.from_quantized(
pretrained,
trust_remote_code=trust_remote_code,
model_basename=None if autogptq is True else Path(autogptq).stem,
use_safetensors=True
if autogptq is True
else autogptq.endswith(".safetensors"),
**model_kwargs,
)
if autogptq:
try:
from auto_gptq import AutoGPTQForCausalLM
except ModuleNotFoundError as exception:
raise type(exception)(
"Tried to load auto_gptq, but auto-gptq is not installed ",
"please install auto-gptq via pip install lm-eval[gptq] or pip install -e .[gptq]",
)
self._model = AutoGPTQForCausalLM.from_quantized(
pretrained,
trust_remote_code=trust_remote_code,
model_basename=None if autogptq is True else Path(autogptq).stem,
use_safetensors=True
if autogptq is True
else autogptq.endswith(".safetensors"),
**model_kwargs,
)
if gptqmodel:
try:
from gptqmodel import GPTQModel
except ModuleNotFoundError as exception:
raise type(exception)(
"Tried to load gptqmodel, but gptqmodel is not installed ",
"please install gptqmodel via `pip install gptqmodel --no-build-isolation` or `pip install lm-eval[gptqmodel] --no-build-isolation`",
)
self._model = GPTQModel.from_quantized(
pretrained, trust_remote_code=trust_remote_code, **model_kwargs
)
if peft and delta:
raise ValueError(
......
......@@ -76,6 +76,7 @@ testing = ["pytest", "pytest-cov", "pytest-xdist"]
vllm = ["vllm>=0.4.2"]
zeno = ["pandas", "zeno-client"]
wandb = ["wandb>=0.16.3", "pandas", "numpy"]
gptqmodel = ["gptqmodel>=1.0.9"]
all = [
"lm_eval[anthropic]",
"lm_eval[dev]",
......
from typing import List
import pytest
import lm_eval
def assert_less_than(value, threshold, desc):
    """Assert that *value* is strictly below *threshold*.

    A ``None`` value is treated as "metric not produced" and is silently
    accepted, so callers can run the same check whether or not the
    evaluation emitted the metric.  Non-``None`` values are coerced with
    ``float`` before comparison, so numeric strings are accepted too.
    *desc* names the metric in the failure message.
    """
    if value is None:
        return
    assert float(value) < threshold, f"{desc} should be less than {threshold}"
@pytest.mark.skip(reason="requires CUDA")
class Test_GPTQModel:
    """End-to-end check that a GPTQ-quantized model can be evaluated through
    the ``hf`` model backend with ``gptqmodel=True``.

    Runs ``arc_easy`` on a small pre-quantized OPT model and asserts the
    accuracy metrics stay below loose upper bounds (sanity check that the
    quantized weights were actually loaded, not a quality benchmark).
    """

    # Skip the entire class when gptqmodel (>= 1.0.9) is not installed.
    gptqmodel = pytest.importorskip("gptqmodel", minversion="1.0.9")
    MODEL_ID = "ModelCloud/Opt-125-GPTQ-4bit-10-25-2024"

    def test_gptqmodel(self) -> None:
        acc = "acc"
        acc_norm = "acc_norm"
        acc_value = None
        acc_norm_value = None
        task = "arc_easy"
        model_args = f"pretrained={self.MODEL_ID},gptqmodel=True"
        tasks: List[str] = [task]

        results = lm_eval.simple_evaluate(
            model="hf",
            model_args=model_args,
            tasks=tasks,
            device="cuda",
        )

        column = "results"
        # BUG FIX: the original read ``self.task`` here, but ``task`` is a
        # local variable of this method and never a class/instance attribute,
        # so the lookup raised AttributeError before any metric was checked.
        dic = results.get(column, {}).get(task)
        if dic is not None:
            if "alias" in dic:
                _ = dic.pop("alias")
            for key, value in sorted(dic.items()):
                # Metric keys look like "acc,none" — strip the filter suffix.
                metric, _, _filter = key.partition(",")
                if metric.endswith("_stderr"):
                    continue
                if metric == acc:
                    acc_value = "%.4f" % value if isinstance(value, float) else value
                if metric == acc_norm:
                    acc_norm_value = "%.4f" % value if isinstance(value, float) else value

        assert_less_than(acc_value, 0.43, "acc")
        assert_less_than(acc_norm_value, 0.39, "acc_norm")
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment