Merge pull request #414 from zanussbaum/master

feat: evaluation using peft models with CLM

Merge pull request #414 from zanussbaum/master
feat: evaluation using peft models with CLM
5c95b907 · Stella Biderman · GitHub · 44275ae9 · 83c2ff16 · 5c95b907
Unverified Commit 5c95b907 authored Apr 17, 2023 by Stella Biderman Committed by GitHub Apr 17, 2023
Hide whitespace changes
Inline Side-by-side

Showing with 54 additions and 1 deletion

README.md README.md +11 -1

lm_eval/models/huggingface.py lm_eval/models/huggingface.py +41 -0

setup.py setup.py +2 -0

No files found.
--- a/README.md
+++ b/README.md
@@ -10,7 +10,8 @@ This project provides a unified framework to test generative language models on
 Features:

 - 200+ tasks implemented. See the [task-table](./docs/task_table.md) for a complete list.
- Support for the Hugging Face `transformers` library, GPT-NeoX, Megatron-DeepSpeed, and the OpenAI API, with flexible tokenization-agnostic interface.
+- Support for GPT-2, GPT-3, GPT-Neo, GPT-NeoX, and GPT-J, with flexible tokenization-agnostic interface.
+- Support for evaluation on adapters (e.g. LoRA) supported in [Hugging Face's PEFT library](https://github.com/huggingface/peft).
 - Task versioning to ensure reproducibility.

 ## Install
@@ -58,6 +59,15 @@ To evaluate models that are called via `AutoSeq2SeqLM`, you instead use `hf-seq2

 > **Warning**: Choosing the wrong model may result in erroneous outputs despite not erroring.

+To use with [PEFT](https://github.com/huggingface/peft), take the call you would run to evaluate the base model and add `,peft=PATH` to the `model_args` argument as shown below:
+```bash
+python main.py \
+    --model hf-causal \
+    --model_args pretrained=EleutherAI/gpt-j-6b,peft=nomic-ai/gpt4all-j-lora \
+    --tasks openbookqa,arc_easy,winogrande,hellaswag,arc_challenge,piqa,boolq \
+    --device cuda:0
+```
+
 Our library also supports the OpenAI API:

 ```bash

--- a/lm_eval/models/huggingface.py
+++ b/lm_eval/models/huggingface.py
@@ -2,6 +2,7 @@ import math
 import torch
 import torch.nn.functional as F
 import transformers
+import peft
 from typing import List, Mapping, NewType, Optional, Tuple, Union
 from tqdm import tqdm

@@ -58,6 +59,7 @@ class HuggingFaceAutoLM(BaseLM):
    AUTO_CONFIG_CLASS: transformers.AutoConfig = transformers.AutoConfig
    AUTO_TOKENIZER_CLASS: transformers.AutoTokenizer = transformers.AutoTokenizer
    AUTO_MODEL_CLASS: transformers.AutoModel = None
+    AUTO_PEFT_CLASS: peft.PeftModel = None

    # Default max sequence length setting for when no `max_length` is provided
    # or no max length config setting is found in the model or tokenizer.
@@ -80,6 +82,7 @@ class HuggingFaceAutoLM(BaseLM):
        offload_folder: Optional[str] = "./offload",
        dtype: Optional[Union[str, torch.dtype]] = None,
        device: Optional[Union[int, str]] = "cuda",
+        peft: Optional[str] = None,
    ):
        """Initializes a HuggingFace `AutoModel` and `AutoTokenizer` for evaluation.
        Args:
@@ -124,6 +127,10 @@ class HuggingFaceAutoLM(BaseLM):
                Converts the model weights to `dtype`, if specified. Strings get
                converted to `torch.dtype` objects (e.g. `float16` -> `torch.float16`).
                Use `dtype="auto"` to derive the type from the model’s weights.
+            peft (str, optional, defaults to None):
+                Path of the adapter weights to load from Hugging Face. This is usually
+                a directory containing the files `adapter_config.json` and
+                `adapter_model.bin`. Compatible with [PEFT](https://github.com/huggingface/peft).
        """
        super().__init__()

@@ -175,6 +182,16 @@ class HuggingFaceAutoLM(BaseLM):
            torch_dtype=_get_dtype(dtype, self._config),
            **accelerate_kwargs,
        )
+        # Note: the `peft` adapter path can differ from the pretrained model path.
+        if peft is not None:
+            self.model = self._create_auto_model_peft(
+                model=self.model,
+                peft=peft,
+                revision=revision,
+                subfolder=subfolder,
+                torch_dtype=_get_dtype(dtype, self._config),
+                **accelerate_kwargs,
+            )
        self.model.eval()
        torch.set_grad_enabled(False)

@@ -208,6 +225,29 @@ class HuggingFaceAutoLM(BaseLM):
            torch_dtype=torch_dtype,
        )
        return model
+        
+    def _create_auto_model_peft(  # wraps an already-loaded model with a PEFT adapter
+        self,
+        *,  # everything after this marker is keyword-only
+        model: transformers.PreTrainedModel,  # base model the adapter is applied to
+        peft: str,  # adapter location, forwarded positionally to `from_pretrained`
+        revision: str,
+        subfolder: Optional[str],  # may be None; see the `revision` expression below
+        device_map: Optional[Union[str, _DeviceMapping]] = None,
+        max_memory: Optional[dict] = None,
+        offload_folder: Optional[str] = None,
+        torch_dtype: Optional[Union[str, torch.dtype]] = None,
+    ):
+        model = self.AUTO_PEFT_CLASS.from_pretrained(  # AUTO_PEFT_CLASS must be set by the subclass
+            model,
+            peft,
+            revision=revision + ("/" + subfolder if subfolder is not None else ""),  # "<revision>/<subfolder>" when a subfolder is given
+            device_map=device_map,
+            max_memory=max_memory,
+            offload_folder=offload_folder,
+            torch_dtype=torch_dtype,
+        )
+        return model  # the object returned by AUTO_PEFT_CLASS.from_pretrained

    def _create_auto_tokenizer(
        self,
@@ -362,6 +402,7 @@ class AutoCausalLM(HuggingFaceAutoLM):
    """

    AUTO_MODEL_CLASS = transformers.AutoModelForCausalLM
+    AUTO_PEFT_CLASS = peft.PeftModel

    def _create_auto_tokenizer(
        self,

--- a/setup.py
+++ b/setup.py
@@ -26,6 +26,7 @@ setuptools.setup(
        "numexpr",
        "openai>=0.6.4",
        "omegaconf>=2.2",
+        "peft>=0.2.0",
        "pybind11>=2.6.2",
        "pycountry",
        "pytablewriter",
@@ -41,5 +42,6 @@ setuptools.setup(
    extras_require={
        "dev": ["black", "flake8", "pre-commit", "pytest", "pytest-cov"],
        "multilingual": ["nagisa>=0.2.7", "jieba>=0.42.1"],
+        "sentencepiece": ["sentencepiece>=0.1.98", "protobuf>=4.22.1"]
    },
 )