Unverified Commit 6179fdf5 authored by Hailey Schoelkopf, committed by GitHub

Merge pull request #427 from EleutherAI/hotfix-hf-causal

Hotfix: patch issues with the `huggingface.py` model classes
parents 977b281a 62d7937b
......@@ -55,14 +55,14 @@ python main.py \
--device cuda:0
```
To evaluate models that are called via `AutoSeq2SeqLM`, you instead use `hf-seq2seq`.
To evaluate models that are loaded via `AutoSeq2SeqLM` in Huggingface, you instead use `hf-seq2seq`. *To evaluate (causal) models across multiple GPUs, use `--model hf-causal-experimental`. Note that this is an experimental implementation.*
> **Warning**: Choosing the wrong model type may produce erroneous outputs even though no error is raised.
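For reference, an `hf-seq2seq` run follows the same invocation pattern as the causal examples; the checkpoint below (`google/flan-t5-small`) and the task list are only illustrative choices, not part of this change:
```bash
python main.py \
    --model hf-seq2seq \
    --model_args pretrained=google/flan-t5-small \
    --tasks boolq,arc_easy \
    --device cuda:0
```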
To use with [PEFT](https://github.com/huggingface/peft), take the call you would run to evaluate the base model and add `,peft=PATH` to the `model_args` argument as shown below:
```bash
python main.py \
--model hf-causal \
--model hf-causal-experimental \
--model_args pretrained=EleutherAI/gpt-j-6b,peft=nomic-ai/gpt4all-j-lora \
--tasks openbookqa,arc_easy,winogrande,hellaswag,arc_challenge,piqa,boolq \
--device cuda:0
......
......@@ -6,7 +6,8 @@ from . import dummy
MODEL_REGISTRY = {
"hf": gpt2.HFLM,
"hf-causal": huggingface.AutoCausalLM,
"hf-causal": gpt2.HFLM,
"hf-causal-experimental": huggingface.AutoCausalLM,
"hf-seq2seq": huggingface.AutoSeq2SeqLM,
"gpt2": gpt2.GPT2LM,
"gpt3": gpt3.GPT3LM,
......
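A quick sketch of what the registry remap above means in practice, assuming the package's existing `lm_eval.models.get_model` lookup helper: `hf-causal` now resolves back to the stable `gpt2.HFLM` implementation, while the newer multi-GPU-capable class is opt-in under `hf-causal-experimental`.
```python
# Hedged sketch: resolve --model names through the registry shown above.
from lm_eval import models  # assumes lm_eval.models.get_model wraps MODEL_REGISTRY

causal_cls = models.get_model("hf-causal")                     # gpt2.HFLM (stable path)
experimental_cls = models.get_model("hf-causal-experimental")  # huggingface.AutoCausalLM

print(causal_cls.__name__, experimental_cls.__name__)  # -> HFLM AutoCausalLM
```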
......@@ -55,28 +55,8 @@ class HFLM(BaseLM):
trust_remote_code=trust_remote_code,
)
assert isinstance(
self.tokenizer,
(
transformers.GPT2Tokenizer,
transformers.GPT2TokenizerFast,
transformers.T5Tokenizer,
transformers.T5TokenizerFast,
),
), "this tokenizer has not been checked for compatibility yet!"
self.vocab_size = self.tokenizer.vocab_size
if isinstance(
self.tokenizer, (transformers.GPT2Tokenizer, transformers.GPT2TokenizerFast)
):
assert self.tokenizer.encode("hello\n\nhello") == [
31373,
198,
198,
31373,
], self.tokenizer.encode("hello\n\nhello")
# multithreading and batching
self.batch_size_per_gpu = batch_size # todo: adaptive batch size
......
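The deleted block above hard-coded a GPT-2/T5 tokenizer whitelist plus a BPE sanity check; for context, the check it performed is reproduced standalone below (illustrative only; it downloads the GPT-2 tokenizer):
```python
import transformers

# The removed assert verified that GPT-2 BPE keeps "hello\n\nhello" as four
# tokens (31373, 198, 198, 31373), i.e. the two newlines are neither merged
# nor stripped, so whitespace-sensitive prompts tokenize as expected.
tok = transformers.GPT2TokenizerFast.from_pretrained("gpt2")
assert tok.encode("hello\n\nhello") == [31373, 198, 198, 31373]
```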
......@@ -361,7 +361,7 @@ class HuggingFaceAutoLM(BaseLM):
def tok_decode(self, tokens: torch.LongTensor) -> List[str]:
return self.tokenizer.batch_decode(tokens, skip_special_tokens=True)
def greedy_until(self, requests: List[Tuple[str, dict]]) -> List[str]:
def greedy_until(self, requests: List[Tuple[str, Union[List[str], str]]]) -> List[str]:
def _collate(x):
tokens = self.tok_encode(x[0])
return len(tokens), x[0]
......@@ -373,18 +373,16 @@ class HuggingFaceAutoLM(BaseLM):
):
context = [c[0] for c in chunk]
request_args = chunk[0][1]
stop_sequences = request_args["stop_sequences"]
max_generation_length = request_args["max_generation_length"]
num_fewshot = request_args["num_fewshot"]
stop_sequences = request_args if isinstance(request_args, list) else [request_args] # request_args["stop_sequences"]
max_generation_length = self._max_gen_toks # request_args["max_generation_length"]
assert (
isinstance(max_generation_length, int) or max_generation_length is None
)
assert isinstance(stop_sequences, list) or stop_sequences is None
assert isinstance(num_fewshot, int) or num_fewshot is None
# TODO: Find a better way to handle stop sequences for 0-shot.
if stop_sequences is None or num_fewshot == 0:
if stop_sequences is None:
until = [self.eot_token]
else:
until = stop_sequences + [self.eot_token]
......
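The net effect of the two hunks above: each request's second element is now the stop sequence(s) themselves (a string or a list of strings) rather than a dict of `request_args`, and the generation cap comes from the model's own `_max_gen_toks`. A standalone sketch of the new handling, with hypothetical prompts and stand-in values:
```python
# Hypothetical stand-ins for the model attributes used inside greedy_until.
eot_token = "<|endoftext|>"   # assumption: the model's end-of-text token
max_gen_toks = 256            # assumption: stands in for self._max_gen_toks

# New request shape: (context, stop) where stop is a str or a list of strs.
requests = [
    ("Q: What did the dog chase?\nA:", "\n"),
    ("Translate to French: good morning", ["\n", "."]),
]

for context, request_args in requests:
    stop_sequences = request_args if isinstance(request_args, list) else [request_args]
    max_generation_length = max_gen_toks  # no longer read from request_args
    until = stop_sequences + [eot_token]
    print(repr(context), "->", until, max_generation_length)
```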
......@@ -5,7 +5,9 @@ import collections
import functools
import inspect
import sys
from typing import List
from typing import List, Union
import torch
from omegaconf import OmegaConf
......@@ -116,6 +118,26 @@ def make_disjoint_window(pair):
return a[: len(a) - (len(b) - 1)], b
def select_continuation_from_batch_left_padding(
generations: Union[List[List[int]], torch.Tensor], max_context_size: int
):
"""Select the continuation from the batch, removing prompts of different lengths.
Args:
generations (Union[List[List[int]], torch.Tensor]):
A tensor or list-of-lists of shape [batch_size, sequence length].
max_context_size (int):
The size of the biggest context; generations will proceed from that
index.
Example:
PAD PAD Continue : The dog chased the cat [every day of the week]
Riddle me this : The dog chased the cat [yesterday] PAD PAD PAD PAD
Output:
[every day of the week]
[yesterday] PAD PAD PAD PAD
"""
return generations[:, max_context_size:]
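A usage sketch for the new helper (values made up for illustration): with left-padded prompts, every continuation starts at the same column, so a single slice recovers the generations for the whole batch.
```python
import torch

PAD = 0
# Two prompts of lengths 2 and 4, left-padded to width 4, followed by generations.
batch = torch.tensor([
    [PAD, PAD, 11, 12, 21, 22, 23],   # short prompt, 3 generated tokens
    [13, 14, 15, 16, 31, PAD, PAD],   # long prompt, 1 generated token + right padding
])
max_context_size = 4  # length of the longest prompt in the batch

print(select_continuation_from_batch_left_padding(batch, max_context_size))
# tensor([[21, 22, 23],
#         [31,  0,  0]])
```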
class Reorderer:
def __init__(self, arr, fn):
self.size = len(arr)
......@@ -201,3 +223,4 @@ def run_task_tests(task_list: List[str]):
raise ValueError(
f"Not all tests for the specified tasks ({task_list}) ran successfully! Error code: {pytest_return_val}"
)