Unverified commit 904bba12 authored by Baber Abbasi, committed by GitHub

Pin datasets < 4.0.0 (#3172)

* Fix: pin datasets < 4.0

* fix

* update type hints in HF

* fix hellaswag path
parent 2eea3f50
......@@ -433,8 +433,10 @@ def cli_evaluate(args: Union[argparse.Namespace, None] = None) -> None:
# because it's already been determined based on the prior env var before launching our
# script--`datasets` gets imported by lm_eval internally before these lines can update the env.
import datasets
from packaging.version import parse as vparse
datasets.config.HF_DATASETS_TRUST_REMOTE_CODE = True
if vparse(datasets.__version__) < vparse("4.0.0"):
datasets.config.HF_DATASETS_TRUST_REMOTE_CODE = True
if isinstance(args.model_args, dict):
args.model_args["trust_remote_code"] = True
......
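Reviewer note: a minimal, standalone sketch of the version gate this hunk introduces, assuming only that `datasets` and `packaging` are installed. The global trust-remote-code switch is applied only when running under `datasets` < 4.0.0, where it still exists.

# Sketch of the gate above: set the flag only on datasets < 4.0.0.
import datasets
from packaging.version import parse as vparse

if vparse(datasets.__version__) < vparse("4.0.0"):
    datasets.config.HF_DATASETS_TRUST_REMOTE_CODE = True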
......@@ -981,6 +981,10 @@ class ConfigurableTask(Task):
def download(
self, dataset_kwargs: Optional[Dict[str, Any]] = None, **kwargs
) -> None:
from packaging.version import parse as vparse
if dataset_kwargs and vparse(datasets.__version__) >= vparse("4.0.0"):
dataset_kwargs.pop("trust_remote_code", None)
if isinstance(self.config.custom_dataset, Callable):
eval_logger.warning(
f"{self.config.task}: Custom kwargs can be passed to `--metadata` in console (as json string) or to the TaskManager."
......
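Similarly, a small sketch (with hypothetical kwargs) of how `dataset_kwargs` gets sanitized before being forwarded to `load_dataset` on `datasets` >= 4.0.0, which no longer accepts `trust_remote_code`:

# Hypothetical task kwargs; the key is dropped only on datasets >= 4.0.0.
import datasets
from packaging.version import parse as vparse

dataset_kwargs = {"name": "regular", "trust_remote_code": True}
if vparse(datasets.__version__) >= vparse("4.0.0"):
    dataset_kwargs.pop("trust_remote_code", None)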
......@@ -3,9 +3,10 @@ from __future__ import annotations
import copy
import logging
import os
from collections.abc import Iterator, Sequence
from datetime import timedelta
from pathlib import Path
from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, Tuple, Union
from typing import TYPE_CHECKING, Any, Literal
import jinja2
import torch
......@@ -19,6 +20,7 @@ from accelerate import (
from accelerate.utils import get_max_memory
from huggingface_hub import HfApi
from packaging import version
from packaging.version import parse as vparse
from tqdm import tqdm
from transformers.models.auto.modeling_auto import (
MODEL_FOR_CAUSAL_LM_MAPPING_NAMES,
......@@ -26,7 +28,6 @@ from transformers.models.auto.modeling_auto import (
)
from lm_eval import utils
from lm_eval.api.instance import Instance
from lm_eval.api.model import TemplateLM
from lm_eval.api.registry import register_model
from lm_eval.models.utils import (
......@@ -42,15 +43,17 @@ from lm_eval.models.utils import (
if TYPE_CHECKING:
from transformers.quantizers import AutoQuantizationConfig
from transformers.quantizers.auto import AutoQuantizationConfig
from lm_eval.api.instance import Instance
eval_logger = logging.getLogger(__name__)
TOKENIZER_INFINITY = 1000000000000000019884624838656
@register_model("hf-auto", "hf", "huggingface")
class HFLM(TemplateLM):
"""
An abstracted Huggingface model class. Enables usage with both models of
"""An abstracted Huggingface model class. Enables usage with both models of
`transformers.AutoModelForCausalLM` and `transformers.AutoModelForSeq2SeqLM` classes.
Supports data-parallel multi-GPU with HF Accelerate.
......@@ -61,48 +64,45 @@ class HFLM(TemplateLM):
def __init__(
self,
pretrained: Union[str, transformers.PreTrainedModel],
pretrained: str | transformers.PreTrainedModel,
backend: Literal["default", "causal", "seq2seq"] = "default",
# override whether the model should be treated as decoder-only (causal) or encoder-decoder (seq2seq)
revision: Optional[str] = "main",
revision: str | None = "main",
subfolder: str = "",
tokenizer: Optional[
Union[
str,
transformers.PreTrainedTokenizer,
transformers.PreTrainedTokenizerFast,
]
] = None,
truncation: Optional[bool] = False,
tokenizer: str
| transformers.PreTrainedTokenizer
| transformers.PreTrainedTokenizerFast
| None = None,
truncation: bool | None = False,
logits_cache: bool = True,
max_length: Optional[int] = None,
device: Optional[str] = "cuda",
dtype: Optional[Union[str, torch.dtype]] = "auto",
softmax_dtype: Optional[Union[str, torch.dtype]] = None,
mixed_precision_dtype: Optional[Union[str, torch.dtype]] = None,
batch_size: Optional[Union[int, str]] = 1,
max_batch_size: Optional[int] = 64,
trust_remote_code: Optional[bool] = False,
use_fast_tokenizer: Optional[bool] = True,
add_bos_token: Optional[bool] = False,
prefix_token_id: Optional[int] = None,
max_length: int | None = None,
device: str | None = "cuda",
dtype: str | torch.dtype | None = "auto",
softmax_dtype: str | torch.dtype | None = None,
mixed_precision_dtype: str | torch.dtype | None = None,
batch_size: int | str | None = 1,
max_batch_size: int | None = 64,
trust_remote_code: bool | None = False,
use_fast_tokenizer: bool | None = True,
add_bos_token: bool | None = False,
prefix_token_id: int | None = None,
# arguments used for splitting a model across GPUs naively.
# only used if `parallelize=True`.
parallelize: Optional[bool] = False,
max_memory_per_gpu: Optional[Union[int, str]] = None,
max_cpu_memory: Optional[Union[int, str]] = None,
offload_folder: Optional[Union[str, os.PathLike]] = "./offload",
parallelize: bool | None = False,
max_memory_per_gpu: int | str | None = None,
max_cpu_memory: int | str | None = None,
offload_folder: str | os.PathLike | None = "./offload",
# PEFT, delta weights and quantization options
peft: Optional[str] = None,
delta: Optional[str] = None,
autogptq: Optional[Union[bool, str]] = False,
gptqmodel: Optional[bool] = False,
gguf_file: Optional[str] = None,
peft: str | None = None,
delta: str | None = None,
autogptq: bool | str | None = False,
gptqmodel: bool | None = False,
gguf_file: str | None = None,
# end token for thinking, either the string or int token id.
# splits to get response after this token (if provided).
think_end_token: Union[str, int, None] = None,
think_end_token: str | int | None = None,
enable_thinking: bool | None = None,
chat_template_args: Optional[dict[str, Any]] = None,
chat_template_args: dict[str, Any] | None = None,
**kwargs,
) -> None:
super().__init__()
......@@ -280,18 +280,19 @@ class HFLM(TemplateLM):
self.batch_size_per_gpu = int(batch_size)
if isinstance(pretrained, str):
if gpus >= 1 or str(self.device) == "mps":
if (gpus >= 1 or str(self.device) == "mps") and not (
parallelize or autogptq or hasattr(self, "accelerator")
):
# TODO: can remove this whole snippet except in the mps case, perhaps?
if not (parallelize or autogptq or hasattr(self, "accelerator")):
# place model onto device requested manually,
# if not using HF Accelerate or device_map
# or any other option that preloads model onto device
try:
self.model.to(self.device)
except ValueError:
eval_logger.debug(
"Failed to place model onto specified device. This may be because the model is quantized via `bitsandbytes` or `device_map` is provided. If the desired GPU is being used, this message is safe to ignore."
)
# place model onto device requested manually,
# if not using HF Accelerate or device_map
# or any other option that preloads model onto device
try:
self.model.to(self.device)
except ValueError:
eval_logger.debug(
"Failed to place model onto specified device. This may be because the model is quantized via `bitsandbytes` or `device_map` is provided. If the desired GPU is being used, this message is safe to ignore."
)
# multigpu data-parallel support when launched with accelerate
if gpus > 1:
if accelerator.num_processes > 1:
......@@ -336,12 +337,12 @@ class HFLM(TemplateLM):
def _get_accelerate_args(
self,
parallelize: Optional[bool] = None,
device_map: Optional[str] = "auto",
max_memory_per_gpu: Optional[Union[int, str]] = None,
max_cpu_memory: Optional[Union[int, str]] = None,
offload_folder: Optional[str] = "./offload",
gpus: Optional[int] = None,
parallelize: bool | None = None,
device_map: str | None = "auto",
max_memory_per_gpu: int | str | None = None,
max_cpu_memory: int | str | None = None,
offload_folder: str | None = "./offload",
gpus: int | None = None,
) -> dict:
"""Returns the kwargs needed to apply `accelerate` in `AutoModel.from_pretrained`."""
num_local_processes = int(os.environ.get("LOCAL_WORLD_SIZE", 1))
......@@ -379,13 +380,8 @@ class HFLM(TemplateLM):
}
else: # Estimating the possible memory requirements
max_memory_all_gpus = get_max_memory()
if "cpu" in max_memory_all_gpus:
del max_memory_all_gpus["cpu"]
if not hasattr(self, "accelerator"):
max_memory_per_gpu_map = {
k: v for k, v in max_memory_all_gpus.items()
}
else:
max_memory_all_gpus.pop("cpu", None)
if hasattr(self, "accelerator"):
# use only 1 / num_processes of the GPUs if we are running under accelerate launch
max_memory_per_gpu_map = {
k: v
......@@ -393,6 +389,9 @@ class HFLM(TemplateLM):
if k % num_local_processes
== (self.accelerator.process_index % num_local_processes)
}
else:
max_memory_per_gpu_map = max_memory_all_gpus
args["max_memory"] = max_memory_per_gpu_map
args["device_map"] = "auto" if device_map is None else device_map
eval_logger.info(
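As an aside, the sharding logic above can be pictured with hypothetical numbers: under `accelerate launch`, each local process keeps only its slice of the per-GPU memory map returned by `get_max_memory()`.

# Illustrative sketch with made-up values (not the library's actual output).
max_memory_all_gpus = {0: "70GiB", 1: "70GiB", 2: "70GiB", 3: "70GiB"}
num_local_processes = 2
process_index = 1  # stand-in for self.accelerator.process_index
max_memory_per_gpu_map = {
    k: v
    for k, v in max_memory_all_gpus.items()
    if k % num_local_processes == (process_index % num_local_processes)
}
print(max_memory_per_gpu_map)  # {1: '70GiB', 3: '70GiB'}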
......@@ -436,12 +435,12 @@ class HFLM(TemplateLM):
return self._model
@property
def eot_token_id(self):
def eot_token_id(self) -> int:
# we use EOT because end of *text* is more accurate for what we're doing than end of *sentence*
return self.tokenizer.eos_token_id
@property
def prefix_token_id(self):
def prefix_token_id(self) -> int:
# it is used as prefix for loglikelihood
if self.custom_prefix_token_id is not None:
return self.custom_prefix_token_id
......@@ -450,7 +449,7 @@ class HFLM(TemplateLM):
return self.tokenizer.eos_token_id
@property
def max_length(self):
def max_length(self) -> int:
if self._max_length: # if max length manually set, return it
return self._max_length
seqlen_config_attrs = ("n_positions", "max_position_embeddings", "n_ctx")
......@@ -458,7 +457,7 @@ class HFLM(TemplateLM):
if hasattr(self.model.config, attr):
return getattr(self.model.config, attr)
if hasattr(self.tokenizer, "model_max_length"):
if self.tokenizer.model_max_length == 1000000000000000019884624838656:
if self.tokenizer.model_max_length == TOKENIZER_INFINITY:
return self._DEFAULT_MAX_LENGTH
return self.tokenizer.model_max_length
return self._DEFAULT_MAX_LENGTH
......@@ -489,12 +488,12 @@ class HFLM(TemplateLM):
def _get_backend(
self,
config: Union[transformers.PretrainedConfig, transformers.AutoConfig],
config: transformers.PretrainedConfig | transformers.AutoConfig,
backend: Literal["default", "causal", "seq2seq"] = "default",
trust_remote_code: Optional[bool] = False,
trust_remote_code: bool | None = False,
) -> None:
"""
Helper method during initialization.
"""Helper method during initialization.
Determines the backend ("causal" (decoder-only) or "seq2seq" (encoder-decoder)) model type to be used.
sets `self.AUTO_MODEL_CLASS` appropriately if not already set.
......@@ -506,9 +505,7 @@ class HFLM(TemplateLM):
if backend != "default":
# if we've settled on non-default backend, use that manually
if backend == "causal":
self.backend = backend
elif backend == "seq2seq":
if backend in ["causal", "seq2seq"]:
self.backend = backend
eval_logger.info(
f"Overrode HF model backend type, and using type '{self.backend}'"
......@@ -516,7 +513,7 @@ class HFLM(TemplateLM):
else:
# determine and use the default HF backend for this model, based on its config + metadata.
if (
getattr(config, "model_type")
getattr(config, "model_type", None)
in MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES
):
# first check if model type is listed under seq2seq models, since some
......@@ -525,7 +522,7 @@ class HFLM(TemplateLM):
self.backend = "seq2seq"
eval_logger.debug(f"Using model type '{self.backend}'")
elif (
getattr(self.config, "model_type") in MODEL_FOR_CAUSAL_LM_MAPPING_NAMES
getattr(config, "model_type", None) in MODEL_FOR_CAUSAL_LM_MAPPING_NAMES
):
self.backend = "causal"
eval_logger.debug(f"Using model type '{self.backend}'")
......@@ -554,10 +551,10 @@ class HFLM(TemplateLM):
pretrained: str,
revision: str = "main",
trust_remote_code: bool = False,
gguf_file: Optional[str] = None,
gguf_file: str | None = None,
subfolder: str = "",
) -> None:
"""Return the model config for HuggingFace models"""
"""Return the model config for HuggingFace models."""
self._config = transformers.AutoConfig.from_pretrained(
pretrained,
revision=revision,
......@@ -569,29 +566,28 @@ class HFLM(TemplateLM):
def _create_model(
self,
pretrained: str,
revision: Optional[str] = "main",
dtype: Optional[Union[str, torch.dtype]] = "auto",
trust_remote_code: Optional[bool] = False,
revision: str | None = "main",
dtype: str | torch.dtype | None = "auto",
trust_remote_code: bool | None = False,
# arguments used for splitting a model across GPUs naively.
# only used if `parallelize=True`.
# (accelerate naive PP (device_map) options)
parallelize: Optional[bool] = False,
gpus: Optional[int] = None,
max_memory_per_gpu: Optional[Union[int, str]] = None,
max_cpu_memory: Optional[Union[int, str]] = None,
offload_folder: Optional[str] = "./offload",
parallelize: bool | None = False,
gpus: int | None = None,
max_memory_per_gpu: int | str | None = None,
max_cpu_memory: int | str | None = None,
offload_folder: str | None = "./offload",
# PEFT, delta weights and quantization options
peft: Optional[str] = None,
delta: Optional[str] = None,
autogptq: Optional[Union[bool, str]] = False,
gptqmodel: Optional[bool] = False,
gguf_file: Optional[str] = None,
quantization_config: Optional["AutoQuantizationConfig"] = None,
peft: str | None = None,
delta: str | None = None,
autogptq: bool | str | None = False,
gptqmodel: bool | None = False,
gguf_file: str | None = None,
quantization_config: AutoQuantizationConfig | None = None,
subfolder: str = "",
**kwargs,
) -> None:
"""
Initializes an HF or HF-compatible PreTrainedModel from scratch
"""Initializes an HF or HF-compatible PreTrainedModel from scratch
inside HFLM, using the kwargs passed into self.__init__().
Also handles functionality such as AutoGPTQ usage and PEFT wrapping.
......@@ -602,12 +598,12 @@ class HFLM(TemplateLM):
please consider subclassing HFLM and overriding this and other methods as needed.
"""
model_kwargs = kwargs if kwargs else {}
model_kwargs = kwargs or {}
model_kwargs.update(
self._get_accelerate_args(
parallelize=parallelize,
device_map=kwargs.get("device_map", None),
device_map=kwargs.get("device_map"),
max_memory_per_gpu=max_memory_per_gpu,
max_cpu_memory=max_cpu_memory,
offload_folder=offload_folder,
......@@ -616,16 +612,12 @@ class HFLM(TemplateLM):
)
if not autogptq and not gptqmodel:
if model_kwargs.get("load_in_4bit", None):
assert transformers.__version__ >= "4.30.0", (
if model_kwargs.get("load_in_4bit"):
assert vparse(transformers.__version__) >= vparse("4.30.0"), (
"load_in_4bit requires transformers >= 4.30.0"
)
if transformers.__version__ >= "4.30.0":
if model_kwargs.get("load_in_4bit", None):
if model_kwargs.get("bnb_4bit_compute_dtype", None):
model_kwargs["bnb_4bit_compute_dtype"] = get_dtype(
model_kwargs["bnb_4bit_compute_dtype"]
)
if compute_dtype := model_kwargs.get("bnb_4bit_compute_dtype"):
model_kwargs["bnb_4bit_compute_dtype"] = get_dtype(compute_dtype)
self._model = self.AUTO_MODEL_CLASS.from_pretrained(
pretrained,
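The switch from comparing raw version strings to `vparse` in the hunk above matters because string comparison is lexicographic, not numeric; a quick illustration:

# Lexicographic comparison gets multi-digit components wrong.
from packaging.version import parse as vparse

print("4.9.0" >= "4.30.0")                   # True  (misleading: '9' > '3')
print(vparse("4.9.0") >= vparse("4.30.0"))   # False (4.9.0 is older than 4.30.0)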
......@@ -650,7 +642,7 @@ class HFLM(TemplateLM):
raise type(exception)(
"Tried to load auto_gptq, but auto-gptq is not installed ",
"please install auto-gptq via pip install lm-eval[gptq] or pip install -e .[gptq]",
)
) from exception
self._model = AutoGPTQForCausalLM.from_quantized(
pretrained,
......@@ -669,7 +661,7 @@ class HFLM(TemplateLM):
raise type(exception)(
"Tried to load gptqmodel, but gptqmodel is not installed ",
"please install gptqmodel via `pip install gptqmodel --no-build-isolation` or `pip install lm-eval[gptqmodel] --no-build-isolation`",
)
) from exception
self._model = GPTQModel.from_quantized(
pretrained, trust_remote_code=trust_remote_code, **model_kwargs
......@@ -684,9 +676,10 @@ class HFLM(TemplateLM):
from peft import PeftModel
from peft import __version__ as PEFT_VERSION
if model_kwargs.get("load_in_4bit", None):
if version.parse(PEFT_VERSION) < version.parse("0.4.0"):
raise AssertionError("load_in_4bit requires peft >= 0.4.0")
if model_kwargs.get("load_in_4bit") and vparse(PEFT_VERSION) < vparse(
"0.4.0"
):
raise AssertionError("load_in_4bit requires peft >= 0.4.0")
if self._model.config.vocab_size != len(self.tokenizer):
# resize model for LoRAs with added tokens
eval_logger.info(
......@@ -711,36 +704,32 @@ class HFLM(TemplateLM):
for name, param in self._model.state_dict().items():
try:
param.data += _model_delta.state_dict()[name]
except KeyError:
raise KeyError(f"Delta model is missing weights for layer: {name}")
except KeyError as e:
raise KeyError(
f"Delta model is missing weights for layer: {name}"
) from e
except Exception as e:
raise RuntimeError(
f"Failed to add delta weights to layer {name}. Error: {e}"
)
) from e
del _model_delta
return None
def _create_tokenizer(
self,
pretrained: Union[str, transformers.PreTrainedModel],
tokenizer: Optional[
Union[
str,
transformers.PreTrainedTokenizer,
transformers.PreTrainedTokenizerFast,
]
],
revision: Optional[str] = "main",
trust_remote_code: Optional[bool] = False,
use_fast_tokenizer: Optional[bool] = True,
gguf_file: Optional[str] = None,
add_bos_token: Optional[bool] = False,
subfolder: Optional[str] = "",
pretrained: str | transformers.PreTrainedModel,
tokenizer: str
| transformers.PreTrainedTokenizer
| transformers.PreTrainedTokenizerFast
| None,
revision: str | None = "main",
trust_remote_code: bool | None = False,
use_fast_tokenizer: bool | None = True,
gguf_file: str | None = None,
add_bos_token: bool | None = False,
subfolder: str | None = "",
) -> None:
"""
Helper method during initialization.
"""Helper method during initialization.
Create a tokenizer object corresponding to the correct
tokenizer for value of `pretrained`, or use the pre-initialized tokenizer passed.
......@@ -769,8 +758,12 @@ class HFLM(TemplateLM):
)
else:
assert isinstance(
tokenizer, transformers.PreTrainedTokenizer
) or isinstance(tokenizer, transformers.PreTrainedTokenizerFast)
tokenizer,
(
transformers.PreTrainedTokenizer,
transformers.PreTrainedTokenizerFast,
),
)
self.tokenizer = tokenizer
else:
# Get tokenizer based on 'pretrained'
......@@ -782,9 +775,8 @@ class HFLM(TemplateLM):
self.tokenizer = transformers.AutoTokenizer.from_pretrained(
model_name, **kwargs
)
return None
def _detect_batch_size(self, requests=None, pos: int = 0):
def _detect_batch_size(self, requests: Sequence | None = None, pos: int = 0):
if requests:
_, context_enc, continuation_enc = requests[pos]
max_length = len(
......@@ -799,7 +791,7 @@ class HFLM(TemplateLM):
# if OOM, then halves batch_size and tries again
@find_executable_batch_size(starting_batch_size=self.max_batch_size)
def forward_batch(batch_size):
def forward_batch(batch_size: int):
if self.backend == "seq2seq":
length = max(max_context_enc, max_cont_enc)
batched_conts = torch.ones(
......@@ -846,8 +838,11 @@ class HFLM(TemplateLM):
return batch_size
def tok_encode(
self, string: str, left_truncate_len=None, add_special_tokens=None
) -> List[int]:
self,
string: str,
left_truncate_len: int | None = None,
add_special_tokens: bool | None = None,
) -> list[int]:
""" """
# default for None - empty dict, use predefined tokenizer param
# used for all models except for CausalLM or predefined value
......@@ -873,11 +868,11 @@ class HFLM(TemplateLM):
def tok_batch_encode(
self,
strings: List[str],
strings: list[str],
padding_side: str = "left",
left_truncate_len: int = None,
left_truncate_len: int | None = None,
truncation: bool = False,
) -> Tuple[torch.Tensor, torch.Tensor]:
) -> tuple[torch.Tensor, torch.Tensor]:
# encode a batch of strings. converts to tensors and pads automatically, unlike tok_encode.
old_padding_side = self.tokenizer.padding_side
self.tokenizer.padding_side = padding_side
......@@ -896,7 +891,7 @@ class HFLM(TemplateLM):
if left_truncate_len:
original_lengths = encoding["input_ids"].size(1)
if original_lengths > left_truncate_len:
eval_logger.warn(
eval_logger.warning(
f"Left truncation applied. Original sequence length was {original_lengths}, "
f"truncating to last {left_truncate_len} tokens. Some content will be lost.",
)
......@@ -908,11 +903,17 @@ class HFLM(TemplateLM):
return encoding["input_ids"], encoding["attention_mask"]
def tok_decode(self, tokens, skip_special_tokens=True):
def tok_decode(self, tokens: Iterator[list[str]], skip_special_tokens: bool = True):
return self.tokenizer.decode(tokens, skip_special_tokens=skip_special_tokens)
def _model_call(self, inps, attn_mask=None, labels=None):
def _model_call(
self,
inps: torch.Tensor,
attn_mask: torch.Tensor | None = None,
labels: torch.Tensor | None = None,
) -> torch.Tensor:
"""
:param inps: torch.Tensor
A torch tensor of shape [batch, (sequence_ctx + sequence_cont)] or of shape
[batch, sequence_ctx]. the size of sequence may vary from call to call
......@@ -926,32 +927,40 @@ class HFLM(TemplateLM):
A torch tensor of shape [batch, sequence, vocab] with the
logits returned from the model's decoder
"""
with torch.no_grad():
with torch.autocast(
with (
torch.no_grad(),
torch.autocast(
device_type=self.device.type,
dtype=self.mixed_precision_dtype,
enabled=self.mixed_precision_dtype is not None,
):
if attn_mask is not None or labels is not None:
assert attn_mask is not None and labels is not None
assert self.AUTO_MODEL_CLASS == transformers.AutoModelForSeq2SeqLM
return self.model(
input_ids=inps, attention_mask=attn_mask, labels=labels
).logits
else:
assert self.AUTO_MODEL_CLASS in (
transformers.AutoModelForCausalLM,
transformers.AutoModelForVision2Seq,
)
return self.model(inps).logits
),
):
if attn_mask is not None or labels is not None:
assert attn_mask is not None and labels is not None
assert transformers.AutoModelForSeq2SeqLM == self.AUTO_MODEL_CLASS
return self.model(
input_ids=inps, attention_mask=attn_mask, labels=labels
).logits
assert self.AUTO_MODEL_CLASS in (
transformers.AutoModelForCausalLM,
transformers.AutoModelForVision2Seq,
)
return self.model(inps).logits
def _model_generate(self, context, max_length, stop, **generation_kwargs):
def _model_generate(
self,
context,
max_length: int,
stop: list[str],
**generation_kwargs: dict[str, Any],
) -> torch.Tensor:
# temperature = 0.0 if not set
# if do_sample is false and temp==0.0:
# remove temperature, as do_sample=False takes care of this
# and we don't want a warning from HF
generation_kwargs["temperature"] = generation_kwargs.get("temperature", 0.0)
do_sample = generation_kwargs.get("do_sample", None)
do_sample = generation_kwargs.get("do_sample")
# The temperature has to be a strictly positive float -- if it is 0.0, use greedy decoding strategies
if generation_kwargs.get("temperature") == 0.0 and do_sample is None:
......@@ -978,7 +987,10 @@ class HFLM(TemplateLM):
)
def _select_cont_toks(
self, logits: torch.Tensor, contlen: int = None, inplen: int = None
self,
logits: torch.Tensor,
contlen: int | None = None,
inplen: int | None = None,
) -> torch.Tensor:
if self.backend == "causal":
assert contlen and inplen, (
......@@ -998,8 +1010,8 @@ class HFLM(TemplateLM):
return logits
def loglikelihood_rolling(
self, requests: List[Instance], disable_tqdm: bool = False
) -> List[float]:
self, requests: list[Instance], disable_tqdm: bool = False
) -> list[float]:
adaptive_batch_size = None
if self.batch_size == "auto":
# using rolling window with maximum context
......@@ -1018,7 +1030,7 @@ class HFLM(TemplateLM):
disable=(disable_tqdm or (self.rank != 0)),
)
):
rolling_token_windows: List[Tuple[List[int], List[int]]] = list(
rolling_token_windows: list[tuple[list[int], list[int]]] = list(
map(
utils.make_disjoint_window,
utils.get_rolling_token_windows(
......@@ -1102,15 +1114,15 @@ class HFLM(TemplateLM):
def _loglikelihood_tokens(
self,
requests: List[Tuple[Tuple[str, str], List[int], List[int]]],
requests: list[tuple[tuple[str, str], list[int], list[int]]],
disable_tqdm: bool = False,
override_bs: int = None,
) -> List[Tuple[float, bool]]:
override_bs: int | None = None,
) -> list[tuple[float, bool]]:
# TODO: implement some kind of efficient-request-middleware that lumps together requests with the same context
res = []
def _collate(req: Tuple[Tuple[str, str], List[int], List[int]]):
"""Defines the key for the sorted method"""
def _collate(req: tuple[tuple[str, str], list[int], list[int]]):
"""Defines the key for the sorted method."""
# the negative sign on len(toks) sorts descending - this has a few advantages:
# - time estimates will always be over not underestimates, which is more useful for planning
# - to know the size of a batch when going through the list, you know the first one is always the batch
......@@ -1121,8 +1133,8 @@ class HFLM(TemplateLM):
toks = req[1] + req[2]
return -len(toks), tuple(toks)
def _lookup_one_token_cont(req: Tuple[Tuple[str, str], List[int], List[int]]):
"""Defines the key to group and lookup one-token continuations"""
def _lookup_one_token_cont(req: tuple[tuple[str, str], list[int], list[int]]):
"""Defines the key to group and lookup one-token continuations."""
# Use with group_by="contexts" (optional)"
# allows for the creation of a lookup, so we can reuse logits in case of one-token continuations.
# speeds up some multiple-choice tasks proportionally to the number of choices.
......@@ -1295,7 +1307,7 @@ class HFLM(TemplateLM):
# original args. Otherwise, expands the logits batch dimension and yields each
# batch along with matching continuation tokens and prompt strings.
# logits -> [1, seq, vocab]
for request_str, cont_toks, logits in re_ord.get_cache(
for request_str, cont_toks, logits in re_ord.get_cache( # noqa
req_str=request_str,
cxt_toks=ctx_tokens,
cont_toks=cont_toks,
......@@ -1336,11 +1348,11 @@ class HFLM(TemplateLM):
return re_ord.get_original(res)
def generate_until(
self, requests: List[Instance], disable_tqdm: bool = False
) -> List[str]:
self, requests: list[Instance], disable_tqdm: bool = False
) -> list[str]:
res = []
def _collate(req: Tuple[str, dict]):
def _collate(req: tuple[str, dict]):
"""Defines the key for the sorted method"""
# the negative sign on len(toks) sorts descending - this has a few advantages:
# - time estimates will always be over not underestimates, which is more useful for planning
......@@ -1400,10 +1412,10 @@ class HFLM(TemplateLM):
# add EOS token to stop sequences
until = handle_stop_sequences(kwargs.pop("until", None), eos=eos)
else:
raise ValueError(
raise TypeError(
f"Expected `kwargs` to be of type `dict` but got {type(gen_kwargs)}"
)
if "max_gen_toks" in kwargs.keys():
if "max_gen_toks" in kwargs:
max_gen_toks = kwargs.pop("max_gen_toks")
else:
max_gen_toks = self.max_gen_toks
......@@ -1481,11 +1493,9 @@ class HFLM(TemplateLM):
return res
def apply_chat_template(
self, chat_history: List[Dict[str, str]], add_generation_prompt: bool = True
self, chat_history: list[dict[str, str]], add_generation_prompt: bool = True
) -> str:
"""
Method to apply a chat template to a list of chat history between user and model.
"""
"""Method to apply a chat template to a list of chat history between user and model."""
try:
chat_templated = self.tokenizer.apply_chat_template(
chat_history,
......@@ -1510,9 +1520,7 @@ class HFLM(TemplateLM):
return chat_templated
def get_model_info(self) -> dict:
"""
Method to get Hugging Face model information for experiment reproducibility.
"""
"""Method to get Hugging Face model information for experiment reproducibility."""
def get_model_num_params(model) -> int:
if hasattr(model, "num_parameters"):
......
tag:
- multiple_choice
task: hellaswag
dataset_path: hellaswag
dataset_path: Rowan/hellaswag
dataset_name: null
output_type: multiple_choice
training_split: train
......
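A minimal sanity check of the new dataset path, assuming access to the Hugging Face Hub (the field name follows the Rowan/hellaswag dataset card); newer `datasets` releases resolve namespaced repo ids rather than bare script names like "hellaswag".

# Sketch: load the relocated dataset by its Hub repo id.
from datasets import load_dataset

ds = load_dataset("Rowan/hellaswag", split="validation")
print(ds[0]["ctx"])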
......@@ -21,7 +21,7 @@ license = { "text" = "MIT" }
dependencies = [
"accelerate>=0.26.0",
"evaluate",
"datasets>=2.16.0",
"datasets>=2.16.0,<4.0",
"evaluate>=0.4.0",
"jsonlines",
"numexpr",
......
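For reference, a small check of what the tightened constraint admits, using the `packaging` library (version numbers are illustrative):

# The pin keeps 2.16 <= datasets < 4.0; 4.x releases are excluded.
from packaging.specifiers import SpecifierSet
from packaging.version import Version

spec = SpecifierSet(">=2.16.0,<4.0")
print(Version("3.6.0") in spec)  # True: still allowed
print(Version("4.0.0") in spec)  # False: excluded by the pin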