gaoqiong / lm-evaluation-harness · Commits

Commit 904bba12 (unverified), authored Jul 23, 2025 by Baber Abbasi, committed via GitHub on Jul 23, 2025

Pin datasets < 4.0.0 (#3172)

* Fix: pin datasets < 4.0
* fix
* update type hints in HF
* fix hellaswag path
Parent: 2eea3f50

Showing 5 changed files with 199 additions and 185 deletions (+199 −185)
lm_eval/__main__.py                       +3    -1
lm_eval/api/task.py                       +4    -0
lm_eval/models/huggingface.py             +190  -182
lm_eval/tasks/hellaswag/hellaswag.yaml    +1    -1
pyproject.toml                            +1    -1
lm_eval/__main__.py  (view file @ 904bba12)

@@ -433,7 +433,9 @@ def cli_evaluate(args: Union[argparse.Namespace, None] = None) -> None:
         # because it's already been determined based on the prior env var before launching our
         # script--`datasets` gets imported by lm_eval internally before these lines can update the env.
         import datasets
+        from packaging.version import parse as vparse

-        datasets.config.HF_DATASETS_TRUST_REMOTE_CODE = True
+        if vparse(datasets.__version__) < vparse("4.0.0"):
+            datasets.config.HF_DATASETS_TRUST_REMOTE_CODE = True

         if isinstance(args.model_args, dict):
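The gate above only sets the legacy trust-remote-code switch when the installed `datasets` release still understands it; from 4.0.0 onward script-based datasets were dropped, so the attribute is left untouched there. A minimal standalone sketch of the same check, using nothing beyond what the hunk itself uses:

import datasets
from packaging.version import parse as vparse

# datasets >= 4.0.0 removed trust_remote_code support, so only flip the
# global config flag on older releases.
if vparse(datasets.__version__) < vparse("4.0.0"):
    datasets.config.HF_DATASETS_TRUST_REMOTE_CODE = True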
lm_eval/api/task.py  (view file @ 904bba12)

@@ -981,6 +981,10 @@ class ConfigurableTask(Task):
     def download(
         self, dataset_kwargs: Optional[Dict[str, Any]] = None, **kwargs
     ) -> None:
+        from packaging.version import parse as vparse
+
+        if dataset_kwargs and vparse(datasets.__version__) >= vparse("4.0.0"):
+            dataset_kwargs.pop("trust_remote_code", None)
         if isinstance(self.config.custom_dataset, Callable):
             eval_logger.warning(
                 f"{self.config.task}: Custom kwargs can be passed to `--metadata` in console (as json string) or to the TaskManager."
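The new lines quietly strip `trust_remote_code` from the task's dataset kwargs when the installed `datasets` no longer accepts it, rather than letting `load_dataset` fail on an unexpected argument. A hedged sketch of the same idea as a standalone helper (the helper name and signature are illustrative, not part of the repo):

from __future__ import annotations

import datasets
from packaging.version import parse as vparse


def load_task_dataset(path: str, dataset_kwargs: dict | None = None):
    # Illustrative helper: drop kwargs that datasets >= 4.0.0 removed before
    # forwarding everything else to load_dataset.
    dataset_kwargs = dict(dataset_kwargs or {})
    if vparse(datasets.__version__) >= vparse("4.0.0"):
        dataset_kwargs.pop("trust_remote_code", None)
    return datasets.load_dataset(path, **dataset_kwargs)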
lm_eval/models/huggingface.py  (view file @ 904bba12)

@@ -3,9 +3,10 @@ from __future__ import annotations
 import copy
 import logging
 import os
+from collections.abc import Iterator, Sequence
 from datetime import timedelta
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, Tuple, Union
+from typing import TYPE_CHECKING, Any, Literal

 import jinja2
 import torch

@@ -19,6 +20,7 @@ from accelerate import (
 from accelerate.utils import get_max_memory
 from huggingface_hub import HfApi
 from packaging import version
+from packaging.version import parse as vparse
 from tqdm import tqdm
 from transformers.models.auto.modeling_auto import (
     MODEL_FOR_CAUSAL_LM_MAPPING_NAMES,

@@ -26,7 +28,6 @@ from transformers.models.auto.modeling_auto import (
 )

 from lm_eval import utils
-from lm_eval.api.instance import Instance
 from lm_eval.api.model import TemplateLM
 from lm_eval.api.registry import register_model
 from lm_eval.models.utils import (

@@ -42,15 +43,17 @@ from lm_eval.models.utils import (
 if TYPE_CHECKING:
-    from transformers.quantizers import AutoQuantizationConfig
+    from transformers.quantizers.auto import AutoQuantizationConfig
+
+    from lm_eval.api.instance import Instance

 eval_logger = logging.getLogger(__name__)

+TOKENIZER_INFINITY = 1000000000000000019884624838656
+

 @register_model("hf-auto", "hf", "huggingface")
 class HFLM(TemplateLM):
-    """
-    An abstracted Huggingface model class. Enables usage with both models of
+    """An abstracted Huggingface model class. Enables usage with both models of
     `transformers.AutoModelForCausalLM` and `transformers.AutoModelForSeq2SeqLM` classes.

     Supports data-parallel multi-GPU with HF Accelerate.
@@ -61,48 +64,45 @@ class HFLM(TemplateLM):
     def __init__(
         self,
-        pretrained: Union[str, transformers.PreTrainedModel],
+        pretrained: str | transformers.PreTrainedModel,
         backend: Literal["default", "causal", "seq2seq"] = "default",
         # override whether the model should be treated as decoder-only (causal) or encoder-decoder (seq2seq)
-        revision: Optional[str] = "main",
+        revision: str | None = "main",
         subfolder: str = "",
-        tokenizer: Optional[
-            Union[
-                str,
-                transformers.PreTrainedTokenizer,
-                transformers.PreTrainedTokenizerFast,
-            ]
-        ] = None,
-        truncation: Optional[bool] = False,
+        tokenizer: str
+        | transformers.PreTrainedTokenizer
+        | transformers.PreTrainedTokenizerFast
+        | None = None,
+        truncation: bool | None = False,
         logits_cache: bool = True,
-        max_length: Optional[int] = None,
-        device: Optional[str] = "cuda",
-        dtype: Optional[Union[str, torch.dtype]] = "auto",
-        softmax_dtype: Optional[Union[str, torch.dtype]] = None,
-        mixed_precision_dtype: Optional[Union[str, torch.dtype]] = None,
-        batch_size: Optional[Union[int, str]] = 1,
-        max_batch_size: Optional[int] = 64,
-        trust_remote_code: Optional[bool] = False,
-        use_fast_tokenizer: Optional[bool] = True,
-        add_bos_token: Optional[bool] = False,
-        prefix_token_id: Optional[int] = None,
+        max_length: int | None = None,
+        device: str | None = "cuda",
+        dtype: str | torch.dtype | None = "auto",
+        softmax_dtype: str | torch.dtype | None = None,
+        mixed_precision_dtype: str | torch.dtype | None = None,
+        batch_size: int | str | None = 1,
+        max_batch_size: int | None = 64,
+        trust_remote_code: bool | None = False,
+        use_fast_tokenizer: bool | None = True,
+        add_bos_token: bool | None = False,
+        prefix_token_id: int | None = None,
         # arguments used for splitting a model across GPUs naively.
         # only used if `parallelize=True`.
-        parallelize: Optional[bool] = False,
-        max_memory_per_gpu: Optional[Union[int, str]] = None,
-        max_cpu_memory: Optional[Union[int, str]] = None,
-        offload_folder: Optional[Union[str, os.PathLike]] = "./offload",
+        parallelize: bool | None = False,
+        max_memory_per_gpu: int | str | None = None,
+        max_cpu_memory: int | str | None = None,
+        offload_folder: str | os.PathLike | None = "./offload",
         # PEFT, delta weights and quantization options
-        peft: Optional[str] = None,
-        delta: Optional[str] = None,
-        autogptq: Optional[Union[bool, str]] = False,
-        gptqmodel: Optional[bool] = False,
-        gguf_file: Optional[str] = None,
+        peft: str | None = None,
+        delta: str | None = None,
+        autogptq: bool | str | None = False,
+        gptqmodel: bool | None = False,
+        gguf_file: str | None = None,
         # end token for thinking, either the string or int token id.
         # splits to get response after this token (if provided).
-        think_end_token: Union[str, int, None] = None,
+        think_end_token: str | int | None = None,
         enable_thinking: bool | None = None,
-        chat_template_args: Optional[dict[str, Any]] = None,
+        chat_template_args: dict[str, Any] | None = None,
         **kwargs,
     ) -> None:
         super().__init__()
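Most of this hunk is a mechanical rewrite of `Optional[...]`/`Union[...]` hints into PEP 604 `X | Y` unions. Because the module begins with `from __future__ import annotations`, annotations are stored as strings and never evaluated at import time, so the pipe syntax is safe here regardless of the interpreter's runtime support for it. A small illustration with a hypothetical parameter:

from __future__ import annotations

from typing import Optional, Union


def old_style(batch_size: Optional[Union[int, str]] = 1) -> None: ...


def new_style(batch_size: int | str | None = 1) -> None:
    # Same meaning as old_style; with postponed evaluation the hint is kept
    # as the string "int | str | None" rather than evaluated on import.
    ...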
@@ -280,9 +280,10 @@ class HFLM(TemplateLM):
             self.batch_size_per_gpu = int(batch_size)

         if isinstance(pretrained, str):
-            if gpus >= 1 or str(self.device) == "mps":
+            if (gpus >= 1 or str(self.device) == "mps") and not (
+                parallelize or autogptq or hasattr(self, "accelerator")
+            ):
                 # TODO: can remove this whole snippet except in the mps case, perhaps?
-                if not (parallelize or autogptq or hasattr(self, "accelerator")):
                 # place model onto device requested manually,
                 # if not using HF Accelerate or device_map
                 # or any other option that preloads model onto device

@@ -336,12 +337,12 @@ class HFLM(TemplateLM):
     def _get_accelerate_args(
         self,
-        parallelize: Optional[bool] = None,
-        device_map: Optional[str] = "auto",
-        max_memory_per_gpu: Optional[Union[int, str]] = None,
-        max_cpu_memory: Optional[Union[int, str]] = None,
-        offload_folder: Optional[str] = "./offload",
-        gpus: Optional[int] = None,
+        parallelize: bool | None = None,
+        device_map: str | None = "auto",
+        max_memory_per_gpu: int | str | None = None,
+        max_cpu_memory: int | str | None = None,
+        offload_folder: str | None = "./offload",
+        gpus: int | None = None,
     ) -> dict:
         """Returns the kwargs needed to apply `accelerate` in `AutoModel.from_pretrained`."""
         num_local_processes = int(os.environ.get("LOCAL_WORLD_SIZE", 1))

@@ -379,13 +380,8 @@ class HFLM(TemplateLM):
             }
         else:
             # Estimating the possible memory requirements
             max_memory_all_gpus = get_max_memory()
-            if "cpu" in max_memory_all_gpus:
-                del max_memory_all_gpus["cpu"]
-            if not hasattr(self, "accelerator"):
-                max_memory_per_gpu_map = {k: v for k, v in max_memory_all_gpus.items()}
-            else:
+            max_memory_all_gpus.pop("cpu", None)
+            if hasattr(self, "accelerator"):
                 # use only 1 / num_processes of the GPUs if we are running under accelerate launch
                 max_memory_per_gpu_map = {
                     k: v

@@ -393,6 +389,9 @@ class HFLM(TemplateLM):
                     if k % num_local_processes
                     == (self.accelerator.process_index % num_local_processes)
                 }
+            else:
+                max_memory_per_gpu_map = max_memory_all_gpus
+
         args["max_memory"] = max_memory_per_gpu_map
         args["device_map"] = "auto" if device_map is None else device_map
         eval_logger.info(
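When running under `accelerate launch`, several local processes share the visible GPUs, so the comprehension above keeps only the devices whose index maps to the current rank; the added `else` branch keeps the full memory map otherwise. A hedged sketch of just that selection step, with made-up memory values (the helper is illustrative, not part of the module):

from __future__ import annotations


def select_max_memory(
    max_memory_all_gpus: dict[int, str],
    process_index: int | None,
    num_local_processes: int,
) -> dict[int, str]:
    # No accelerator attached -> this process may use every visible GPU.
    if process_index is None:
        return max_memory_all_gpus
    # Under accelerate launch, keep only the GPUs assigned to this local rank.
    return {
        k: v
        for k, v in max_memory_all_gpus.items()
        if k % num_local_processes == (process_index % num_local_processes)
    }


# 4 GPUs split across 2 local processes: rank 0 keeps {0, 2}, rank 1 keeps {1, 3}.
print(select_max_memory({0: "20GiB", 1: "20GiB", 2: "20GiB", 3: "20GiB"}, 0, 2))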
@@ -436,12 +435,12 @@ class HFLM(TemplateLM):
         return self._model

     @property
-    def eot_token_id(self):
+    def eot_token_id(self) -> int:
         # we use EOT because end of *text* is more accurate for what we're doing than end of *sentence*
         return self.tokenizer.eos_token_id

     @property
-    def prefix_token_id(self):
+    def prefix_token_id(self) -> int:
         # it is used as prefix for loglikelihood
         if self.custom_prefix_token_id is not None:
             return self.custom_prefix_token_id

@@ -450,7 +449,7 @@ class HFLM(TemplateLM):
         return self.tokenizer.eos_token_id

     @property
-    def max_length(self):
+    def max_length(self) -> int:
         if self._max_length:  # if max length manually set, return it
             return self._max_length
         seqlen_config_attrs = ("n_positions", "max_position_embeddings", "n_ctx")

@@ -458,7 +457,7 @@ class HFLM(TemplateLM):
             if hasattr(self.model.config, attr):
                 return getattr(self.model.config, attr)
         if hasattr(self.tokenizer, "model_max_length"):
-            if self.tokenizer.model_max_length == 1000000000000000019884624838656:
+            if self.tokenizer.model_max_length == TOKENIZER_INFINITY:
                 return self._DEFAULT_MAX_LENGTH
             return self.tokenizer.model_max_length
         return self._DEFAULT_MAX_LENGTH
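Tokenizers that carry no real length limit report `model_max_length` as a huge sentinel value, which is what the new module-level `TOKENIZER_INFINITY` constant names instead of repeating the magic number. A hedged sketch of the lookup order, with a hypothetical `resolve_max_length` helper and an assumed fallback standing in for `HFLM._DEFAULT_MAX_LENGTH`:

TOKENIZER_INFINITY = 1000000000000000019884624838656  # tokenizer "no limit" sentinel
_DEFAULT_MAX_LENGTH = 2048  # assumed fallback for this sketch


def resolve_max_length(model_config, tokenizer, manual_max_length=None) -> int:
    # Mirrors the property above: explicit override, then config attributes,
    # then tokenizer metadata (unless it is the sentinel), then the default.
    if manual_max_length:
        return manual_max_length
    for attr in ("n_positions", "max_position_embeddings", "n_ctx"):
        if hasattr(model_config, attr):
            return getattr(model_config, attr)
    model_max = getattr(tokenizer, "model_max_length", TOKENIZER_INFINITY)
    if model_max != TOKENIZER_INFINITY:
        return model_max
    return _DEFAULT_MAX_LENGTH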
@@ -489,12 +488,12 @@ class HFLM(TemplateLM):
     def _get_backend(
         self,
-        config: Union[transformers.PretrainedConfig, transformers.AutoConfig],
+        config: transformers.PretrainedConfig | transformers.AutoConfig,
         backend: Literal["default", "causal", "seq2seq"] = "default",
-        trust_remote_code: Optional[bool] = False,
+        trust_remote_code: bool | None = False,
     ) -> None:
         """
         Helper method during initialization.
         Determines the backend ("causal" (decoder-only) or "seq2seq" (encoder-decoder)) model type to be used.
         sets `self.AUTO_MODEL_CLASS` appropriately if not already set.

@@ -506,9 +505,7 @@ class HFLM(TemplateLM):
         if backend != "default":
             # if we've settled on non-default backend, use that manually
-            if backend == "causal":
-                self.backend = backend
-            elif backend == "seq2seq":
+            if backend in ["causal", "seq2seq"]:
                 self.backend = backend
             eval_logger.info(
                 f"Overrode HF model backend type, and using type '{self.backend}'"

@@ -516,7 +513,7 @@ class HFLM(TemplateLM):
         else:
             # determine and use the default HF backend for this model, based on its config + metadata.
             if (
-                getattr(config, "model_type")
+                getattr(config, "model_type", None)
                 in MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES
             ):
                 # first check if model type is listed under seq2seq models, since some

@@ -525,7 +522,7 @@ class HFLM(TemplateLM):
                 self.backend = "seq2seq"
                 eval_logger.debug(f"Using model type '{self.backend}'")
             elif (
-                getattr(self.config, "model_type") in MODEL_FOR_CAUSAL_LM_MAPPING_NAMES
+                getattr(config, "model_type", None) in MODEL_FOR_CAUSAL_LM_MAPPING_NAMES
             ):
                 self.backend = "causal"
                 eval_logger.debug(f"Using model type '{self.backend}'")
@@ -554,10 +551,10 @@ class HFLM(TemplateLM):
         pretrained: str,
         revision: str = "main",
         trust_remote_code: bool = False,
-        gguf_file: Optional[str] = None,
+        gguf_file: str | None = None,
         subfolder: str = "",
     ) -> None:
-        """Return the model config for HuggingFace models"""
+        """Return the model config for HuggingFace models."""
         self._config = transformers.AutoConfig.from_pretrained(
             pretrained,
             revision=revision,

@@ -569,29 +566,28 @@ class HFLM(TemplateLM):
     def _create_model(
         self,
         pretrained: str,
-        revision: Optional[str] = "main",
-        dtype: Optional[Union[str, torch.dtype]] = "auto",
-        trust_remote_code: Optional[bool] = False,
+        revision: str | None = "main",
+        dtype: str | torch.dtype | None = "auto",
+        trust_remote_code: bool | None = False,
         # arguments used for splitting a model across GPUs naively.
         # only used if `parallelize=True`.
         # (accelerate naive PP (device_map) options)
-        parallelize: Optional[bool] = False,
-        gpus: Optional[int] = None,
-        max_memory_per_gpu: Optional[Union[int, str]] = None,
-        max_cpu_memory: Optional[Union[int, str]] = None,
-        offload_folder: Optional[str] = "./offload",
+        parallelize: bool | None = False,
+        gpus: int | None = None,
+        max_memory_per_gpu: int | str | None = None,
+        max_cpu_memory: int | str | None = None,
+        offload_folder: str | None = "./offload",
         # PEFT, delta weights and quantization options
-        peft: Optional[str] = None,
-        delta: Optional[str] = None,
-        autogptq: Optional[Union[bool, str]] = False,
-        gptqmodel: Optional[bool] = False,
-        gguf_file: Optional[str] = None,
-        quantization_config: Optional["AutoQuantizationConfig"] = None,
+        peft: str | None = None,
+        delta: str | None = None,
+        autogptq: bool | str | None = False,
+        gptqmodel: bool | None = False,
+        gguf_file: str | None = None,
+        quantization_config: AutoQuantizationConfig | None = None,
         subfolder: str = "",
         **kwargs,
     ) -> None:
-        """
-        Initializes an HF or HF-compatible PreTrainedModel from scratch
+        """Initializes an HF or HF-compatible PreTrainedModel from scratch
         inside HFLM, using the kwargs passed into self.__init__().

         Also handles functionality such as AutoGPTQ usage and PEFT wrapping.

@@ -602,12 +598,12 @@ class HFLM(TemplateLM):
         please consider subclassing HFLM and overriding this and other methods as needed.
         """

-        model_kwargs = kwargs if kwargs else {}
+        model_kwargs = kwargs or {}
         model_kwargs.update(
             self._get_accelerate_args(
                 parallelize=parallelize,
-                device_map=kwargs.get("device_map", None),
+                device_map=kwargs.get("device_map"),
                 max_memory_per_gpu=max_memory_per_gpu,
                 max_cpu_memory=max_cpu_memory,
                 offload_folder=offload_folder,
@@ -616,16 +612,12 @@ class HFLM(TemplateLM):
         )
         if not autogptq and not gptqmodel:
-            if model_kwargs.get("load_in_4bit", None):
-                assert transformers.__version__ >= "4.30.0", (
+            if model_kwargs.get("load_in_4bit"):
+                assert vparse(transformers.__version__) >= vparse("4.30.0"), (
                     "load_in_4bit requires transformers >= 4.30.0"
                 )
-            if transformers.__version__ >= "4.30.0":
-                if model_kwargs.get("load_in_4bit", None):
-                    if model_kwargs.get("bnb_4bit_compute_dtype", None):
-                        model_kwargs["bnb_4bit_compute_dtype"] = get_dtype(
-                            model_kwargs["bnb_4bit_compute_dtype"]
-                        )
+                if compute_dtype := model_kwargs.get("bnb_4bit_compute_dtype"):
+                    model_kwargs["bnb_4bit_compute_dtype"] = get_dtype(compute_dtype)
         self._model = self.AUTO_MODEL_CLASS.from_pretrained(
             pretrained,
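Besides flattening the nested ifs, this hunk replaces raw string comparison of `transformers.__version__` with parsed-version comparison. String comparison is lexicographic, so it mis-orders versions as soon as a component reaches two digits; a short self-contained illustration of why the change matters:

from packaging.version import parse as vparse

# Lexicographically, "4.9.2" sorts after "4.30.0" even though it is the older release.
assert ("4.9.2" >= "4.30.0") is True                    # misleading string comparison
assert (vparse("4.9.2") >= vparse("4.30.0")) is False   # correct semantic comparison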
@@ -650,7 +642,7 @@ class HFLM(TemplateLM):
                 raise type(exception)(
                     "Tried to load auto_gptq, but auto-gptq is not installed ",
                     "please install auto-gptq via pip install lm-eval[gptq] or pip install -e .[gptq]",
-                )
+                ) from exception

             self._model = AutoGPTQForCausalLM.from_quantized(
                 pretrained,

@@ -669,7 +661,7 @@ class HFLM(TemplateLM):
                 raise type(exception)(
                     "Tried to load gptqmodel, but gptqmodel is not installed ",
                     "please install gptqmodel via `pip install gptqmodel --no-build-isolation` or `pip install lm-eval[gptqmodel] --no-build-isolation`",
-                )
+                ) from exception

             self._model = GPTQModel.from_quantized(
                 pretrained, trust_remote_code=trust_remote_code, **model_kwargs

@@ -684,8 +676,9 @@ class HFLM(TemplateLM):
             from peft import PeftModel
             from peft import __version__ as PEFT_VERSION

-            if model_kwargs.get("load_in_4bit", None):
-                if version.parse(PEFT_VERSION) < version.parse("0.4.0"):
-                    raise AssertionError("load_in_4bit requires peft >= 0.4.0")
+            if model_kwargs.get("load_in_4bit") and vparse(PEFT_VERSION) < vparse(
+                "0.4.0"
+            ):
+                raise AssertionError("load_in_4bit requires peft >= 0.4.0")
             if self._model.config.vocab_size != len(self.tokenizer):
                 # resize model for LoRAs with added tokens
@@ -711,36 +704,32 @@ class HFLM(TemplateLM):
             for name, param in self._model.state_dict().items():
                 try:
                     param.data += _model_delta.state_dict()[name]
-                except KeyError:
-                    raise KeyError(f"Delta model is missing weights for layer: {name}")
+                except KeyError as e:
+                    raise KeyError(
+                        f"Delta model is missing weights for layer: {name}"
+                    ) from e
                 except Exception as e:
                     raise RuntimeError(
                         f"Failed to add delta weights to layer {name}. Error: {e}"
-                    )
+                    ) from e

             del _model_delta

-        return None
-
     def _create_tokenizer(
         self,
-        pretrained: Union[str, transformers.PreTrainedModel],
-        tokenizer: Optional[
-            Union[
-                str,
-                transformers.PreTrainedTokenizer,
-                transformers.PreTrainedTokenizerFast,
-            ]
-        ],
-        revision: Optional[str] = "main",
-        trust_remote_code: Optional[bool] = False,
-        use_fast_tokenizer: Optional[bool] = True,
-        gguf_file: Optional[str] = None,
-        add_bos_token: Optional[bool] = False,
-        subfolder: Optional[str] = "",
+        pretrained: str | transformers.PreTrainedModel,
+        tokenizer: str
+        | transformers.PreTrainedTokenizer
+        | transformers.PreTrainedTokenizerFast
+        | None,
+        revision: str | None = "main",
+        trust_remote_code: bool | None = False,
+        use_fast_tokenizer: bool | None = True,
+        gguf_file: str | None = None,
+        add_bos_token: bool | None = False,
+        subfolder: str | None = "",
     ) -> None:
-        """
-        Helper method during initialization.
+        """Helper method during initialization.

         Create a tokenizer object corresponding to the correct
         tokenizer for value of `pretrained`, or use the pre-initialized tokenizer passed.
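Both re-raise sites now use `raise ... from e`, which records the original error as `__cause__` so the traceback shows the real missing key rather than an unrelated "during handling of the above exception" chain. A minimal illustration of the pattern with hypothetical state dicts (plain floats stand in for tensors):

def apply_delta(target: dict, delta: dict) -> None:
    # Sketch of the delta-merge loop above: chain the KeyError so the original
    # lookup failure stays attached to the friendlier message.
    for name, value in target.items():
        try:
            target[name] = value + delta[name]
        except KeyError as e:
            raise KeyError(f"Delta model is missing weights for layer: {name}") from e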
@@ -769,8 +758,12 @@ class HFLM(TemplateLM):
             )
         else:
             assert isinstance(
-                tokenizer, transformers.PreTrainedTokenizer
-            ) or isinstance(tokenizer, transformers.PreTrainedTokenizerFast)
+                tokenizer,
+                (
+                    transformers.PreTrainedTokenizer,
+                    transformers.PreTrainedTokenizerFast,
+                ),
+            )
             self.tokenizer = tokenizer
         else:
             # Get tokenizer based on 'pretrained'

@@ -782,9 +775,8 @@ class HFLM(TemplateLM):
             self.tokenizer = transformers.AutoTokenizer.from_pretrained(
                 model_name, **kwargs
             )

-        return None
-
-    def _detect_batch_size(self, requests=None, pos: int = 0):
+    def _detect_batch_size(self, requests: Sequence | None = None, pos: int = 0):
         if requests:
             _, context_enc, continuation_enc = requests[pos]
             max_length = len(

@@ -799,7 +791,7 @@ class HFLM(TemplateLM):
             # if OOM, then halves batch_size and tries again
             @find_executable_batch_size(starting_batch_size=self.max_batch_size)
-            def forward_batch(batch_size):
+            def forward_batch(batch_size: int):
                 if self.backend == "seq2seq":
                     length = max(max_context_enc, max_cont_enc)
                     batched_conts = torch.ones(

@@ -846,8 +838,11 @@ class HFLM(TemplateLM):
         return batch_size

     def tok_encode(
-        self, string: str, left_truncate_len=None, add_special_tokens=None
-    ) -> List[int]:
+        self,
+        string: str,
+        left_truncate_len: int | None = None,
+        add_special_tokens: bool | None = None,
+    ) -> list[int]:
         """ """
         # default for None - empty dict, use predefined tokenizer param
         # used for all models except for CausalLM or predefined value
@@ -873,11 +868,11 @@ class HFLM(TemplateLM):
     def tok_batch_encode(
         self,
-        strings: List[str],
+        strings: list[str],
         padding_side: str = "left",
-        left_truncate_len: int = None,
+        left_truncate_len: int | None = None,
         truncation: bool = False,
-    ) -> Tuple[torch.Tensor, torch.Tensor]:
+    ) -> tuple[torch.Tensor, torch.Tensor]:
         # encode a batch of strings. converts to tensors and pads automatically, unlike tok_encode.
         old_padding_side = self.tokenizer.padding_side
         self.tokenizer.padding_side = padding_side

@@ -896,7 +891,7 @@ class HFLM(TemplateLM):
         if left_truncate_len:
             original_lengths = encoding["input_ids"].size(1)
             if original_lengths > left_truncate_len:
-                eval_logger.warn(
+                eval_logger.warning(
                     f"Left truncation applied. Original sequence length was {original_lengths}, "
                     f"truncating to last {left_truncate_len} tokens. Some content will be lost.",
                 )

@@ -908,11 +903,17 @@ class HFLM(TemplateLM):
         return encoding["input_ids"], encoding["attention_mask"]

-    def tok_decode(self, tokens, skip_special_tokens=True):
+    def tok_decode(self, tokens: Iterator[list[str]], skip_special_tokens: bool = True):
         return self.tokenizer.decode(tokens, skip_special_tokens=skip_special_tokens)

-    def _model_call(self, inps, attn_mask=None, labels=None):
+    def _model_call(
+        self,
+        inps: torch.Tensor,
+        attn_mask: torch.Tensor | None = None,
+        labels: torch.Tensor | None = None,
+    ) -> torch.Tensor:
         """
         :param inps: torch.Tensor
             A torch tensor of shape [batch, (sequence_ctx + sequence_cont)] or of shape
             [batch, sequence_ctx]. the size of sequence may vary from call to call
@@ -926,32 +927,40 @@ class HFLM(TemplateLM):
             A torch tensor of shape [batch, sequence, vocab] with the
             logits returned from the model's decoder
         """
-        with torch.no_grad():
-            with torch.autocast(
-                device_type=self.device.type,
-                dtype=self.mixed_precision_dtype,
-                enabled=self.mixed_precision_dtype is not None,
-            ):
-                if attn_mask is not None or labels is not None:
-                    assert attn_mask is not None and labels is not None
-                    assert self.AUTO_MODEL_CLASS == transformers.AutoModelForSeq2SeqLM
-                    return self.model(
-                        input_ids=inps, attention_mask=attn_mask, labels=labels
-                    ).logits
-                else:
-                    assert self.AUTO_MODEL_CLASS in (
-                        transformers.AutoModelForCausalLM,
-                        transformers.AutoModelForVision2Seq,
-                    )
-                    return self.model(inps).logits
+        with (
+            torch.no_grad(),
+            torch.autocast(
+                device_type=self.device.type,
+                dtype=self.mixed_precision_dtype,
+                enabled=self.mixed_precision_dtype is not None,
+            ),
+        ):
+            if attn_mask is not None or labels is not None:
+                assert attn_mask is not None and labels is not None
+                assert transformers.AutoModelForSeq2SeqLM == self.AUTO_MODEL_CLASS
+                return self.model(
+                    input_ids=inps, attention_mask=attn_mask, labels=labels
+                ).logits
+            else:
+                assert self.AUTO_MODEL_CLASS in (
+                    transformers.AutoModelForCausalLM,
+                    transformers.AutoModelForVision2Seq,
+                )
+                return self.model(inps).logits

-    def _model_generate(self, context, max_length, stop, **generation_kwargs):
+    def _model_generate(
+        self,
+        context,
+        max_length: int,
+        stop: list[str],
+        **generation_kwargs: dict[str, Any],
+    ) -> torch.Tensor:
         # temperature = 0.0 if not set
         # if do_sample is false and temp==0.0:
         # remove temperature, as do_sample=False takes care of this
         # and we don't want a warning from HF
         generation_kwargs["temperature"] = generation_kwargs.get("temperature", 0.0)
-        do_sample = generation_kwargs.get("do_sample", None)
+        do_sample = generation_kwargs.get("do_sample")

         # The temperature has to be a strictly positive float -- if it is 0.0, use greedy decoding strategies
         if generation_kwargs.get("temperature") == 0.0 and do_sample is None:
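The nested `with torch.no_grad(): with torch.autocast(...):` pair becomes a single parenthesized `with` block (official grammar since Python 3.10), and autocast stays a no-op unless a mixed-precision dtype was requested. A hedged sketch of the same pattern outside the class, with a hypothetical forward helper:

import torch


def forward_logits(model, inps: torch.Tensor, mixed_precision_dtype=None) -> torch.Tensor:
    # Sketch of the merged context managers: gradients are disabled and
    # autocast is only enabled when a mixed-precision dtype is provided.
    with (
        torch.no_grad(),
        torch.autocast(
            device_type=inps.device.type,
            dtype=mixed_precision_dtype,
            enabled=mixed_precision_dtype is not None,
        ),
    ):
        return model(inps).logits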
@@ -978,7 +987,10 @@ class HFLM(TemplateLM):
         )

     def _select_cont_toks(
-        self, logits: torch.Tensor, contlen: int = None, inplen: int = None
+        self,
+        logits: torch.Tensor,
+        contlen: int | None = None,
+        inplen: int | None = None,
     ) -> torch.Tensor:
         if self.backend == "causal":
             assert contlen and inplen, (

@@ -998,8 +1010,8 @@ class HFLM(TemplateLM):
         return logits

     def loglikelihood_rolling(
-        self, requests: List[Instance], disable_tqdm: bool = False
-    ) -> List[float]:
+        self, requests: list[Instance], disable_tqdm: bool = False
+    ) -> list[float]:
         adaptive_batch_size = None
         if self.batch_size == "auto":
             # using rolling window with maximum context

@@ -1018,7 +1030,7 @@ class HFLM(TemplateLM):
                 disable=(disable_tqdm or (self.rank != 0)),
             )
         ):
-            rolling_token_windows: List[Tuple[List[int], List[int]]] = list(
+            rolling_token_windows: list[tuple[list[int], list[int]]] = list(
                 map(
                     utils.make_disjoint_window,
                     utils.get_rolling_token_windows(

@@ -1102,15 +1114,15 @@ class HFLM(TemplateLM):
     def _loglikelihood_tokens(
         self,
-        requests: List[Tuple[Tuple[str, str], List[int], List[int]]],
+        requests: list[tuple[tuple[str, str], list[int], list[int]]],
         disable_tqdm: bool = False,
-        override_bs: int = None,
-    ) -> List[Tuple[float, bool]]:
+        override_bs: int | None = None,
+    ) -> list[tuple[float, bool]]:
         # TODO: implement some kind of efficient-request-middleware that lumps together requests with the same context
         res = []

-        def _collate(req: Tuple[Tuple[str, str], List[int], List[int]]):
-            """Defines the key for the sorted method"""
+        def _collate(req: tuple[tuple[str, str], list[int], list[int]]):
+            """Defines the key for the sorted method."""
             # the negative sign on len(toks) sorts descending - this has a few advantages:
             # - time estimates will always be over not underestimates, which is more useful for planning
             # - to know the size of a batch when going through the list, you know the first one is always the batch

@@ -1121,8 +1133,8 @@ class HFLM(TemplateLM):
             toks = req[1] + req[2]
             return -len(toks), tuple(toks)

-        def _lookup_one_token_cont(req: Tuple[Tuple[str, str], List[int], List[int]]):
-            """Defines the key to group and lookup one-token continuations"""
+        def _lookup_one_token_cont(req: tuple[tuple[str, str], list[int], list[int]]):
+            """Defines the key to group and lookup one-token continuations."""
             # Use with group_by="contexts" (optional)"
             # allows for the creation of a lookup, so we can reuse logits in case of one-token continuations.
             # speeds up some multiple-choice tasks proportionally to the number of choices.
@@ -1295,7 +1307,7 @@ class HFLM(TemplateLM):
         # original args. Otherwise, expands the logits batch dimension and yields each
         # batch along with matching continuation tokens and prompt strings.
         # logits -> [1, seq, vocab]
-        for request_str, cont_toks, logits in re_ord.get_cache(
+        for request_str, cont_toks, logits in re_ord.get_cache(  # noqa
             req_str=request_str,
             cxt_toks=ctx_tokens,
             cont_toks=cont_toks,

@@ -1336,11 +1348,11 @@ class HFLM(TemplateLM):
         return re_ord.get_original(res)

     def generate_until(
-        self, requests: List[Instance], disable_tqdm: bool = False
-    ) -> List[str]:
+        self, requests: list[Instance], disable_tqdm: bool = False
+    ) -> list[str]:
         res = []

-        def _collate(req: Tuple[str, dict]):
+        def _collate(req: tuple[str, dict]):
             """Defines the key for the sorted method"""
             # the negative sign on len(toks) sorts descending - this has a few advantages:
             # - time estimates will always be over not underestimates, which is more useful for planning

@@ -1400,10 +1412,10 @@ class HFLM(TemplateLM):
                 # add EOS token to stop sequences
                 until = handle_stop_sequences(kwargs.pop("until", None), eos=eos)
             else:
-                raise ValueError(
+                raise TypeError(
                     f"Expected `kwargs` to be of type `dict` but got {type(gen_kwargs)}"
                 )
-            if "max_gen_toks" in kwargs.keys():
+            if "max_gen_toks" in kwargs:
                 max_gen_toks = kwargs.pop("max_gen_toks")
             else:
                 max_gen_toks = self.max_gen_toks

@@ -1481,11 +1493,9 @@ class HFLM(TemplateLM):
         return res

     def apply_chat_template(
-        self, chat_history: List[Dict[str, str]], add_generation_prompt: bool = True
+        self, chat_history: list[dict[str, str]], add_generation_prompt: bool = True
     ) -> str:
-        """
-        Method to apply a chat template to a list of chat history between user and model.
-        """
+        """Method to apply a chat template to a list of chat history between user and model."""
         try:
             chat_templated = self.tokenizer.apply_chat_template(
                 chat_history,

@@ -1510,9 +1520,7 @@ class HFLM(TemplateLM):
         return chat_templated

     def get_model_info(self) -> dict:
-        """
-        Method to get Hugging Face model information for experiment reproducibility.
-        """
+        """Method to get Hugging Face model information for experiment reproducibility."""

         def get_model_num_params(model) -> int:
             if hasattr(model, "num_parameters"):
lm_eval/tasks/hellaswag/hellaswag.yaml  (view file @ 904bba12)

 tag:
   - multiple_choice
 task: hellaswag
-dataset_path: hellaswag
+dataset_path: Rowan/hellaswag
 dataset_name: null
 output_type: multiple_choice
 training_split: train
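The task config now points at the namespaced `Rowan/hellaswag` repository instead of the bare `hellaswag` alias; the commit message lists this as "fix hellaswag path", presumably because the old path relied on Hub behavior that newer `datasets` releases no longer provide. A quick local check might look like this (field name taken from the dataset card):

from datasets import load_dataset

# Loads HellaSwag from the Rowan/hellaswag repo without trust_remote_code.
hellaswag = load_dataset("Rowan/hellaswag", split="train")
print(hellaswag[0]["ctx"])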
pyproject.toml  (view file @ 904bba12)

@@ -21,7 +21,7 @@ license = { "text" = "MIT" }
 dependencies = [
     "accelerate>=0.26.0",
     "evaluate",
-    "datasets>=2.16.0",
+    "datasets>=2.16.0,<4.0",
     "evaluate>=0.4.0",
     "jsonlines",
     "numexpr",
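The new upper bound makes pip refuse to install datasets 4.x alongside lm-eval. The same constraint can also be asserted at runtime before a long evaluation run; a small hedged sketch mirroring the pinned range:

import datasets
from packaging.version import parse as vparse

installed = vparse(datasets.__version__)
if not (vparse("2.16.0") <= installed < vparse("4.0")):
    raise RuntimeError(
        f"datasets {installed} is outside the supported range >=2.16.0,<4.0"
    )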