@@ -17,7 +18,6 @@ from lm_eval.models.utils import (
undistribute,
)
from lm_eval.utils import (
eval_logger,
get_rolling_token_windows,
make_disjoint_window,
)
...
...
@@ -34,7 +34,7 @@ except ModuleNotFoundError:
if TYPE_CHECKING:
pass
eval_logger = eval_logger
eval_logger = logging.getLogger(__name__)
@register_model("vllm")
...
...
@@ -75,7 +75,6 @@ class VLLM(TemplateLM):
"Please install vllm via `pip install lm-eval[vllm]` or `pip install -e .[vllm]`"
)
assert"cuda"indeviceordeviceisNone,"vLLM only supports CUDA"
assertmax_lengthisNoneormax_model_lenisNone,(
"Either max_length or max_model_len may be provided, but not both"
)
...
...
@@ -110,7 +109,7 @@ class VLLM(TemplateLM):
eval_logger.warning(
"You might experience occasional issues with model weight downloading when data_parallel is in use. To ensure stable performance, run with data_parallel_size=1 until the weights are downloaded and cached."
)

BasqueBench is a benchmark for evaluating language models on Basque tasks. That is, it evaluates the ability of a language model to understand and generate Basque text. BasqueBench offers a combination of pre-existing, open datasets and datasets developed exclusively for this benchmark. Full details of BasqueBench will be published in a forthcoming paper.
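For reference, below is a minimal sketch of how one might run these tasks programmatically against the vLLM backend touched in the diff above. The group name `basquebench`, the model checkpoint, and the model arguments are placeholders, not confirmed by this PR; check the task registry (`lm_eval --tasks list`) for the exact names it registers.

```python
# Minimal sketch (assumptions noted above): evaluate a model on the new
# Basque tasks using the harness's Python API and the vLLM backend.
import lm_eval

results = lm_eval.simple_evaluate(
    model="vllm",
    # Placeholder checkpoint and vLLM settings -- substitute your own.
    model_args="pretrained=your-org/your-basque-model,dtype=auto,gpu_memory_utilization=0.8",
    # Placeholder group name; individual tasks (e.g. the MGSM_eu task) can be listed instead.
    tasks=["basquebench"],
    batch_size="auto",
)
print(results["results"])
```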
The new evaluation datasets included in BasqueBench are:
| Task | Category | Homepage |
|:-------------:|:-----:|:-----:|
| MGSM_eu | Math | https://huggingface.co/datasets/HiTZ/MGSM-eu |