Commit f7a6573f authored by Nathan Habib

checkout from main

parent b5111e31
......@@ -102,12 +102,10 @@ results = lm_eval.simple_evaluate( # call simple_evaluate
)
```
See https://github.com/EleutherAI/lm-evaluation-harness/blob/365fcda9b85bbb6e0572d91976b8daf409164500/lm_eval/evaluator.py#L35 for a full description of all arguments available. All keyword arguments to simple_evaluate share the same role as the command-line flags described previously.
See the `simple_evaluate()` and `evaluate()` functions in [lm_eval/evaluator.py](../lm_eval/evaluator.py#:~:text=simple_evaluate) for a full description of all arguments available. All keyword arguments to simple_evaluate share the same role as the command-line flags described previously.
Additionally, the `evaluate()` function offers the core evaluation functionality provided by the library, but without some of the special handling and simplification + abstraction provided by `simple_evaluate()`.
See https://github.com/EleutherAI/lm-evaluation-harness/blob/365fcda9b85bbb6e0572d91976b8daf409164500/lm_eval/evaluator.py#L173 for more details.
As a brief example usage of `evaluate()`:
```python
......@@ -147,7 +145,7 @@ task_dict = lm_eval.tasks.get_task_dict(
task_manager # A task manager that allows lm_eval to
# load the task during evaluation.
# If none is provided, `get_task_dict`
# will instantiated one itself, but this
# will instantiate one itself, but this
# only includes the stock tasks so users
# will need to set this if including
# custom paths is required.
......
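Pieced together, the documented flow above amounts to roughly the following minimal sketch (the model name and the custom-task path are illustrative, not taken from this diff):

```python
import lm_eval
from lm_eval.models.huggingface import HFLM
from lm_eval.tasks import TaskManager, get_task_dict

# Instantiate a model wrapper directly instead of passing a model string
# to simple_evaluate().
lm = HFLM(pretrained="EleutherAI/pythia-160m")

# A TaskManager built with include_path can also discover custom task configs;
# if none is passed, get_task_dict() instantiates a stock-tasks-only manager itself.
task_manager = TaskManager(include_path="/path/to/custom/tasks")  # illustrative path
task_dict = get_task_dict(["arc_easy"], task_manager)

# evaluate() is the lower-level entry point: no model-string handling,
# just an already-built LM instance and a prepared task_dict.
results = lm_eval.evaluate(lm=lm, task_dict=task_dict, limit=10)
```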
......@@ -23,6 +23,7 @@ DEEPSPARSE_MODELS_TASKS = [
]
@pytest.mark.skip(reason="test failing")
@pytest.mark.parametrize("model_id,task", SPARSEML_MODELS_TASKS)
def test_sparseml_eval(model_id, task):
lm = get_model("sparseml").create_from_arg_string(
......
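For context on the helper this test uses: `get_model()` resolves a registered backend class by name, and `create_from_arg_string()` builds it from a CLI-style argument string. A sketch of that pattern, using the "hf" backend and an illustrative model name rather than the "sparseml" backend from the test:

```python
from lm_eval.api.registry import get_model

# Resolve a registered backend by name, then parse a "key=value,key=value"
# argument string into constructor kwargs (extra kwargs go in the dict).
lm = get_model("hf").create_from_arg_string(
    "pretrained=EleutherAI/pythia-70m",
    {"batch_size": 1, "device": "cpu"},
)
```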
from typing import List
import pytest
import torch
from lm_eval import tasks
from lm_eval.api.instance import Instance
......@@ -11,7 +10,7 @@ task_manager = tasks.TaskManager()
@pytest.mark.skip(reason="requires CUDA")
class TEST_VLLM:
class Test_VLLM:
vllm = pytest.importorskip("vllm")
try:
from lm_eval.models.vllm_causallms import VLLM
......@@ -19,7 +18,7 @@ class TEST_VLLM:
LM = VLLM(pretrained="EleutherAI/pythia-70m")
except ModuleNotFoundError:
pass
torch.use_deterministic_algorithms(True)
# torch.use_deterministic_algorithms(True)
task_list = task_manager.load_task_or_group(["arc_easy", "gsm8k", "wikitext"])
multiple_choice_task = task_list["arc_easy"] # type: ignore
multiple_choice_task.build_all_requests(limit=10, rank=0, world_size=1)
......
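For orientation, the request flow this test class exercises is roughly the following sketch (assumes vllm and a CUDA device are available; only the loglikelihood path is shown):

```python
from lm_eval import tasks
from lm_eval.models.vllm_causallms import VLLM

lm = VLLM(pretrained="EleutherAI/pythia-70m")

task_manager = tasks.TaskManager()
task = task_manager.load_task_or_group(["arc_easy"])["arc_easy"]
task.build_all_requests(limit=10, rank=0, world_size=1)

# Each built instance carries (context, continuation) arguments;
# loglikelihood() returns one (logprob, is_greedy) pair per instance.
results = lm.loglikelihood(task.instances)
```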