Commit 96c60cf6 authored by baberabb's avatar baberabb
Browse files

artifact loglikelihood results

parent f1e62d36
......@@ -70,3 +70,9 @@ jobs:
# if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
- name: Test with pytest
run: python -m pytest --showlocals -s -vv -n=auto --ignore=tests/tests_master --ignore=tests/extra
- name: Archive artifacts
  # Run even when an earlier step (e.g. the pytest run) fails: without an
  # explicit condition this step is skipped on failure, which is exactly
  # when the logged outputs are needed for debugging.
  if: always()
  uses: actions/upload-artifact@v3
  with:
    name: output_results
    path: |
      test_logs/*
from __future__ import annotations
import pytest
from pathlib import Path
import numpy as np
from lm_eval.models.huggingface import HFLM
from lm_eval.api.instance import Instance
import lm_eval.tasks as tasks
import sys
import torch
class Test_HFLM:
torch.use_deterministic_algorithms(True)
version_minor = sys.version_info.minor
multiple_choice_task = tasks.TASK_REGISTRY.get("arc_easy")() # type: ignore
multiple_choice_task.build_all_requests(limit=10, rank=0, world_size=1)
MULTIPLE_CH: list[Instance] = multiple_choice_task.instances
......@@ -91,7 +94,12 @@ class Test_HFLM:
def test_logliklihood(self) -> None:
    """Compare loglikelihood outputs with the stored reference values.

    Calls ``self.LM.loglikelihood`` on ``self.MULTIPLE_CH``, keeps the first
    element of every returned item, writes those values (one per line) to
    ``test_logs/outputs_log_<python minor version>.txt`` and then asserts
    that they match ``self.MULTIPLE_CH_RES`` within an absolute tolerance.

    The log file is written *before* the assertion so the values are still
    available for inspection when the comparison fails.
    """
    res = self.LM.loglikelihood(self.MULTIPLE_CH)
    _RES, _res = self.MULTIPLE_CH_RES, [r[0] for r in res]
    # log samples to CI
    file_path = Path(f"test_logs/outputs_log_{self.version_minor}.txt")
    # Create only the parent directory. Calling mkdir() on the file path
    # itself would create a *directory* named "outputs_log_<n>.txt" and the
    # open() below would then fail with IsADirectoryError.
    file_path.parent.mkdir(parents=True, exist_ok=True)
    file_path = file_path.resolve()
    with open(file_path, "w") as f:
        f.write("\n".join(str(x) for x in _res))
    # change atol in case of consistent failure
    assert np.allclose(_res, _RES, atol=1e-4)
# check indices for Multiple Choice
argmax_RES, argmax_res = np.argmax(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment