Merge remote-tracking branch 'origin/master' into fazz/refactor-task-coqa

5552c8dc · thefazzer · c0862026 · 826d90e2 · 5552c8dc · 5552c8dc
Commit 5552c8dc authored Feb 02, 2021 by thefazzer
Hide whitespace changes
Inline Side-by-side

Showing with 80 additions and 4 deletions

lm_eval/utils_stream.py lm_eval/utils_stream.py +7 -4

setup.py setup.py +22 -0

tests/test_all_sanitycheck.py tests/test_all_sanitycheck.py +51 -0

No files found.
--- a/lm_eval/utils_stream.py
+++ b/lm_eval/utils_stream.py
@@ -5,11 +5,13 @@ from tqdm import tqdm
 import json


-class ExitCodeError(Exception): pass
+class ExitCodeError(Exception):
+    pass


 def sh(x):
-    if os.system(x): raise ExitCodeError()
+    if os.system(x):
+        raise ExitCodeError()

 def ls(x):
    return [x + '/' + fn for fn in os.listdir(x)]
@@ -64,7 +66,8 @@ class join:
        self.sep = sep

    def __rrshift__(self, other):
-        if other is None: return
+        if other is None:
+            return
        try:
            return self.sep.join(other)
        except:
@@ -156,4 +159,4 @@ def comp(*fs):
    return _f


-X = Reflective()
\ No newline at end of file
+X = Reflective()
--- a/setup.py
+++ b/setup.py
+import setuptools
+
+with open("README.md", "r", encoding="utf-8") as fh:
+    long_description = fh.read()
+
+setuptools.setup(
+    name="lm_eval_harness",
+    version="0.0.1",
+    author="Leo Gao",
+    author_email="lg@eleuther.ai",
+    description="A framework for evaluating autoregressive language models",
+    long_description=long_description,
+    long_description_content_type="text/markdown",
+    url="https://github.com/EleutherAI/lm-evaluation-harness",
+    packages=setuptools.find_packages(),
+    classifiers=[
+        "Programming Language :: Python :: 3",
+        "License :: OSI Approved :: MIT License",
+        "Operating System :: OS Independent",
+    ],
+    python_requires='>=3.6',
+)
--- a/tests/test_all_sanitycheck.py
+++ b/tests/test_all_sanitycheck.py
+import lm_eval.tasks as tasks
+import lm_eval.base as base
+from unittest.mock import MagicMock
+from itertools import islice
+import pytest
+
+
+@pytest.mark.parametrize("taskname,Task", tasks.TASK_REGISTRY.items())
+def test_basic_interface(taskname, Task):
+    print('Evaluating task', taskname)
+    dl = Task.download
+    Task.download = MagicMock()
+    task = Task()
+    Task.download = dl
+
+    assert task.has_training_docs() in [True, False]
+    assert task.has_validation_docs() in [True, False]
+    assert task.has_test_docs() in [True, False]
+
+    assert isinstance(task.aggregation(), dict)
+    assert isinstance(task.higher_is_better(), dict)
+    assert task.aggregation().keys() == task.higher_is_better().keys()
+
+    for v in task.higher_is_better().values(): assert v in [True, False]
+
+
+@pytest.mark.parametrize("taskname,Task", tasks.TASK_REGISTRY.items())
+def test_documents_and_requests(taskname, Task):
+    print('Evaluating task', taskname)
+    task = Task()
+    fns = []
+    if task.has_training_docs(): fns.append(task.training_docs)
+    if task.has_validation_docs(): fns.append(task.validation_docs)
+    # test docs might not have labels
+    #if task.has_test_docs(): fns.append(task.test_docs)
+
+    for fn in fns:
+        #print(list(islice(fn(), 10)))
+        for doc in islice(fn(), 10):
+            
+            txt = task.doc_to_text(doc)
+            tgt = task.doc_to_target(doc)
+
+            assert isinstance(txt, str)
+            assert isinstance(tgt, str)
+
+            reqs = task.construct_requests(doc, txt)
+
+            # TODO: mock the LM by plugging in what's currently in main.py here
+            for req in reqs:
+                assert isinstance(req, base.Request)
\ No newline at end of file