Merge pull request #8 from dirkgr/PytestUpdate

Pytest update

Merge pull request #8 from dirkgr/PytestUpdate
Pytest update
1cd4ec01 · Stella Biderman · GitHub · 33f5572a · 38a240ce · 1cd4ec01
Unverified commit 1cd4ec01, authored Apr 27, 2022 by Stella Biderman; committed by GitHub on Apr 27, 2022.
5 changed files
--- a/lm_eval/tasks/hendrycks_ethics.py
+++ b/lm_eval/tasks/hendrycks_ethics.py
@@ -277,20 +277,18 @@ class EthicsUtilitarianism(Ethics):
    DATASET_NAME = "utilitarianism"

    def training_docs(self):
-        rnd = random.Random()
        for doc in self.dataset["train"]:
-            yield self._process_doc(doc, rnd)
+            yield self._process_doc(doc)

    def validation_docs(self):
        raise NotImplementedError

    def test_docs(self):
-        rnd = random.Random()
        for doc in self.dataset["test"]:
-            yield self._process_doc(doc, rnd)
+            yield self._process_doc(doc)

-    def _process_doc(self, doc, rnd):
-        rnd.seed(doc["activity"])
+    def _process_doc(self, doc):
+        rnd = random.Random(doc["activity"])
        scenarios = [doc["activity"], doc["baseline"]]
        ordering = [0, 1]
        rnd.shuffle(ordering)

--- a/lm_eval/tasks/hendrycks_math.py
+++ b/lm_eval/tasks/hendrycks_math.py
@@ -38,15 +38,15 @@ class Math(Task):
        return True

    def training_docs(self):
-        return map(self._load_doc, self.dataset["train"])
+        return map(self._process_doc, self.dataset["train"])

    def validation_docs(self):
        return NotImplemented

    def test_docs(self):
-        return map(self._load_doc, self.dataset["test"])
+        return map(self._process_doc, self.dataset["test"])

-    def _load_doc(self, doc):
+    def _process_doc(self, doc):
        doc["answer"] = self.remove_boxed(
            self.last_boxed_only_string(doc["solution"]))
        return doc

--- a/lm_eval/tasks/wikitext.py
+++ b/lm_eval/tasks/wikitext.py
@@ -76,15 +76,15 @@ class WikiText(PerplexityTask):
        return True

    def training_docs(self):
-        return map(self._load_doc, self.dataset["train"])
+        return map(self._process_doc, self.dataset["train"])

    def validation_docs(self):
-        return map(self._load_doc, self.dataset["validation"])
+        return map(self._process_doc, self.dataset["validation"])

    def test_docs(self):
-        return map(self._load_doc, self.dataset["test"])
+        return map(self._process_doc, self.dataset["test"])

-    def _load_doc(self, doc):
+    def _process_doc(self, doc):
        return doc["page"]

    def doc_to_target(self, doc):

--- a/lm_eval/tasks/wsc273.py
+++ b/lm_eval/tasks/wsc273.py
@@ -53,9 +53,9 @@ class WinogradSchemaChallenge273(Task):
        return True

    def test_docs(self):
-        return map(self._load_doc, self.dataset["test"])
+        return map(self._process_doc, self.dataset["test"])

-    def _load_doc(self, doc):
+    def _process_doc(self, doc):
        # The HF implementation of `wsc273` is not `partial evaluation` friendly.
        doc["text"] = doc["text"].replace("  ", " ")
        doc["options"][0] = self.__normalize_option(doc, doc["options"][0])

--- a/setup.py
+++ b/setup.py
@@ -37,7 +37,6 @@ setuptools.setup(
        "pycountry==20.7.3",
        "numexpr==2.7.2",
        "lm_dataformat==0.0.20",
-        "pytest==6.2.3",
        "pybind11==2.6.2",
        "tqdm-multiprocess==0.0.11",
        "zstandard==0.15.2",
@@ -51,4 +50,5 @@ setuptools.setup(
    dependency_links=[
        "https://github.com/google-research/bleurt/archive/b610120347ef22b494b6d69b4316e303f5932516.zip#egg=bleurt",
    ],
+    extras_require={'dev': [ 'pytest', 'black' ]}
 )