"examples/vscode:/vscode.git/clone" did not exist on "64936449929bb6e61853eb8e93e76c0e7d39ab02"
Unverified commit a3252ed7, authored by Hailey Schoelkopf, committed by GitHub
Browse files

Merge pull request #862 from EleutherAI/bump-deps

[Refactor] Set python3.8 as allowed version
parents 54a53d6f fa2ae334
...@@ -40,7 +40,7 @@ jobs: ...@@ -40,7 +40,7 @@ jobs:
flake8 . --count --select=F,E9,E71,E72,E501,E112,E113,W6 --extend-ignore=F541 --show-source --statistics --exit-zero flake8 . --count --select=F,E9,E71,E72,E501,E112,E113,W6 --extend-ignore=F541 --show-source --statistics --exit-zero
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
# mypy turned off for now # # mypy turned off for now
# - name: Lint with mypy # - name: Lint with mypy
# run: mypy . --ignore-missing-imports --check-untyped-defs --explicit-package-bases --warn-unreachable # run: mypy . --ignore-missing-imports --check-untyped-defs --explicit-package-bases --warn-unreachable
# Job 2 # Job 2
...@@ -49,9 +49,8 @@ jobs: ...@@ -49,9 +49,8 @@ jobs:
# runs-on: ubuntu-latest # runs-on: ubuntu-latest
# strategy: # strategy:
# matrix: # matrix:
# python-version: [ "3.9", "3.10", "3.11" ] # python-version: [ "3.8", "3.9", "3.10", "3.11" ]
# timeout-minutes: 30 # timeout-minutes: 30
# steps: # steps:
# - name: Checkout Code # - name: Checkout Code
# uses: actions/checkout@v3 # uses: actions/checkout@v3
......
...@@ -3,7 +3,7 @@ import string ...@@ -3,7 +3,7 @@ import string
import pickle import pickle
import traceback import traceback
from pprint import pprint from pprint import pprint
from typing import Iterator, Sequence, TypeVar from typing import Iterator, Sequence, TypeVar, List, Tuple
# This is a cpp module. Compile janitor_util.cpp with: # This is a cpp module. Compile janitor_util.cpp with:
# c++ -O3 -Wall -shared -std=c++11 -fPIC $(python3 -m pybind11 --includes) janitor_util.cpp -o janitor_util$(python3-config --extension-suffix) -undefined dynamic_lookup # c++ -O3 -Wall -shared -std=c++11 -fPIC $(python3 -m pybind11 --includes) janitor_util.cpp -o janitor_util$(python3-config --extension-suffix) -undefined dynamic_lookup
...@@ -21,7 +21,7 @@ T = TypeVar("T") ...@@ -21,7 +21,7 @@ T = TypeVar("T")
# Implementation from nltk source # Implementation from nltk source
# https://www.nltk.org/_modules/nltk/util.html # https://www.nltk.org/_modules/nltk/util.html
def form_ngrams(sequence: Iterator[T], n: int) -> Iterator[tuple[T, ...]]: def form_ngrams(sequence: Iterator[T], n: int) -> Iterator[Tuple[T, ...]]:
history = [] history = []
while n > 1: while n > 1:
# PEP 479, prevent RuntimeError from being raised when StopIteration bubbles out of generator # PEP 479, prevent RuntimeError from being raised when StopIteration bubbles out of generator
...@@ -70,14 +70,14 @@ def word_ngrams(s: str, n: int) -> Iterator[str]: ...@@ -70,14 +70,14 @@ def word_ngrams(s: str, n: int) -> Iterator[str]:
# https://stackoverflow.com/questions/13734451/string-split-with-indices-in-python # https://stackoverflow.com/questions/13734451/string-split-with-indices-in-python
def split_indices(s: str) -> Iterator[Tuple[str, Tuple[int, int]]]:
    """Split a string on whitespace, recording each token's position in the original.

    Yields ``(word, (start_idx, end_idx))`` pairs, where ``end_idx`` is the
    INCLUSIVE index of the word's last character (``m.end() - 1``), not the
    usual Python half-open end.

    Reference: https://stackoverflow.com/questions/13734451/string-split-with-indices-in-python
    """
    # \S+ matches maximal runs of non-whitespace, so consecutive spaces
    # produce no empty tokens and an empty/blank string yields nothing.
    return ((m.group(0), (m.start(), m.end() - 1)) for m in re.finditer(r"\S+", s))
def word_ngrams_indices(s: str, n: int) -> Iterator[tuple[str, tuple[int, int]]]: def word_ngrams_indices(s: str, n: int) -> Iterator[Tuple[str, Tuple[int, int]]]:
"""Splits a string into pairs of (ngram words, their start/end indices)""" """Splits a string into pairs of (ngram words, their start/end indices)"""
tokens_with_indices = split_indices(s) tokens_with_indices = split_indices(s)
...@@ -157,7 +157,7 @@ class Janitor: ...@@ -157,7 +157,7 @@ class Janitor:
print("WARNING: Janitor running in python mode") print("WARNING: Janitor running in python mode")
return self.register_contaminant_python(dirt_string) return self.register_contaminant_python(dirt_string)
def clean(self, dirty_string: str) -> list[str]: def clean(self, dirty_string: str) -> List[str]:
"""Clean a string (e.g. a training set) by removing all ngrams previously """Clean a string (e.g. a training set) by removing all ngrams previously
registered as contaminants. Returns a list of clean chunks, or empty if registered as contaminants. Returns a list of clean chunks, or empty if
the string was too dirty""" the string was too dirty"""
...@@ -168,8 +168,8 @@ class Janitor: ...@@ -168,8 +168,8 @@ class Janitor:
return self.clean_python(dirty_string) return self.clean_python(dirty_string)
def _split_chunks( def _split_chunks(
self, dirty_string: str, dirty_parts: Sequence[tuple] self, dirty_string: str, dirty_parts: Sequence[Tuple]
) -> list[str]: ) -> List[str]:
clean_chunks = [] clean_chunks = []
splice_idx = 0 splice_idx = 0
end = -1 end = -1
...@@ -197,7 +197,7 @@ class Janitor: ...@@ -197,7 +197,7 @@ class Janitor:
janitor_util.clean_ngram(dirt_string, self.delete_chars, self.ngram_n) janitor_util.clean_ngram(dirt_string, self.delete_chars, self.ngram_n)
) )
def clean_cpp(self, dirty_string: str) -> list[str]: def clean_cpp(self, dirty_string: str) -> List[str]:
contamination_indices = janitor_util.clean_ngram_with_indices( contamination_indices = janitor_util.clean_ngram_with_indices(
dirty_string, self.delete_chars, self.ngram_n dirty_string, self.delete_chars, self.ngram_n
) )
...@@ -215,7 +215,7 @@ class Janitor: ...@@ -215,7 +215,7 @@ class Janitor:
word_ngrams(self.normalize_string(dirt_string), self.ngram_n) word_ngrams(self.normalize_string(dirt_string), self.ngram_n)
) )
def clean_python(self, dirty_string: str) -> list[str]: def clean_python(self, dirty_string: str) -> List[str]:
contamination_indices = ( contamination_indices = (
(None, *idx_pair) (None, *idx_pair)
for dirty_ngram, idx_pair in word_ngrams_indices(dirty_string, self.ngram_n) for dirty_ngram, idx_pair in word_ngrams_indices(dirty_string, self.ngram_n)
......
[mypy] [mypy]
python_version = 3.9 python_version = 3.8
show_traceback = True show_traceback = True
check_untyped_defs = True check_untyped_defs = True
no_implicit_reexport = True no_implicit_reexport = True
......
...@@ -16,7 +16,7 @@ classifiers = [ ...@@ -16,7 +16,7 @@ classifiers = [
"License :: OSI Approved :: MIT License", "License :: OSI Approved :: MIT License",
"Operating System :: OS Independent", "Operating System :: OS Independent",
] ]
requires-python = ">=3.9" requires-python = ">=3.8"
license = { "text" = "MIT" } license = { "text" = "MIT" }
dependencies = [ dependencies = [
"accelerate>=0.21.0", "accelerate>=0.21.0",
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment