# sentence_splitter.py
import re

3
4
from filelock import FileLock

5
6
7
8
9
10
11
12
13

# nltk is an optional dependency: record whether the import succeeded so the
# failure is reported when sentence splitting is requested, not at import time.
try:
    import nltk

    NLTK_AVAILABLE = True
except ImportError:  # also covers ModuleNotFoundError, which subclasses ImportError
    NLTK_AVAILABLE = False

if NLTK_AVAILABLE:
    # The download runs while holding a file lock named "a_random_string"
    # (presumably so concurrent processes do not fetch "punkt" at the same
    # time -- confirm against how this module is launched).
    with FileLock("a_random_string") as lock:
        nltk.download("punkt", quiet=True)


def add_newline_to_end_of_each_sentence(x: str) -> str:
    """Return ``x`` with each sentence placed on its own line.

    This was added to get rougeLsum scores matching published rougeL scores for BART and PEGASUS.

    Args:
        x: Text to split. Every literal ``<n>`` marker (the pegasus newline
            char) is removed before sentence tokenization.

    Returns:
        The sentences produced by ``nltk.sent_tokenize``, joined with a
        newline character.

    Raises:
        AssertionError: If nltk is not installed.
    """
    assert NLTK_AVAILABLE, "nltk must be installed to separate newlines between sentences. (pip install nltk)"
    # Strings are immutable, so the cleaned text has to be bound to a name;
    # the pattern is a plain literal, so str.replace is sufficient.
    cleaned = x.replace("<n>", "")  # remove pegasus newline char
    return "\n".join(nltk.sent_tokenize(cleaned))