Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
0351bb6b
Commit
0351bb6b
authored
Dec 19, 2024
by
Baber
Browse files
handle nltk punkt_tab
parent
07429c86
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
25 additions
and
0 deletions
+25
-0
lm_eval/tasks/ruler/prepare.py
lm_eval/tasks/ruler/prepare.py
+25
-0
No files found.
lm_eval/tasks/ruler/prepare.py
View file @
0351bb6b
...
...
@@ -4,7 +4,11 @@ import uuid
import
numpy
as
np
import
wonderwords
import
nltk
from
nltk
import
sent_tokenize
from
packaging.version
import
parse
as
parse_version
from
importlib.metadata
import
version
from
tqdm
import
tqdm
from
transformers
import
AutoTokenizer
...
...
@@ -32,6 +36,27 @@ WORDS = sorted(list(set(words)))
# Positions
# Forty evenly spaced values from 0 to 100 inclusive, rounded to integers.
DEPTHS = list(np.linspace(0, 100, num=40, endpoint=True).round().astype(int))

# Oldest `nltk` release accepted by this module.
NLTK_MIN_VERSION = "3.9.1"

# Value of the LOCAL_RANK environment variable as a string; "0" when unset.
RANK = os.getenv("LOCAL_RANK", "0")
def download_nltk_resources():
    """Ensure the NLTK ``punkt_tab`` tokenizer data is available locally.

    First checks that the installed ``nltk`` distribution is at least
    ``NLTK_MIN_VERSION``. Then looks up ``tokenizers/punkt_tab``; when the
    lookup fails, only the process whose ``RANK`` is ``"0"`` downloads it.

    Raises:
        AssertionError: If the installed ``nltk`` version is older than
            ``NLTK_MIN_VERSION``.
    """
    nltk_version = parse_version(version("nltk"))
    # Raise explicitly instead of using `assert`: assert statements are
    # removed under `python -O`, which would silently skip this check.
    if nltk_version < parse_version(NLTK_MIN_VERSION):
        raise AssertionError(
            f"`nltk` version {nltk_version} is not >= {NLTK_MIN_VERSION}. Please update `nltk` before proceeding--older versions are vulnerable to a remote code execution vulnerability."
        )

    try:
        nltk.data.find("tokenizers/punkt_tab")
    except LookupError:
        # Only rank 0 performs the download; other ranks do nothing here.
        if RANK == "0":
            # nltk.download reports success through its boolean return value,
            # so only announce the download when it actually succeeded.
            if nltk.download("punkt_tab"):
                print("Downloaded punkt_tab on rank 0")
            else:
                print("Failed to download punkt_tab on rank 0")


# Runs when this module is imported.
download_nltk_resources()
def
generate_random_number
(
num_digits
=
7
):
lower_bound
=
10
**
(
num_digits
-
1
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment