Unverified commit 2de3688f, authored by Hailey Schoelkopf and committed by GitHub
Browse files

update nltk version to require 3.9.1 (#2259)

parent 40010ec1
......@@ -26,4 +26,4 @@ metric_list:
aggregation: !function utils.agg_inst_level_acc
higher_is_better: true
metadata:
version: 3.0
version: 4.0
......@@ -20,14 +20,28 @@ import re
import immutabledict
import nltk
import pkg_resources
from packaging import version
# Downloading 'punkt' with nltk<3.9 exposes a remote code execution vulnerability.
# see https://github.com/EleutherAI/lm-evaluation-harness/issues/2210
# and https://github.com/nltk/nltk/issues/3266
# for more information.
NLTK_MIN_VERSION = "3.9.1"
def download_nltk_resources():
    """Ensure the NLTK 'punkt' and 'punkt_tab' tokenizer data are installed.

    The installed `nltk` version is compared against ``NLTK_MIN_VERSION``
    first; nothing is looked up or downloaded when it is too old.  Each
    resource is then checked individually with ``nltk.data.find`` and only
    the ones that are missing are fetched with ``nltk.download``.

    Raises:
        AssertionError: if the installed `nltk` is older than
            ``NLTK_MIN_VERSION``.
    """
    nltk_version = pkg_resources.get_distribution("nltk").version

    # Raise explicitly rather than via an `assert` statement: asserts are
    # stripped when Python runs with -O, which would silently disable this
    # security gate.  AssertionError is kept so the exception type seen by
    # existing callers does not change.
    if version.parse(nltk_version) < version.parse(NLTK_MIN_VERSION):
        raise AssertionError(
            f"`nltk` version {nltk_version} is not >= {NLTK_MIN_VERSION}. Please update `nltk` before proceeding--older versions are vulnerable to a remote code execution vulnerability."
        )

    # Check each resource on its own so that one missing package does not
    # trigger a redundant download of the other.
    for resource in ("punkt", "punkt_tab"):
        try:
            nltk.data.find(f"tokenizers/{resource}")
        except LookupError:
            nltk.download(resource)
download_nltk_resources()
......
......@@ -110,10 +110,6 @@ def test_instruction_following_loose(
def process_results(doc, results):
eval_logger.warning(
"This task is meant for chat-finetuned models, and may not give meaningful results for models other than `openai` or `anthropic` if `doc_to_text` in its YAML is not wrapped in the appropriate chat template string. This warning will be removed when chat templating support is added natively to local models"
)
inp = InputExample(
key=doc["key"],
instruction_id_list=doc["instruction_id_list"],
......
......@@ -26,6 +26,6 @@ metric_list:
aggregation: !function utils.agg_inst_level_acc
higher_is_better: true
metadata:
version: 2.0
version: 3.0
fewshot_config:
sampler: first_n
......@@ -20,14 +20,28 @@ import re
import immutabledict
import nltk
import pkg_resources
from packaging import version
# Downloading 'punkt' with nltk<3.9 exposes a remote code execution vulnerability.
# see https://github.com/EleutherAI/lm-evaluation-harness/issues/2210
# and https://github.com/nltk/nltk/issues/3266
# for more information.
NLTK_MIN_VERSION = "3.9.1"
def download_nltk_resources():
    """Ensure the NLTK 'punkt' and 'punkt_tab' tokenizer data are installed.

    The installed `nltk` version is compared against ``NLTK_MIN_VERSION``
    first; nothing is looked up or downloaded when it is too old.  Each
    resource is then checked individually with ``nltk.data.find`` and only
    the ones that are missing are fetched with ``nltk.download``.

    Raises:
        AssertionError: if the installed `nltk` is older than
            ``NLTK_MIN_VERSION``.
    """
    nltk_version = pkg_resources.get_distribution("nltk").version

    # Raise explicitly rather than via an `assert` statement: asserts are
    # stripped when Python runs with -O, which would silently disable this
    # security gate.  AssertionError is kept so the exception type seen by
    # existing callers does not change.
    if version.parse(nltk_version) < version.parse(NLTK_MIN_VERSION):
        raise AssertionError(
            f"`nltk` version {nltk_version} is not >= {NLTK_MIN_VERSION}. Please update `nltk` before proceeding--older versions are vulnerable to a remote code execution vulnerability."
        )

    # Check each resource on its own so that one missing package does not
    # trigger a redundant download of the other.
    for resource in ("punkt", "punkt_tab"):
        try:
            nltk.data.find(f"tokenizers/{resource}")
        except LookupError:
            nltk.download(resource)
download_nltk_resources()
......
......@@ -62,7 +62,7 @@ dev = ["pytest", "pytest-cov", "pytest-xdist", "pre-commit", "mypy"]
deepsparse = ["deepsparse-nightly[llm]>=1.8.0.20240404"]
gptq = ["auto-gptq[triton]>=0.6.0"]
hf_transfer = ["hf_transfer"]
ifeval = ["langdetect", "immutabledict"]
ifeval = ["langdetect", "immutabledict", "nltk>=3.9.1"]
neuronx = ["optimum[neuronx]"]
mamba = ["mamba_ssm", "causal-conv1d==1.0.2"]
math = ["sympy>=1.12", "antlr4-python3-runtime==4.11"]
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment