Commit 2de3688f (unverified), authored by Hailey Schoelkopf, committed by GitHub
Browse files

update nltk version to require 3.9.1 (#2259)

parent 40010ec1
...@@ -26,4 +26,4 @@ metric_list: ...@@ -26,4 +26,4 @@ metric_list:
aggregation: !function utils.agg_inst_level_acc aggregation: !function utils.agg_inst_level_acc
higher_is_better: true higher_is_better: true
metadata: metadata:
version: 3.0 version: 4.0
...@@ -20,14 +20,28 @@ import re ...@@ -20,14 +20,28 @@ import re
import immutabledict import immutabledict
import nltk import nltk
import pkg_resources
from packaging import version
# Downloading 'punkt' with nltk<3.9 has a remote code vuln.
# see https://github.com/EleutherAI/lm-evaluation-harness/issues/2210
# and https://github.com/nltk/nltk/issues/3266
# for more information.
NLTK_MIN_VERSION = "3.9.1"
def download_nltk_resources(): def download_nltk_resources():
"""Download 'punkt' if not already installed""" """Download 'punkt' if not already installed"""
nltk_version = pkg_resources.get_distribution("nltk").version
assert (
version.parse(nltk_version) >= version.parse(NLTK_MIN_VERSION)
), f"`nltk` version {nltk_version} is not >= {NLTK_MIN_VERSION}. Please update `nltk` before proceeding--older versions are vulnerable to a remote code execution vulnerability."
try: try:
nltk.data.find("tokenizers/punkt") nltk.data.find("tokenizers/punkt_tab")
except LookupError: except LookupError:
nltk.download("punkt") nltk.download("punkt_tab")
download_nltk_resources() download_nltk_resources()
......
...@@ -110,10 +110,6 @@ def test_instruction_following_loose( ...@@ -110,10 +110,6 @@ def test_instruction_following_loose(
def process_results(doc, results): def process_results(doc, results):
eval_logger.warning(
"This task is meant for chat-finetuned models, and may not give meaningful results for models other than `openai` or `anthropic` if `doc_to_text` in its YAML is not wrapped in the appropriate chat template string. This warning will be removed when chat templating support is added natively to local models"
)
inp = InputExample( inp = InputExample(
key=doc["key"], key=doc["key"],
instruction_id_list=doc["instruction_id_list"], instruction_id_list=doc["instruction_id_list"],
......
...@@ -26,6 +26,6 @@ metric_list: ...@@ -26,6 +26,6 @@ metric_list:
aggregation: !function utils.agg_inst_level_acc aggregation: !function utils.agg_inst_level_acc
higher_is_better: true higher_is_better: true
metadata: metadata:
version: 2.0 version: 3.0
fewshot_config: fewshot_config:
sampler: first_n sampler: first_n
...@@ -20,14 +20,28 @@ import re ...@@ -20,14 +20,28 @@ import re
import immutabledict import immutabledict
import nltk import nltk
import pkg_resources
from packaging import version
# Downloading 'punkt' with nltk<3.9 has a remote code vuln.
# see https://github.com/EleutherAI/lm-evaluation-harness/issues/2210
# and https://github.com/nltk/nltk/issues/3266
# for more information.
NLTK_MIN_VERSION = "3.9.1"
def download_nltk_resources(): def download_nltk_resources():
"""Download 'punkt' if not already installed""" """Download 'punkt' if not already installed"""
nltk_version = pkg_resources.get_distribution("nltk").version
assert (
version.parse(nltk_version) >= version.parse(NLTK_MIN_VERSION)
), f"`nltk` version {nltk_version} is not >= {NLTK_MIN_VERSION}. Please update `nltk` before proceeding--older versions are vulnerable to a remote code execution vulnerability."
try: try:
nltk.data.find("tokenizers/punkt") nltk.data.find("tokenizers/punkt_tab")
except LookupError: except LookupError:
nltk.download("punkt") nltk.download("punkt_tab")
download_nltk_resources() download_nltk_resources()
......
...@@ -62,7 +62,7 @@ dev = ["pytest", "pytest-cov", "pytest-xdist", "pre-commit", "mypy"] ...@@ -62,7 +62,7 @@ dev = ["pytest", "pytest-cov", "pytest-xdist", "pre-commit", "mypy"]
deepsparse = ["deepsparse-nightly[llm]>=1.8.0.20240404"] deepsparse = ["deepsparse-nightly[llm]>=1.8.0.20240404"]
gptq = ["auto-gptq[triton]>=0.6.0"] gptq = ["auto-gptq[triton]>=0.6.0"]
hf_transfer = ["hf_transfer"] hf_transfer = ["hf_transfer"]
ifeval = ["langdetect", "immutabledict"] ifeval = ["langdetect", "immutabledict", "nltk>=3.9.1"]
neuronx = ["optimum[neuronx]"] neuronx = ["optimum[neuronx]"]
mamba = ["mamba_ssm", "causal-conv1d==1.0.2"] mamba = ["mamba_ssm", "causal-conv1d==1.0.2"]
math = ["sympy>=1.12", "antlr4-python3-runtime==4.11"] math = ["sympy>=1.12", "antlr4-python3-runtime==4.11"]
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment