Commit bcd6faaa authored by Baber

fixup! merge

parent 58fc07b1
@@ -106,10 +106,5 @@ class GroupConfig:
         except (TypeError, OSError):
             return str(value)
 
-    @property
-    def version(self) -> str:
-        """Returns the version of the group configuration."""
-        return self.metadata.get("version", "1.0")
-
     def __repr__(self):
         return f"GroupConfig(group={self.group},group_alias={self.group_alias})"
@@ -340,23 +340,25 @@ class EvaluatorConfig:
             metadata=self.metadata if self.metadata else {},
         )
 
-        task_names = task_manager.match_tasks(self.tasks)
-
-        # Check for any individual task files in the list
-        for task in [task for task in self.tasks if task not in task_names]:
-            task_path = Path(task)
-            if task_path.is_file():
-                config = utils.load_yaml_config(str(task_path))
-                task_names.append(config)
-
-        # Check for missing tasks
-        task_missing = [
-            task for task in self.tasks if task not in task_names and "*" not in task
-        ]
-
-        if task_missing:
-            missing = ", ".join(task_missing)
-            raise ValueError(f"Tasks not found: {missing}")
+        task_names = self.tasks
+        # TODO: FIX TASKS VALIDATION!!!
+        # task_names = task_manager.match_tasks(self.tasks)
+
+        # # Check for any individual task files in the list
+        # for task in [task for task in self.tasks if task not in task_names]:
+        #     task_path = Path(task)
+        #     if task_path.is_file():
+        #         config = utils.load_yaml_config(str(task_path))
+        #         task_names.append(config)
+        #
+        # # Check for missing tasks
+        # task_missing = [
+        #     task for task in self.tasks if task not in task_names and "*" not in task
+        # ]
+        #
+        # if task_missing:
+        #     missing = ", ".join(task_missing)
+        #     raise ValueError(f"Tasks not found: {missing}")
 
         # Update tasks with resolved names
         self.tasks = task_names
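This leaves task-name resolution disabled: self.tasks now flows through unvalidated, so typos, missing tasks, and stand-alone YAML task files are no longer caught at config time, and the trailing "Update tasks with resolved names" comment no longer matches what the code does. A self-contained sketch of the checks the disabled block performed, should the TODO be picked up again (task_manager.match_tasks and utils.load_yaml_config are the helpers the removed code itself called):

    from pathlib import Path

    from lm_eval import utils


    def resolve_tasks(tasks: list, task_manager) -> list:
        """Sketch of the disabled validation: resolve registered names, load
        stand-alone YAML task files, and fail on anything left unmatched."""
        task_names = task_manager.match_tasks(tasks)
        for task in (t for t in tasks if t not in task_names):
            task_path = Path(task)
            if task_path.is_file():
                # Individual task files are loaded as inline configs.
                task_names.append(utils.load_yaml_config(str(task_path)))
        missing = [t for t in tasks if t not in task_names and "*" not in t]
        if missing:
            raise ValueError(f"Tasks not found: {', '.join(missing)}")
        return task_names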
@@ -30,6 +30,7 @@ from lm_eval.evaluator_utils import (
 from lm_eval.loggers import EvaluationTracker
 from lm_eval.loggers.utils import add_env_info, add_tokenizer_info, get_git_commit_hash
 from lm_eval.tasks import TaskManager
+from lm_eval.tasks.manager import get_task_dict
 from lm_eval.utils import (
     get_logger,
     handle_non_serializable,
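The new import takes get_task_dict from this branch's lm_eval.tasks.manager module. Assuming it keeps the signature of the long-standing lm_eval.tasks.get_task_dict helper (a list of task names plus an optional TaskManager, returning a mapping from task name to task object), usage would look like:

    # Hedged sketch; assumes this branch's get_task_dict matches the upstream
    # lm_eval.tasks.get_task_dict signature.
    from lm_eval.tasks import TaskManager
    from lm_eval.tasks.manager import get_task_dict

    task_manager = TaskManager()
    task_dict = get_task_dict(["hellaswag"], task_manager)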
@@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta"
 name = "lm_eval"
 version = "0.4.9.1"
 authors = [
-    {name="EleutherAI", email="contact@eleuther.ai"}
+    { name = "EleutherAI", email = "contact@eleuther.ai" }
 ]
 description = "A framework for evaluating language models"
 readme = "README.md"
@@ -19,25 +19,22 @@ classifiers = [
 requires-python = ">=3.9"
 license = { "text" = "MIT" }
 dependencies = [
     "accelerate>=0.26.0",
     "datasets>=2.16.0,<4.0",
     "evaluate>=0.4.0",
     "peft>=0.2.0",
     "pytablewriter",
     "rouge-score>=0.0.4",
     "sacrebleu>=1.5.0",
     "scikit-learn>=0.24.1",
     "sqlitedict",
     "torch>=1.8",
     "transformers>=4.1",
     "dill",
     "word2number",
     "more_itertools"
 ]
-
-[tool.setuptools.packages.find]
-include = ["lm_eval*"]
 
 # required to include yaml files in pip installation
 [tool.setuptools.package-data]
 lm_eval = ["**/*.yaml", "tasks/**/*"]
@@ -63,7 +60,7 @@ ifeval = ["langdetect", "immutabledict", "nltk>=3.9.1"]
 ipex = ["optimum"]
 japanese_leaderboard = ["emoji==2.14.0", "neologdn==0.5.3", "fugashi[unidic-lite]", "rouge_score>=0.1.2"]
 longbench = ["jieba", "fuzzywuzzy", "rouge"]
-libra=["pymorphy2"]
+libra = ["pymorphy2"]
 mamba = ["mamba_ssm", "causal-conv1d==1.0.2", "torch"]
 math = ["sympy>=1.12", "antlr4-python3-runtime==4.11", "math_verify[antlr4_11_0]"]
 multilingual = ["nagisa>=0.2.7", "jieba>=0.42.1", "pycountry"]
@@ -76,16 +73,16 @@ sentencepiece = ["sentencepiece>=0.1.98"]
 sparsify = ["sparsify"]
 discrim_eval = ["statsmodels==0.14.4"]
 tasks = [
     "lm_eval[acpbench]",
     "lm_eval[discrim_eval]",
     "lm_eval[ifeval]",
     "lm_eval[japanese_leaderboard]",
     "lm_eval[longbench]",
     "lm_eval[libra]",
     "lm_eval[mamba]",
     "lm_eval[math]",
     "lm_eval[multilingual]",
     "lm_eval[ruler]"
 ]
 testing = ["pytest", "pytest-cov", "pytest-xdist"]
 unitxt = ["unitxt==1.22.0"]
@@ -93,14 +90,6 @@ vllm = ["vllm>=0.4.2"]
 wandb = ["wandb>=0.16.3", "pandas", "numpy"]
 zeno = ["pandas", "zeno-client"]
-
-[project.scripts]
-lm-eval = "lm_eval.__main__:cli_evaluate"
-lm_eval = "lm_eval.__main__:cli_evaluate"
-
-[project.urls]
-Homepage = "https://github.com/EleutherAI/lm-evaluation-harness"
-Repository = "https://github.com/EleutherAI/lm-evaluation-harness"
 
 [tool.pymarkdown]
 plugins.md013.enabled = false # line-length
 plugins.md024.allow_different_nesting = true # no-duplicate-headers
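Dropping [project.scripts] means a pip install of this branch no longer creates the lm-eval / lm_eval console commands. Both entry points pointed at lm_eval.__main__:cli_evaluate, so assuming that module still defines cli_evaluate (it is not touched in this diff), the CLI stays reachable via python -m lm_eval; a rough stand-in for what the dropped entry points did:

    # Rough equivalent of the removed console-script entry points; assumes
    # cli_evaluate still lives at lm_eval.__main__ (not shown in this diff).
    from lm_eval.__main__ import cli_evaluate

    if __name__ == "__main__":
        cli_evaluate()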
@@ -123,9 +112,5 @@ combine-as-imports = true
 known-first-party = ["lm_eval"]
 lines-after-imports = 2
-
-# required to include yaml files in pip installation
-[tool.setuptools.package-data]
-lm_eval = ["**/*.yaml", "tasks/**/*"]
 
 [tool.setuptools.packages.find]
 include = ["lm_eval*"]