Commit d859d1ca authored by Nathan Habib

batch commit

parent 6e49b1f6
@@ -8,7 +8,6 @@ Requires the installation of
`pip install "bigbench @ https://storage.googleapis.com/public_research_data/bigbench/bigbench-0.0.1.tar.gz"`
and is included so that the bigbench dependency can be avoided.
"""
import bigbench.api.util as bb_utils
import datasets
from tqdm import tqdm
......
""" """
Take in a YAML, and output all other splits with this YAML Take in a YAML, and output all other splits with this YAML
""" """
import argparse import argparse
import os import os
......
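The "Take in a YAML, and output all other splits with this YAML" docstring recurs across several task folders touched by this commit. As a rough illustration of the pattern such generator scripts follow, here is a hypothetical sketch; the argument names, split list, and emitted keys are assumptions for illustration, not the repo's actual code:

```python
import argparse
import os

import yaml  # assumes PyYAML is installed

SPLITS = ["validation", "test"]  # assumed split names

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--base_yaml_path", required=True)
    parser.add_argument("--save_prefix_path", default="generated")
    args = parser.parse_args()

    with open(args.base_yaml_path, encoding="utf-8") as f:
        base = yaml.safe_load(f)

    # Emit one YAML per split, inheriting everything else from the base file.
    base_name = os.path.basename(args.base_yaml_path)
    for split in SPLITS:
        out = {
            "include": base_name,
            "task": f"{base['task']}_{split}",
            "test_split": split,
        }
        with open(f"{args.save_prefix_path}_{split}.yaml", "w", encoding="utf-8") as f:
            yaml.dump(out, f)
```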
""" """
Take in a YAML, and output all other splits with this YAML Take in a YAML, and output all other splits with this YAML
""" """
import argparse import argparse
import os import os
......
""" """
Take in a YAML, and output all other splits with this YAML Take in a YAML, and output all other splits with this YAML
""" """
import argparse import argparse
import os import os
......
"""
"""
import re
from typing import List
......
@@ -13,7 +13,6 @@
# limitations under the License.
"""Library of instructions."""
import collections
import json
import logging
......
@@ -13,7 +13,6 @@
# limitations under the License.
"""Registry of all instructions."""
from lm_eval.tasks.ifeval import instructions
......
""" """
Take in a YAML, and output all "other" splits with this YAML Take in a YAML, and output all "other" splits with this YAML
""" """
import argparse import argparse
import logging import logging
import os import os
......
@@ -19,5 +19,3 @@ metric_list:
    higher_is_better: true
metadata:
  version: 1.0
-dataset_kwargs:
-  trust_remote_code: true
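The two removed lines forwarded a flag to the `datasets` library: `dataset_kwargs` entries in a task YAML are passed through as keyword arguments to `datasets.load_dataset`, so the deleted config was roughly equivalent to the sketch below (the dataset path is a placeholder, not this task's real dataset):

```python
import datasets

# `dataset_kwargs` from the task YAML become keyword arguments here,
# so `trust_remote_code: true` turns into trust_remote_code=True.
# "org/dataset_name" is a placeholder path.
ds = datasets.load_dataset("org/dataset_name", trust_remote_code=True)
```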
"""
"""
import re
from typing import List
......
@@ -13,7 +13,6 @@ also determine when no answer is supported by the paragraph and abstain from answering.
Homepage: https://rajpurkar.github.io/SQuAD-explorer/
"""
from functools import partial
from math import exp
......
"""This code mirrors the utils of the original winogrande task""" """ This code mirrors the utils of the original winogrande task """
def doc_to_text(doc): def doc_to_text(doc):
......
""" """
Take in a YAML, and output all "other" splits with this YAML Take in a YAML, and output all "other" splits with this YAML
""" """
import argparse import argparse
import os import os
......
@@ -152,55 +152,6 @@ def general_detokenize(string):
    return string
-def get_file_task_name(filename: str) -> str:
-    """
-    Given the sample results filenames, extracts and returns the task name.
-    """
-    return filename[filename.find("_") + 1 : filename.rfind("_")]
-
-
-def get_file_datetime(filename: str) -> str:
-    """
-    Given the results and sample results filenames, extracts and returns the datetime.
-    """
-    return filename[filename.rfind("_") + 1 :].replace(".json", "")
-
-
-def sanitize_model_name(model_name: str) -> str:
-    """
-    Given the model name, returns a sanitized version of it.
-    """
-    return re.sub(r"[\"<>:/\|\\?\*\[\]]+", "__", model_name)
-
-
-def sanitize_task_name(task_name: str) -> str:
-    """
-    Given the task name, returns a sanitized version of it.
-    """
-    return re.sub(r"\W", "_", task_name)
-
-
-def get_latest_filename(filenames: List[str]) -> str:
-    """
-    Given a list of filenames, returns the filename with the latest datetime.
-    """
-    return max(filenames, key=lambda f: get_file_datetime(f))
-
-
-def get_results_filenames(filenames: List[str]) -> List[str]:
-    """
-    Extracts filenames that correspond to aggregated results.
-    """
-    return [f for f in filenames if "/results_" in f and ".json" in f]
-
-
-def get_sample_results_filenames(filenames: List[str]) -> List[str]:
-    """
-    Extracts filenames that correspond to sample results.
-    """
-    return [f for f in filenames if "/samples_" in f and ".json" in f]
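Taken together, the removed helpers composed as in this hypothetical usage sketch; the filenames are invented, but they follow the `results_<datetime>.json` / `samples_<task>_<datetime>.json` layout the string slicing assumes:

```python
files = [
    "outputs/model_a/results_2024-01-02T03-04-05.json",
    "outputs/model_a/samples_arc_easy_2024-01-02T03-04-05.json",
    "outputs/model_a/samples_arc_easy_2024-01-01T00-00-00.json",
]
results = get_results_filenames(files)         # the single results_*.json entry
samples = get_sample_results_filenames(files)  # both samples_*.json entries
latest = get_latest_filename(samples)          # max by the datetime suffix
task = get_file_task_name("samples_arc_easy_2024-01-01T00-00-00.json")  # "arc_easy"
```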
def get_rolling_token_windows(token_list, prefix_token, max_seq_len, context_len):
    """
    - context_len allows for a rolling window context, allowing each prediction window to potentially
......
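`get_rolling_token_windows`, left untouched by this commit, is the harness's way of scoring sequences longer than the model's context: it slides a window across the tokens so that every token is predicted exactly once, while up to `context_len` tokens of overlap are reused as conditioning. A simplified sketch of the idea, not the harness's exact implementation:

```python
def rolling_windows(tokens, prefix_token, max_seq_len, context_len):
    # Illustrative only (assumes max_seq_len > context_len). For a causal
    # LM, the input that predicts tokens[i:j] is tokens[i-1:j-1], with
    # prefix_token standing in at position -1; in practice only the last
    # len(targets) logits of each window are scored.
    stride = max_seq_len - context_len
    pred_start = 0
    while pred_start < len(tokens):
        pred_end = min(pred_start + stride, len(tokens))
        ctx_start = max(0, pred_start - context_len)
        prefix = [prefix_token] if pred_start == 0 else []
        inputs = prefix + tokens[ctx_start : pred_end - 1]
        targets = tokens[pred_start:pred_end]
        yield inputs, targets
        pred_start = pred_end
```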
@@ -10,7 +10,7 @@ It uses the approach described in the [GPT-3 paper](https://arxiv.org/abs/2005.14165)
the match, splitting the training data into chunks
3) Any chunks less than `minimum_slice_length` are removed
4) Training data sets split into more than `too_dirty_cutoff` are considered
-completely contaminated and removed
+completey contaminated and removed
OpenAI used:
```
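A hypothetical sketch of the splitting rule that steps 2–4 describe, assuming the n-gram matches have already been located as `(start, end)` character spans; `window_chars` is an assumed parameter name and the defaults are placeholders:

```python
def split_contaminated(doc: str, match_spans, window_chars=200,
                       minimum_slice_length=200, too_dirty_cutoff=10):
    # Hypothetical sketch of the decontamination splitting step.
    chunks, cursor = [], 0
    for start, end in sorted(match_spans):
        # keep the slice before the match, minus a window of characters
        chunks.append(doc[cursor : max(cursor, start - window_chars)])
        # skip past the match plus the trailing window
        cursor = max(cursor, end + window_chars)
    chunks.append(doc[cursor:])
    # 3) drop slices that are too short
    chunks = [c for c in chunks if len(c) >= minimum_slice_length]
    # 4) too many cuts -> treat the whole document as contaminated
    if len(chunks) > too_dirty_cutoff:
        return []
    return chunks
```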
@@ -2,7 +2,6 @@
Usage:
   python make_table_tasks.py --output <markdown_filename>
"""
import json
import logging
import os
......
@@ -2,7 +2,6 @@
Usage:
   python make_table_tasks.py --output <markdown_filename>
"""
import argparse
import logging
......
@@ -70,11 +70,6 @@ def main():
        if docs is not None:
            iters.append(docs)

-        if len(iters) == 0:
-            raise ValueError(
-                f"Passed --sets '{args.sets}' but this task has no splits which match. Please specify a different --sets value."
-            )
-
        docs = join_iters(iters)
        with open(
......
@@ -7,12 +7,7 @@ from pathlib import Path
import pandas as pd
from zeno_client import ZenoClient, ZenoMetric

-from lm_eval.utils import (
-    eval_logger,
-    get_latest_filename,
-    get_results_filenames,
-    get_sample_results_filenames,
-)
+from lm_eval.utils import eval_logger
def parse_args():
@@ -50,15 +45,13 @@ def main():
    assert len(models) > 0, "No model directories found in the data_path."
-    # Get the tasks from the latest results file of the first model.
    tasks = set(tasks_for_model(models[0], args.data_path))
-    # Get tasks names from the latest results file for each model
-    # Get intersection of tasks for all models
-    for model in models:
+    for model in models:  # Make sure that all models have the same tasks.
        old_tasks = tasks.copy()
        task_count = len(tasks)
-        model_tasks = set(tasks_for_model(model, args.data_path))
+        model_tasks = tasks_for_model(model, args.data_path)
        tasks.intersection(set(model_tasks))
        if task_count != len(tasks):
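(Note that the `tasks.intersection(set(model_tasks))` context line, unchanged on both sides of this commit, discards its result: `set.intersection` returns a new set rather than mutating `tasks`. The in-place form would be `tasks.intersection_update(model_tasks)`, or equivalently `tasks &= set(model_tasks)`.)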
@@ -73,36 +66,22 @@ def main():
    for task in tasks:
        # Upload data for all models
        for model_index, model in enumerate(models):
-            # Get latest results and sample results for a model
-            model_dir = Path(args.data_path, model)
-            model_files = [f.as_posix() for f in model_dir.iterdir() if f.is_file()]
-            model_results_filenames = get_results_filenames(model_files)
-            model_sample_filenames = get_sample_results_filenames(model_files)
-            latest_results = get_latest_filename(
-                [Path(f).name for f in model_results_filenames]
-            )
-            latest_sample_results = get_latest_filename(
-                [Path(f).name for f in model_sample_filenames if task in f]
-            )
            model_args = re.sub(
                r"[\"<>:/\|\\?\*\[\]]+",
                "__",
                json.load(
-                    open(Path(args.data_path, model, latest_results), encoding="utf-8")
+                    open(Path(args.data_path, model, "results.json"), encoding="utf-8")
                )["config"]["model_args"],
            )
-            print(model_args)
-            data = []
            with open(
-                Path(args.data_path, model, latest_sample_results),
+                Path(args.data_path, model, f"{model_args}_{task}.jsonl"),
                "r",
                encoding="utf-8",
            ) as file:
-                for line in file:
-                    data.append(json.loads(line.strip()))
+                data = json.loads(file.read())
            configs = json.load(
-                open(Path(args.data_path, model, latest_results), encoding="utf-8")
+                open(Path(args.data_path, model, "results.json"), encoding="utf-8")
            )["configs"]
            config = configs[task]
config = configs[task] config = configs[task]
@@ -146,12 +125,10 @@ def tasks_for_model(model: str, data_path: str):
    Returns:
        list: A list of tasks for the model.
    """
-    # get latest model results for a given name
-    model_dir = Path(data_path, model)
-    model_files = [f.as_posix() for f in model_dir.iterdir() if f.is_file()]
-    model_results_filenames = get_results_filenames(model_files)
-    latest_results = get_latest_filename(model_results_filenames)
-    config = (json.load(open(latest_results, encoding="utf-8"))["configs"],)
+    dir_path = Path(data_path, model)
+    config = (
+        json.load(open(Path(dir_path, "results.json"), encoding="utf-8"))["configs"],
+    )
    return list(config[0].keys())
......
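Both `main()` and `tasks_for_model()` now key everything off a single `results.json` per model directory. For orientation, a rough sketch of the fields this script touches; the values are placeholders and unrelated top-level keys are omitted:

```python
# Rough shape of the results.json fields used above.
example_results = {
    "config": {
        "model_args": "pretrained=org/model,dtype=float16",
    },
    "configs": {
        "arc_easy": {},   # one entry per evaluated task (contents elided)
        "hellaswag": {},
    },
}

# tasks_for_model() effectively returns:
tasks = list(example_results["configs"].keys())  # ["arc_easy", "hellaswag"]
```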
@@ -23,7 +23,6 @@ DEEPSPARSE_MODELS_TASKS = [
]

-@pytest.mark.skip(reason="test failing")
@pytest.mark.parametrize("model_id,task", SPARSEML_MODELS_TASKS)
def test_sparseml_eval(model_id, task):
    lm = get_model("sparseml").create_from_arg_string(
......