Commit b9bda0a3 authored by Nathan Habib

checkout from main

parent f7a6573f
...
@@ -10,7 +10,7 @@ It uses the approach described in the [GPT-3 paper](https://arxiv.org/abs/2005.1
    the match, splitting the training data into chunks
 3) Any chunks less than `minimum_slice_length` are removed
 4) Training data sets split into more than `too_dirty_cutoff` are considered
-   completey contaminated and removed
+   completely contaminated and removed
 OpenAI used:
 ```
...
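The chunk-filtering rules in steps 3 and 4 can be summarized in a short sketch. Everything here is illustrative: the function name and the `chunks` input are hypothetical, and the two thresholds are just the configuration knobs named above, not their actual values.

```python
def filter_chunks(chunks, minimum_slice_length, too_dirty_cutoff):
    """Hypothetical sketch of steps 3-4 of the decontamination pass."""
    # Step 4: a document split into too many pieces by n-gram matches is
    # treated as completely contaminated and dropped outright.
    if len(chunks) > too_dirty_cutoff:
        return []
    # Step 3: otherwise, keep only slices long enough to be useful.
    return [chunk for chunk in chunks if len(chunk) >= minimum_slice_length]
```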
...
@@ -2,6 +2,7 @@
 Usage:
    python make_table_tasks.py --output <markdown_filename>
 """
+
 import json
 import logging
 import os
...
...
@@ -2,6 +2,7 @@
 Usage:
    python make_table_tasks.py --output <markdown_filename>
 """
+
 import argparse
 import logging
...
...
@@ -70,6 +70,11 @@ def main():
         if docs is not None:
             iters.append(docs)
+        if len(iters) == 0:
+            raise ValueError(
+                f"Passed --sets '{args.sets}' but this task has no splits which match. Please specify a different --sets value."
+            )
+
         docs = join_iters(iters)
         with open(
...
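The new guard fails fast instead of writing an empty file: if `join_iters` simply chains the collected iterators (a plausible reading of the helper, sketched below as an assumption rather than the repository's actual code), an empty `iters` list would silently yield nothing.

```python
from itertools import chain

def join_iters(iters):
    # Assumed behavior of the helper: flatten a list of document iterators
    # into one stream. With iters == [], this yields nothing at all, which
    # is why the ValueError above is raised before reaching this point.
    yield from chain.from_iterable(iters)
```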
...
@@ -7,7 +7,12 @@ from pathlib import Path
 import pandas as pd
 from zeno_client import ZenoClient, ZenoMetric
 
-from lm_eval.utils import eval_logger
+from lm_eval.utils import (
+    eval_logger,
+    get_latest_filename,
+    get_results_filenames,
+    get_sample_results_filenames,
+)
 
 
 def parse_args():
...
@@ -45,13 +50,15 @@ def main():
     assert len(models) > 0, "No model directories found in the data_path."
 
-    # Get the tasks from the latest results file of the first model.
     tasks = set(tasks_for_model(models[0], args.data_path))
-    for model in models:  # Make sure that all models have the same tasks.
+
+    # Get tasks names from the latest results file for each model
+    # Get intersection of tasks for all models
+    for model in models:
         old_tasks = tasks.copy()
         task_count = len(tasks)
-        model_tasks = tasks_for_model(model, args.data_path)
+        model_tasks = set(tasks_for_model(model, args.data_path))
         tasks.intersection(set(model_tasks))
 
         if task_count != len(tasks):
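One caveat around the unchanged `tasks.intersection(set(model_tasks))` line: `set.intersection` returns a new set and does not modify `tasks`, so the narrowing only takes effect if the result is assigned back or `intersection_update` is used, as in this standalone sketch:

```python
tasks = {"arc", "hellaswag", "mmlu"}
model_tasks = {"arc", "mmlu", "gsm8k"}

tasks.intersection(model_tasks)         # returns {"arc", "mmlu"}; tasks unchanged
tasks.intersection_update(model_tasks)  # mutates tasks in place
assert tasks == {"arc", "mmlu"}
```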
...
@@ -66,22 +73,36 @@ def main():
     for task in tasks:
         # Upload data for all models
         for model_index, model in enumerate(models):
+            # Get latest results and sample results for a model
+            model_dir = Path(args.data_path, model)
+            model_files = [f.as_posix() for f in model_dir.iterdir() if f.is_file()]
+            model_results_filenames = get_results_filenames(model_files)
+            model_sample_filenames = get_sample_results_filenames(model_files)
+            latest_results = get_latest_filename(
+                [Path(f).name for f in model_results_filenames]
+            )
+            latest_sample_results = get_latest_filename(
+                [Path(f).name for f in model_sample_filenames if task in f]
+            )
+
             model_args = re.sub(
                 r"[\"<>:/\|\\?\*\[\]]+",
                 "__",
                 json.load(
-                    open(Path(args.data_path, model, "results.json"), encoding="utf-8")
+                    open(Path(args.data_path, model, latest_results), encoding="utf-8")
                 )["config"]["model_args"],
             )
+            print(model_args)
+
+            data = []
             with open(
-                Path(args.data_path, model, f"{model_args}_{task}.jsonl"),
+                Path(args.data_path, model, latest_sample_results),
                 "r",
                 encoding="utf-8",
             ) as file:
-                data = json.loads(file.read())
+                for line in file:
+                    data.append(json.loads(line.strip()))
+
             configs = json.load(
-                open(Path(args.data_path, model, "results.json"), encoding="utf-8")
+                open(Path(args.data_path, model, latest_results), encoding="utf-8")
             )["configs"]
             config = configs[task]
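The sample-results file is now read as JSON Lines (one JSON object per line) rather than as a single JSON document, which is what the line-by-line loop implies. A self-contained version of that reading pattern, with a hypothetical helper name:

```python
import json

def read_jsonl(path):
    # Parse a JSON Lines file: each non-empty line is one JSON object.
    rows = []
    with open(path, "r", encoding="utf-8") as file:
        for line in file:
            line = line.strip()
            if line:  # skip blank lines rather than crashing on them
                rows.append(json.loads(line))
    return rows
```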
...
@@ -125,10 +146,12 @@ def tasks_for_model(model: str, data_path: str):
     Returns:
         list: A list of tasks for the model.
     """
-    dir_path = Path(data_path, model)
-    config = (
-        json.load(open(Path(dir_path, "results.json"), encoding="utf-8"))["configs"],
-    )
+    # get latest model results for a given name
+    model_dir = Path(data_path, model)
+    model_files = [f.as_posix() for f in model_dir.iterdir() if f.is_file()]
+    model_results_filenames = get_results_filenames(model_files)
+    latest_results = get_latest_filename(model_results_filenames)
+    config = (json.load(open(latest_results, encoding="utf-8"))["configs"],)
     return list(config[0].keys())
...
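Both zeno_visualize.py hunks lean on the `lm_eval.utils` helpers to pick the newest results file for a model. Assuming result filenames embed a timestamp (e.g. `results_2024-01-10T12-30-00.json`; the filename pattern and date format here are assumptions, not the library's actual code), "latest" can be read as "greatest embedded datetime":

```python
import re
from datetime import datetime

def latest_by_embedded_timestamp(filenames):
    # Assumption: each results filename contains an ISO-like timestamp such
    # as results_2024-01-10T12-30-00.json; pick the file with the newest one.
    def file_datetime(name):
        match = re.search(r"\d{4}-\d{2}-\d{2}T\d{2}-\d{2}-\d{2}", name)
        return datetime.strptime(match.group(0), "%Y-%m-%dT%H-%M-%S")
    return max(filenames, key=file_datetime)
```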