checkout from main

b9bda0a3 · Nathan Habib · f7a6573f · b9bda0a3 · b9bda0a3 · b9bda0a3
Commit b9bda0a3 authored Jun 27, 2024 by Nathan Habib
5 changed files
--- a/scripts/clean_training_data/README.md
+++ b/scripts/clean_training_data/README.md
@@ -10,7 +10,7 @@ It uses the approach described in the [GPT-3 paper](https://arxiv.org/abs/2005.1
    the match, splitting the training data into chunks
   3) Any chunks less than `minimum_slice_length` are removed
   4) Training data sets split into more than `too_dirty_cutoff` are considered
-    completey contaminated and removed
+    completely contaminated and removed

 OpenAI used:
 ```

--- a/scripts/make_table_results.py
+++ b/scripts/make_table_results.py
@@ -2,6 +2,7 @@
 Usage:
   python make_table_tasks.py --output <markdown_filename>
 """
+
 import json
 import logging
 import os

--- a/scripts/make_table_tasks.py
+++ b/scripts/make_table_tasks.py
@@ -2,6 +2,7 @@
 Usage:
   python make_table_tasks.py --output <markdown_filename>
 """
+
 import argparse
 import logging


--- a/scripts/write_out.py
+++ b/scripts/write_out.py
@@ -70,6 +70,11 @@ def main():
            if docs is not None:
                iters.append(docs)

+        if len(iters) == 0:
+            raise ValueError(
+                f"Passed --sets '{args.sets}' but this task has no splits which match. Please specify a different --sets value."
+            )
+
        docs = join_iters(iters)

        with open(

--- a/scripts/zeno_visualize.py
+++ b/scripts/zeno_visualize.py
@@ -7,7 +7,12 @@ from pathlib import Path
 import pandas as pd
 from zeno_client import ZenoClient, ZenoMetric

-from lm_eval.utils import eval_logger
+from lm_eval.utils import (
+    eval_logger,
+    get_latest_filename,
+    get_results_filenames,
+    get_sample_results_filenames,
+)


 def parse_args():
@@ -45,13 +50,15 @@ def main():

    assert len(models) > 0, "No model directories found in the data_path."

+    # Get the tasks from the latest results file of the first model.
    tasks = set(tasks_for_model(models[0], args.data_path))

-    for model in models:  # Make sure that all models have the same tasks.
+    # Get the task names from the latest results file of each model and
+    # keep only the tasks that are common to all models.
+    for model in models:
        old_tasks = tasks.copy()
        task_count = len(tasks)
-
-        model_tasks = tasks_for_model(model, args.data_path)
+        model_tasks = set(tasks_for_model(model, args.data_path))
        tasks.intersection(set(model_tasks))

        if task_count != len(tasks):
@@ -66,22 +73,36 @@ def main():
    for task in tasks:
        # Upload data for all models
        for model_index, model in enumerate(models):
+            # Get latest results and sample results for a model
+            model_dir = Path(args.data_path, model)
+            model_files = [f.as_posix() for f in model_dir.iterdir() if f.is_file()]
+            model_results_filenames = get_results_filenames(model_files)
+            model_sample_filenames = get_sample_results_filenames(model_files)
+            latest_results = get_latest_filename(
+                [Path(f).name for f in model_results_filenames]
+            )
+            latest_sample_results = get_latest_filename(
+                [Path(f).name for f in model_sample_filenames if task in f]
+            )
            model_args = re.sub(
                r"[\"<>:/\|\\?\*\[\]]+",
                "__",
                json.load(
-                    open(Path(args.data_path, model, "results.json"), encoding="utf-8")
+                    open(Path(args.data_path, model, latest_results), encoding="utf-8")
                )["config"]["model_args"],
            )
+            print(model_args)
+            data = []
            with open(
-                Path(args.data_path, model, f"{model_args}_{task}.jsonl"),
+                Path(args.data_path, model, latest_sample_results),
                "r",
                encoding="utf-8",
            ) as file:
-                data = json.loads(file.read())
+                for line in file:
+                    data.append(json.loads(line.strip()))

            configs = json.load(
-                open(Path(args.data_path, model, "results.json"), encoding="utf-8")
+                open(Path(args.data_path, model, latest_results), encoding="utf-8")
            )["configs"]
            config = configs[task]

@@ -125,10 +146,12 @@ def tasks_for_model(model: str, data_path: str):
    Returns:
        list: A list of tasks for the model.
    """
-    dir_path = Path(data_path, model)
-    config = (
-        json.load(open(Path(dir_path, "results.json"), encoding="utf-8"))["configs"],
-    )
+    # get latest model results for a given name
+    model_dir = Path(data_path, model)
+    model_files = [f.as_posix() for f in model_dir.iterdir() if f.is_file()]
+    model_results_filenames = get_results_filenames(model_files)
+    latest_results = get_latest_filename(model_results_filenames)
+    config = (json.load(open(latest_results, encoding="utf-8"))["configs"],)
    return list(config[0].keys())