Commit d859d1ca authored by Nathan Habib
Browse files

batch commit

parent 6e49b1f6
......@@ -8,7 +8,6 @@ Requires the installation of
`pip install "bigbench @ https://storage.googleapis.com/public_research_data/bigbench/bigbench-0.0.1.tar.gz"`
and is included so that the bigbench dependency can be avoided.
"""
import bigbench.api.util as bb_utils
import datasets
from tqdm import tqdm
......
"""
Take in a YAML, and output all other splits with this YAML
"""
import argparse
import os
......
"""
Take in a YAML, and output all other splits with this YAML
"""
import argparse
import os
......
"""
Take in a YAML, and output all other splits with this YAML
"""
import argparse
import os
......
"""
"""
import re
from typing import List
......
......@@ -13,7 +13,6 @@
# limitations under the License.
"""Library of instructions."""
import collections
import json
import logging
......
......@@ -13,7 +13,6 @@
# limitations under the License.
"""Registry of all instructions."""
from lm_eval.tasks.ifeval import instructions
......
"""
Take in a YAML, and output all "other" splits with this YAML
"""
import argparse
import logging
import os
......
......@@ -19,5 +19,3 @@ metric_list:
higher_is_better: true
metadata:
version: 1.0
dataset_kwargs:
trust_remote_code: true
"""
"""
import re
from typing import List
......
......@@ -13,7 +13,6 @@ also determine when no answer is supported by the paragraph and abstain from ans
Homepage: https://rajpurkar.github.io/SQuAD-explorer/
"""
from functools import partial
from math import exp
......
"""This code mirrors the utils of the original winogrande task"""
""" This code mirrors the utils of the original winogrande task """
def doc_to_text(doc):
......
"""
Take in a YAML, and output all "other" splits with this YAML
"""
import argparse
import os
......
......@@ -152,55 +152,6 @@ def general_detokenize(string):
return string
def get_file_task_name(filename: str) -> str:
    """
    Extract the task name from a sample results filename.

    The task name is the text strictly between the first and the last
    underscore of ``filename``.
    """
    first_underscore = filename.find("_")
    last_underscore = filename.rfind("_")
    return filename[first_underscore + 1 : last_underscore]
def get_file_datetime(filename: str) -> str:
    """
    Extract the datetime portion of a results or sample results filename.

    Returns whatever follows the last underscore of ``filename`` (the whole
    name when there is no underscore), with every ".json" occurrence removed.
    """
    # rpartition yields ("", "", filename) when no underscore is present,
    # so the tail is the full filename in that case.
    _, _, tail = filename.rpartition("_")
    return tail.replace(".json", "")
def sanitize_model_name(model_name: str) -> str:
    """
    Return ``model_name`` with special characters replaced.

    Each consecutive run of the characters ``" < > : / | \\ ? * [ ]`` is
    collapsed into a single double underscore.
    """
    special_char_run = re.compile(r"[\"<>:/\|\\?\*\[\]]+")
    return special_char_run.sub("__", model_name)
def sanitize_task_name(task_name: str) -> str:
    """
    Return ``task_name`` with every non-word character replaced.

    Each character matched by the regex class ``\\W`` is replaced by one
    underscore; runs are not collapsed.
    """
    non_word_char = re.compile(r"\W")
    return non_word_char.sub("_", task_name)
def get_latest_filename(filenames: List[str]) -> str:
    """
    Pick the filename whose extracted datetime is the greatest.

    Filenames are ranked by the value ``get_file_datetime`` returns for
    each of them; the highest-ranked filename is returned.
    """
    # The helper already has the right signature to serve as the sort key.
    return max(filenames, key=get_file_datetime)
def get_results_filenames(filenames: List[str]) -> List[str]:
    """
    Select the filenames that correspond to aggregated results.

    A filename is kept when it contains both "/results_" and ".json";
    the input order is preserved.
    """
    selected = []
    for name in filenames:
        if "/results_" not in name:
            continue
        if ".json" in name:
            selected.append(name)
    return selected
def get_sample_results_filenames(filenames: List[str]) -> List[str]:
    """
    Select the filenames that correspond to sample results.

    A filename is kept when it contains both "/samples_" and ".json";
    the input order is preserved.
    """

    def is_sample_file(name: str) -> bool:
        return "/samples_" in name and ".json" in name

    return list(filter(is_sample_file, filenames))
def get_rolling_token_windows(token_list, prefix_token, max_seq_len, context_len):
"""
- context_len allows for a rolling window context, allowing each prediction window to potentially
......
......@@ -10,7 +10,7 @@ It uses the approach described in the [GPT-3 paper](https://arxiv.org/abs/2005.1
the match, splitting the training data into chunks
3) Any chunks less than `minimum_slice_length` are removed
4) Training data sets split into more than `too_dirty_cutoff` are considered
completely contaminated and removed
completely contaminated and removed
OpenAI used:
```
......
......@@ -2,7 +2,6 @@
Usage:
python make_table_tasks.py --output <markdown_filename>
"""
import json
import logging
import os
......
......@@ -2,7 +2,6 @@
Usage:
python make_table_tasks.py --output <markdown_filename>
"""
import argparse
import logging
......
......@@ -70,11 +70,6 @@ def main():
if docs is not None:
iters.append(docs)
if len(iters) == 0:
raise ValueError(
f"Passed --sets '{args.sets}' but this task has no splits which match. Please specify a different --sets value."
)
docs = join_iters(iters)
with open(
......
......@@ -7,12 +7,7 @@ from pathlib import Path
import pandas as pd
from zeno_client import ZenoClient, ZenoMetric
from lm_eval.utils import (
eval_logger,
get_latest_filename,
get_results_filenames,
get_sample_results_filenames,
)
from lm_eval.utils import eval_logger
def parse_args():
......@@ -50,15 +45,13 @@ def main():
assert len(models) > 0, "No model directories found in the data_path."
# Get the tasks from the latest results file of the first model.
tasks = set(tasks_for_model(models[0], args.data_path))
# Get tasks names from the latest results file for each model
# Get intersection of tasks for all models
for model in models:
for model in models: # Make sure that all models have the same tasks.
old_tasks = tasks.copy()
task_count = len(tasks)
model_tasks = set(tasks_for_model(model, args.data_path))
model_tasks = tasks_for_model(model, args.data_path)
tasks.intersection(set(model_tasks))
if task_count != len(tasks):
......@@ -73,36 +66,22 @@ def main():
for task in tasks:
# Upload data for all models
for model_index, model in enumerate(models):
# Get latest results and sample results for a model
model_dir = Path(args.data_path, model)
model_files = [f.as_posix() for f in model_dir.iterdir() if f.is_file()]
model_results_filenames = get_results_filenames(model_files)
model_sample_filenames = get_sample_results_filenames(model_files)
latest_results = get_latest_filename(
[Path(f).name for f in model_results_filenames]
)
latest_sample_results = get_latest_filename(
[Path(f).name for f in model_sample_filenames if task in f]
)
model_args = re.sub(
r"[\"<>:/\|\\?\*\[\]]+",
"__",
json.load(
open(Path(args.data_path, model, latest_results), encoding="utf-8")
open(Path(args.data_path, model, "results.json"), encoding="utf-8")
)["config"]["model_args"],
)
print(model_args)
data = []
with open(
Path(args.data_path, model, latest_sample_results),
Path(args.data_path, model, f"{model_args}_{task}.jsonl"),
"r",
encoding="utf-8",
) as file:
for line in file:
data.append(json.loads(line.strip()))
data = json.loads(file.read())
configs = json.load(
open(Path(args.data_path, model, latest_results), encoding="utf-8")
open(Path(args.data_path, model, "results.json"), encoding="utf-8")
)["configs"]
config = configs[task]
......@@ -146,12 +125,10 @@ def tasks_for_model(model: str, data_path: str):
Returns:
list: A list of tasks for the model.
"""
# get latest model results for a given name
model_dir = Path(data_path, model)
model_files = [f.as_posix() for f in model_dir.iterdir() if f.is_file()]
model_results_filenames = get_results_filenames(model_files)
latest_results = get_latest_filename(model_results_filenames)
config = (json.load(open(latest_results, encoding="utf-8"))["configs"],)
dir_path = Path(data_path, model)
config = (
json.load(open(Path(dir_path, "results.json"), encoding="utf-8"))["configs"],
)
return list(config[0].keys())
......
......@@ -23,7 +23,6 @@ DEEPSPARSE_MODELS_TASKS = [
]
@pytest.mark.skip(reason="test failing")
@pytest.mark.parametrize("model_id,task", SPARSEML_MODELS_TASKS)
def test_sparseml_eval(model_id, task):
lm = get_model("sparseml").create_from_arg_string(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment