Commit 835cc40e authored by lintangsutawika
Browse files

merged latest and added altworld files

parents 8da401e0 c9bbec6e
......@@ -15,3 +15,5 @@ metric_list:
- metric: !function metrics.bleu
aggregation: !function metrics.agg_bleu
higher_is_better: true
metadata:
- version: 0.0
......@@ -13,3 +13,5 @@ metric_list:
- metric: acc
aggregation: mean
higher_is_better: true
metadata:
- version: 1.0
......@@ -10,3 +10,5 @@ doc_to_target: label
doc_to_choice: !function utils.doc_to_choice
metric_list:
- metric: acc
metadata:
- version: 1.0
......@@ -15,3 +15,5 @@ metric_list:
- metric: acc
aggregation: mean
higher_is_better: true
metadata:
- version: 1.0
......@@ -14,3 +14,5 @@ metric_list:
- metric: acc
aggregation: mean
higher_is_better: true
metadata:
- version: 1.0
......@@ -16,3 +16,5 @@ metric_list:
- metric: acc
aggregation: mean
higher_is_better: true
metadata:
- version: 1.0
......@@ -10,7 +10,7 @@ import collections
import importlib.util
import fnmatch
from typing import Iterator, List, Literal, Union
from typing import Iterator, List, Literal, Union, Any, Callable
import gc
import torch
......@@ -60,7 +60,12 @@ def handle_arg_string(arg):
return True
elif arg.lower() == "false":
return False
return arg
elif arg.isnumeric():
return int(arg)
try:
return float(arg)
except ValueError:
return arg
def simple_parse_args_string(args_string):
......@@ -85,6 +90,32 @@ def join_iters(iters):
def chunks(iter, n: int = 0, fn=None):
"""
Divides an iterable into chunks of specified size or based on a given function.
Useful for batching
Parameters:
- iter: The input iterable to be divided into chunks.
- n: An integer representing the size of each chunk. Default is 0.
- fn: A function that takes the current index and the iterable as arguments and returns the size of the chunk. Default is None.
Returns:
An iterator that yields chunks of the input iterable.
Example usage:
```
data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
for chunk in chunks(data, 3):
print(chunk)
```
Output:
```
[1, 2, 3]
[4, 5, 6]
[7, 8, 9]
[10]
```
"""
arr = []
for i, x in enumerate(iter):
arr.append(x)
......@@ -201,7 +232,13 @@ def make_disjoint_window(pair):
class Reorderer:
def __init__(self, arr, fn) -> None:
def __init__(self, arr: List[Any], fn: Callable) -> None:
"""Reorder an array according to some function
Args:
arr (List[Any]): The initial array
fn (Callable[[Any], Any]): A function to determine the priority of elements
"""
self.size = len(arr)
arr = list(enumerate(arr))
arr = group(arr, lambda x: fn(x[1]))
......@@ -213,9 +250,22 @@ class Reorderer:
self.arr = arr
def get_reordered(self):
    """Return the stored elements in their reordered sequence.

    Returns:
        List[Any]: The second component of every entry held in ``self.arr``,
        in the order the entries are stored.
    """
    reordered = []
    for entry in self.arr:
        # Each entry is indexable; position 1 carries the value to return.
        reordered.append(entry[1])
    return reordered
def get_original(self, newarr):
"""Restores the original order of a new array based on the old array's order
Args:
newarr (List[Any]): The array to be restored
Returns:
List[Any]: The array restored to the original order
"""
res = [None] * self.size
cov = [False] * self.size
......@@ -296,31 +346,27 @@ def make_table(result_dict, column: str = "results"):
elif column == "groups":
column_name = "Groups"
md_writer = MarkdownTableWriter()
latex_writer = LatexTableWriter()
md_writer.headers = [
column_name,
"Version",
"Filter",
"Metric",
"Value",
"",
"Stderr",
]
latex_writer.headers = [
all_headers = [
column_name,
"Version",
"Filter",
"n-shot",
"Metric",
"Value",
"",
"Stderr",
]
md_writer = MarkdownTableWriter()
latex_writer = LatexTableWriter()
md_writer.headers = all_headers
latex_writer.headers = all_headers
values = []
for k, dic in result_dict[column].items():
version = result_dict["versions"][k]
n = str(result_dict["n-shot"][k])
if "alias" in dic:
k = dic.pop("alias")
......@@ -332,9 +378,9 @@ def make_table(result_dict, column: str = "results"):
if m + "_stderr" + "," + f in dic:
se = dic[m + "_stderr" + "," + f]
values.append([k, version, f, m, "%.4f" % v, "±", "%.4f" % se])
values.append([k, version, f, n, m, "%.4f" % v, "±", "%.4f" % se])
else:
values.append([k, version, f, m, "%.4f" % v, "", ""])
values.append([k, version, f, n, m, "%.4f" % v, "", ""])
k = ""
version = ""
md_writer.value_matrix = values
......@@ -442,7 +488,6 @@ yaml.add_constructor("!function", import_function)
def load_yaml_config(yaml_path=None, yaml_config=None, yaml_dir=None):
if yaml_config is None:
with open(yaml_path, "rb") as file:
yaml_config = yaml.full_load(file)
......@@ -463,7 +508,6 @@ def load_yaml_config(yaml_path=None, yaml_config=None, yaml_dir=None):
include_path.reverse()
final_yaml_config = {}
for path in include_path:
# Assumes that path is a full path.
# If not found, assume the included yaml
# is in the same dir as the original yaml
......
......@@ -70,7 +70,8 @@ promptsource = [
]
gptq = ["auto-gptq[triton] @ git+https://github.com/PanQiWei/AutoGPTQ"]
anthropic = ["anthropic"]
openai = ["openai", "tiktoken"]
openai = ["openai>=1.3.5", "tiktoken"]
vllm = ["vllm"]
all = [
"lm_eval[dev]",
"lm_eval[testing]",
......@@ -80,5 +81,6 @@ all = [
"lm_eval[promptsource]",
"lm_eval[gptq]",
"lm_eval[anthropic]",
"lm_eval[openai]"
"lm_eval[openai]",
"lm_eval[vllm]",
]
import pytest
from typing import List
from lm_eval.api.instance import Instance
import lm_eval.tasks as tasks
import sys
import torch
@pytest.mark.skip(reason="requires CUDA")
class TEST_VLLM:
    """Smoke tests for the vLLM-backed language model wrapper.

    Every fixture below is built in the class body, so it is created once
    when the class is defined and shared by all test methods. The class is
    skipped unconditionally by the ``skip`` marker above.

    NOTE(review): pytest's default ``python_classes`` prefix is ``Test``;
    confirm this class name is matched by the project's collection settings.
    """

    # Skips when the ``vllm`` package cannot be imported.
    vllm = pytest.importorskip("vllm")
    try:
        from lm_eval.models.vllm_causallms import VLLM

        LM = VLLM(pretrained="EleutherAI/pythia-70m")
    except ModuleNotFoundError:
        # If the wrapper module is missing, ``LM`` stays undefined and the
        # tests below fail on attribute access rather than being skipped.
        pass
    torch.use_deterministic_algorithms(True)
    tasks.initialize_tasks()

    # Multiple-choice requests, used by the loglikelihood test.
    multiple_choice_task = tasks.TASK_REGISTRY.get("arc_easy")()  # type: ignore
    multiple_choice_task.build_all_requests(limit=10, rank=0, world_size=1)
    MULTIPLE_CH: List[Instance] = multiple_choice_task.instances

    # Generation requests, used by the generate_until test.
    generate_until_task = tasks.TASK_REGISTRY.get("gsm8k")()  # type: ignore
    generate_until_task.build_all_requests(limit=10, rank=0, world_size=1)
    generate_until_task._config.generation_kwargs["max_gen_toks"] = 10
    generate_until: List[Instance] = generate_until_task.instances

    # Rolling requests, used by the loglikelihood_rolling test.
    rolling_task = tasks.TASK_REGISTRY.get("wikitext")()  # type: ignore
    rolling_task.build_all_requests(limit=10, rank=0, world_size=1)
    ROLLING: List[Instance] = rolling_task.instances

    # TODO: make proper tests
    def test_logliklihood(self) -> None:
        """Check one result per request, each starting with a float."""
        res = self.LM.loglikelihood(self.MULTIPLE_CH)
        assert len(res) == len(self.MULTIPLE_CH)
        for x in res:
            assert isinstance(x[0], float)

    def test_generate_until(self) -> None:
        """Check one string result per generation request."""
        res = self.LM.generate_until(self.generate_until)
        assert len(res) == len(self.generate_until)
        for x in res:
            assert isinstance(x, str)

    def test_logliklihood_rolling(self) -> None:
        """Check one float result per rolling request."""
        res = self.LM.loglikelihood_rolling(self.ROLLING)
        # Mirror the sibling tests: a dropped or duplicated result would
        # otherwise pass the per-item type check unnoticed.
        assert len(res) == len(self.ROLLING)
        for x in res:
            assert isinstance(x, float)
import hashlib
import json
import openai
import os
import pickle
import pytest
......@@ -8,6 +7,10 @@ import unittest.mock as mock
import lm_eval.models as models
from openai import OpenAI
client = OpenAI()
LOGLIKELIHOOD_TEST_CASES = [
("The quick brown fox jumps over the lazy", " dog"),
......@@ -172,7 +175,7 @@ def openai_mock_completion(**kwargs):
if os.path.exists(fname):
with open(fname, "rb") as fh:
return pickle.load(fh)
ret = openai.Completion.create(**kwargs)
ret = client.completions.create(**kwargs)
ret.api_key = ""
with open(fname, "wb") as fh:
pickle.dump(ret, fh)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment