Commit 835cc40e authored by lintangsutawika's avatar lintangsutawika
Browse files

merged latest and added altworld files

parents 8da401e0 c9bbec6e
...@@ -15,3 +15,5 @@ metric_list: ...@@ -15,3 +15,5 @@ metric_list:
- metric: !function metrics.bleu - metric: !function metrics.bleu
aggregation: !function metrics.agg_bleu aggregation: !function metrics.agg_bleu
higher_is_better: true higher_is_better: true
metadata:
- version: 0.0
...@@ -13,3 +13,5 @@ metric_list: ...@@ -13,3 +13,5 @@ metric_list:
- metric: acc - metric: acc
aggregation: mean aggregation: mean
higher_is_better: true higher_is_better: true
metadata:
- version: 1.0
...@@ -10,3 +10,5 @@ doc_to_target: label ...@@ -10,3 +10,5 @@ doc_to_target: label
doc_to_choice: !function utils.doc_to_choice doc_to_choice: !function utils.doc_to_choice
metric_list: metric_list:
- metric: acc - metric: acc
metadata:
- version: 1.0
...@@ -15,3 +15,5 @@ metric_list: ...@@ -15,3 +15,5 @@ metric_list:
- metric: acc - metric: acc
aggregation: mean aggregation: mean
higher_is_better: true higher_is_better: true
metadata:
- version: 1.0
...@@ -14,3 +14,5 @@ metric_list: ...@@ -14,3 +14,5 @@ metric_list:
- metric: acc - metric: acc
aggregation: mean aggregation: mean
higher_is_better: true higher_is_better: true
metadata:
- version: 1.0
...@@ -16,3 +16,5 @@ metric_list: ...@@ -16,3 +16,5 @@ metric_list:
- metric: acc - metric: acc
aggregation: mean aggregation: mean
higher_is_better: true higher_is_better: true
metadata:
- version: 1.0
...@@ -10,7 +10,7 @@ import collections ...@@ -10,7 +10,7 @@ import collections
import importlib.util import importlib.util
import fnmatch import fnmatch
from typing import Iterator, List, Literal, Union from typing import Iterator, List, Literal, Union, Any, Callable
import gc import gc
import torch import torch
...@@ -60,7 +60,12 @@ def handle_arg_string(arg): ...@@ -60,7 +60,12 @@ def handle_arg_string(arg):
return True return True
elif arg.lower() == "false": elif arg.lower() == "false":
return False return False
return arg elif arg.isnumeric():
return int(arg)
try:
return float(arg)
except ValueError:
return arg
def simple_parse_args_string(args_string): def simple_parse_args_string(args_string):
...@@ -85,6 +90,32 @@ def join_iters(iters): ...@@ -85,6 +90,32 @@ def join_iters(iters):
def chunks(iter, n: int = 0, fn=None): def chunks(iter, n: int = 0, fn=None):
"""
Divides an iterable into chunks of specified size or based on a given function.
Useful for batching
Parameters:
- iter: The input iterable to be divided into chunks.
- n: An integer representing the size of each chunk. Default is 0.
- fn: A function that takes the current index and the iterable as arguments and returns the size of the chunk. Default is None.
Returns:
An iterator that yields chunks of the input iterable.
Example usage:
```
data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
for chunk in chunks(data, 3):
print(chunk)
```
Output:
```
[1, 2, 3]
[4, 5, 6]
[7, 8, 9]
[10]
```
"""
arr = [] arr = []
for i, x in enumerate(iter): for i, x in enumerate(iter):
arr.append(x) arr.append(x)
...@@ -201,7 +232,13 @@ def make_disjoint_window(pair): ...@@ -201,7 +232,13 @@ def make_disjoint_window(pair):
class Reorderer: class Reorderer:
def __init__(self, arr, fn) -> None: def __init__(self, arr: List[Any], fn: Callable) -> None:
"""Reorder an array according to some function
Args:
arr (List[Any]): The initial array
fn (Callable[[Any], Any]): A function to determine the priority of elements
"""
self.size = len(arr) self.size = len(arr)
arr = list(enumerate(arr)) arr = list(enumerate(arr))
arr = group(arr, lambda x: fn(x[1])) arr = group(arr, lambda x: fn(x[1]))
...@@ -213,9 +250,22 @@ class Reorderer: ...@@ -213,9 +250,22 @@ class Reorderer:
self.arr = arr self.arr = arr
def get_reordered(self): def get_reordered(self):
"""Gets the reordered array
Returns:
List[Any]: The reordered array
"""
return [x[1] for x in self.arr] return [x[1] for x in self.arr]
def get_original(self, newarr): def get_original(self, newarr):
"""Restores the original order of a new array based on the old array's order
Args:
newarr (List[Any]): The array to be restored
Returns:
List[Any]: The array restored to the original order
"""
res = [None] * self.size res = [None] * self.size
cov = [False] * self.size cov = [False] * self.size
...@@ -296,31 +346,27 @@ def make_table(result_dict, column: str = "results"): ...@@ -296,31 +346,27 @@ def make_table(result_dict, column: str = "results"):
elif column == "groups": elif column == "groups":
column_name = "Groups" column_name = "Groups"
md_writer = MarkdownTableWriter() all_headers = [
latex_writer = LatexTableWriter()
md_writer.headers = [
column_name,
"Version",
"Filter",
"Metric",
"Value",
"",
"Stderr",
]
latex_writer.headers = [
column_name, column_name,
"Version", "Version",
"Filter", "Filter",
"n-shot",
"Metric", "Metric",
"Value", "Value",
"", "",
"Stderr", "Stderr",
] ]
md_writer = MarkdownTableWriter()
latex_writer = LatexTableWriter()
md_writer.headers = all_headers
latex_writer.headers = all_headers
values = [] values = []
for k, dic in result_dict[column].items(): for k, dic in result_dict[column].items():
version = result_dict["versions"][k] version = result_dict["versions"][k]
n = str(result_dict["n-shot"][k])
if "alias" in dic: if "alias" in dic:
k = dic.pop("alias") k = dic.pop("alias")
...@@ -332,9 +378,9 @@ def make_table(result_dict, column: str = "results"): ...@@ -332,9 +378,9 @@ def make_table(result_dict, column: str = "results"):
if m + "_stderr" + "," + f in dic: if m + "_stderr" + "," + f in dic:
se = dic[m + "_stderr" + "," + f] se = dic[m + "_stderr" + "," + f]
values.append([k, version, f, m, "%.4f" % v, "±", "%.4f" % se]) values.append([k, version, f, n, m, "%.4f" % v, "±", "%.4f" % se])
else: else:
values.append([k, version, f, m, "%.4f" % v, "", ""]) values.append([k, version, f, n, m, "%.4f" % v, "", ""])
k = "" k = ""
version = "" version = ""
md_writer.value_matrix = values md_writer.value_matrix = values
...@@ -442,7 +488,6 @@ yaml.add_constructor("!function", import_function) ...@@ -442,7 +488,6 @@ yaml.add_constructor("!function", import_function)
def load_yaml_config(yaml_path=None, yaml_config=None, yaml_dir=None): def load_yaml_config(yaml_path=None, yaml_config=None, yaml_dir=None):
if yaml_config is None: if yaml_config is None:
with open(yaml_path, "rb") as file: with open(yaml_path, "rb") as file:
yaml_config = yaml.full_load(file) yaml_config = yaml.full_load(file)
...@@ -463,7 +508,6 @@ def load_yaml_config(yaml_path=None, yaml_config=None, yaml_dir=None): ...@@ -463,7 +508,6 @@ def load_yaml_config(yaml_path=None, yaml_config=None, yaml_dir=None):
include_path.reverse() include_path.reverse()
final_yaml_config = {} final_yaml_config = {}
for path in include_path: for path in include_path:
# Assumes that path is a full path. # Assumes that path is a full path.
# If not found, assume the included yaml # If not found, assume the included yaml
# is in the same dir as the original yaml # is in the same dir as the original yaml
......
...@@ -70,7 +70,8 @@ promptsource = [ ...@@ -70,7 +70,8 @@ promptsource = [
] ]
gptq = ["auto-gptq[triton] @ git+https://github.com/PanQiWei/AutoGPTQ"] gptq = ["auto-gptq[triton] @ git+https://github.com/PanQiWei/AutoGPTQ"]
anthropic = ["anthropic"] anthropic = ["anthropic"]
openai = ["openai", "tiktoken"] openai = ["openai>=1.3.5", "tiktoken"]
vllm = ["vllm"]
all = [ all = [
"lm_eval[dev]", "lm_eval[dev]",
"lm_eval[testing]", "lm_eval[testing]",
...@@ -80,5 +81,6 @@ all = [ ...@@ -80,5 +81,6 @@ all = [
"lm_eval[promptsource]", "lm_eval[promptsource]",
"lm_eval[gptq]", "lm_eval[gptq]",
"lm_eval[anthropic]", "lm_eval[anthropic]",
"lm_eval[openai]" "lm_eval[openai]",
"lm_eval[vllm]",
] ]
import pytest
from typing import List
from lm_eval.api.instance import Instance
import lm_eval.tasks as tasks
import sys
import torch
@pytest.mark.skip(reason="requires CUDA")
class TestVLLM:
    """Smoke tests for the vLLM causal-LM backend.

    Builds a small pythia-70m model and runs the three request types
    (loglikelihood, generate_until, loglikelihood_rolling) over a handful
    of task instances, asserting only result count and element types.

    NOTE(review): the class was previously named ``TEST_VLLM``, which does
    not match pytest's default ``python_classes = "Test*"`` collection
    pattern (matching is case-sensitive), so these tests were never
    collected. Renamed to ``TestVLLM`` so pytest picks them up.

    NOTE(review): everything below runs at class-definition (import) time,
    even though the class is skip-marked — consider moving this setup into
    a fixture or ``setup_class`` so importing this module stays cheap.
    """

    # Skip the whole class if vllm is not installed.
    vllm = pytest.importorskip("vllm")

    try:
        from lm_eval.models.vllm_causallms import VLLM

        LM = VLLM(pretrained="EleutherAI/pythia-70m")
    except ModuleNotFoundError:
        pass

    torch.use_deterministic_algorithms(True)
    tasks.initialize_tasks()

    # 10 multiple-choice instances from arc_easy for loglikelihood requests.
    multiple_choice_task = tasks.TASK_REGISTRY.get("arc_easy")()  # type: ignore
    multiple_choice_task.build_all_requests(limit=10, rank=0, world_size=1)
    MULTIPLE_CH: List[Instance] = multiple_choice_task.instances

    # 10 gsm8k instances for generation, capped at 10 generated tokens.
    generate_until_task = tasks.TASK_REGISTRY.get("gsm8k")()  # type: ignore
    generate_until_task.build_all_requests(limit=10, rank=0, world_size=1)
    generate_until_task._config.generation_kwargs["max_gen_toks"] = 10
    generate_until: List[Instance] = generate_until_task.instances

    # 10 wikitext instances for rolling loglikelihood.
    rolling_task = tasks.TASK_REGISTRY.get("wikitext")()  # type: ignore
    rolling_task.build_all_requests(limit=10, rank=0, world_size=1)
    ROLLING: List[Instance] = rolling_task.instances

    # TODO: make proper tests

    def test_loglikelihood(self) -> None:
        """Each multiple-choice request yields a (logprob, is_greedy) pair."""
        res = self.LM.loglikelihood(self.MULTIPLE_CH)
        assert len(res) == len(self.MULTIPLE_CH)
        for x in res:
            assert isinstance(x[0], float)

    def test_generate_until(self) -> None:
        """Each generation request yields a string continuation."""
        res = self.LM.generate_until(self.generate_until)
        assert len(res) == len(self.generate_until)
        for x in res:
            assert isinstance(x, str)

    def test_loglikelihood_rolling(self) -> None:
        """Each rolling request yields a float total loglikelihood."""
        res = self.LM.loglikelihood_rolling(self.ROLLING)
        for x in res:
            assert isinstance(x, float)
import hashlib import hashlib
import json import json
import openai
import os import os
import pickle import pickle
import pytest import pytest
...@@ -8,6 +7,10 @@ import unittest.mock as mock ...@@ -8,6 +7,10 @@ import unittest.mock as mock
import lm_eval.models as models import lm_eval.models as models
from openai import OpenAI
client = OpenAI()
LOGLIKELIHOOD_TEST_CASES = [ LOGLIKELIHOOD_TEST_CASES = [
("The quick brown fox jumps over the lazy", " dog"), ("The quick brown fox jumps over the lazy", " dog"),
...@@ -172,7 +175,7 @@ def openai_mock_completion(**kwargs): ...@@ -172,7 +175,7 @@ def openai_mock_completion(**kwargs):
if os.path.exists(fname): if os.path.exists(fname):
with open(fname, "rb") as fh: with open(fname, "rb") as fh:
return pickle.load(fh) return pickle.load(fh)
ret = openai.Completion.create(**kwargs) ret = client.completions.create(**kwargs)
ret.api_key = "" ret.api_key = ""
with open(fname, "wb") as fh: with open(fname, "wb") as fh:
pickle.dump(ret, fh) pickle.dump(ret, fh)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment