Commit 51f27158 authored by lintangsutawika

update with merge

parents 924c9790 f5408b6b
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "lm_eval"
-version = "0.4.0"
+version = "0.4.1"
 authors = [
     {name="EleutherAI", email="contact@eleuther.ai"}
 ]
@@ -56,15 +56,14 @@ Repository = "https://github.com/EleutherAI/lm-evaluation-harness"
 [project.optional-dependencies]
 anthropic = ["anthropic"]
 dev = ["pytest", "pytest-cov", "pytest-xdist", "pre-commit", "mypy"]
-gptq = ["auto-gptq[triton] @ git+https://github.com/PanQiWei/AutoGPTQ"]
+gptq = ["auto-gptq[triton]>=0.6.0"]
 ifeval = ["langdetect", "immutabledict"]
 mamba = ["mamba_ssm", "causal-conv1d==1.0.2"]
 math = ["sympy>=1.12", "antlr4-python3-runtime==4.11"]
 multilingual = ["nagisa>=0.2.7", "jieba>=0.42.1", "pycountry"]
 openai = ["openai==1.3.9", "tiktoken"]
-promptsource = [
-    "promptsource @ git+https://github.com/bigscience-workshop/promptsource.git#egg=promptsource"
-]
+optimum = ["optimum[openvino]"]
+promptsource = ["promptsource>=0.2.3"]
 sentencepiece = ["sentencepiece>=0.1.98", "protobuf>=4.22.1"]
 testing = ["pytest", "pytest-cov", "pytest-xdist"]
 vllm = ["vllm<=0.2.5"]
......
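Note on the dependency changes above: gptq and promptsource now resolve from PyPI releases instead of git URLs, and the new optimum extra ("optimum[openvino]") backs the OpenVINO test added at the end of this commit. A minimal sketch of how such an optional extra is commonly guarded at import time; the guard placement and error text are illustrative, not code from this diff:

# Illustrative only: guard the optional OpenVINO dependency installed via
# `pip install lm_eval[optimum]` so users get an actionable message instead
# of a bare ImportError. Not part of this commit.
try:
    from optimum.intel import OVModelForCausalLM  # provided by the "optimum" extra
except ImportError as err:
    raise ImportError(
        "OpenVINO support needs the optional dependencies: pip install lm_eval[optimum]"
    ) from err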
@@ -23,7 +23,7 @@ def parse_args():
 if __name__ == "__main__":
     args = parse_args()
-    with open(args.benchmark_path) as file:
+    with open(args.benchmark_path, encoding="utf-8") as file:
         TASK_LIST = yaml.full_load(file)
     for task in tqdm(TASK_LIST):
         eval_logger.info(f"Processing {task}")
@@ -57,5 +57,5 @@ if __name__ == "__main__":
         file_save_path = os.path.join(file_path, full_file_name)
         eval_logger.info(f"Save to {file_save_path}")
-        with open(file_save_path, "w") as yaml_file:
+        with open(file_save_path, "w", encoding="utf-8") as yaml_file:
             yaml.dump(config_dict, yaml_file)
@@ -119,7 +119,7 @@ class Buckets:
 def do_ngrams_in_buckets(n_value, working_directory, bucket_count):
-    pile_statistics = json.load(open("pile_statistics.json", "r"))
+    pile_statistics = json.load(open("pile_statistics.json", "r", encoding="utf-8"))
     pile_document_count = pile_statistics["Document Count"]
     start_offsets = pile_statistics["File Start Offsets"]
@@ -212,4 +212,4 @@ if __name__ == "__main__":
     info_dict = {"title": "dataset ngrams", "ngram_size": 13}
     info_dict_path = os.path.join(args.working_directory, "info.json")
-    json.dump(info_dict, open(info_dict_path, "w"))
+    json.dump(info_dict, open(info_dict_path, "w", encoding="utf-8"))
@@ -79,7 +79,7 @@ if __name__ == "__main__":
     stats_file_path = "pile_statistics.json"
     if os.path.exists(stats_file_path):
-        stats = json.load(open(stats_file_path, "r"))
+        stats = json.load(open(stats_file_path, "r", encoding="utf-8"))
     else:
         document_count, total_document_size_chars, start_offsets = get_stats()
         stats = {
@@ -88,7 +88,7 @@ if __name__ == "__main__":
             "Total Pile Characters": total_document_size_chars,
             "File Start Offsets": start_offsets,
         }
-        json.dump(stats, open(stats_file_path, "w"), indent=4)
+        json.dump(stats, open(stats_file_path, "w", encoding="utf-8"), indent=4)
     print(f"document_count: {stats['Document Count']}")
     print(f"total_chars: {stats['Total Pile Characters']}")
......
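The encoding="utf-8" arguments added in the hunks above and below pin down behavior that otherwise depends on the platform: open() without an explicit encoding falls back to the locale's preferred encoding, which is not UTF-8 on every system. A minimal sketch of the difference, reusing the pile_statistics.json name from this diff:

import json
import locale

# What open() falls back to when no encoding is given; e.g. cp1252 on many
# Windows installs, UTF-8 on most Linux/macOS setups.
print(locale.getpreferredencoding(False))

# The pattern applied throughout this commit: state the encoding explicitly so
# non-ASCII content in the JSON round-trips identically on every platform.
with open("pile_statistics.json", "r", encoding="utf-8") as f:
    stats = json.load(f)
print(stats["Document Count"])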
@@ -61,14 +61,14 @@ if __name__ == "__main__":
         if not filenames:
             continue
         path_readme = os.path.join(dirpath, "README.md")
-        with open(path_readme, "w") as f:
+        with open(path_readme, "w", encoding="utf-8") as f:
             # get path name, only last folder
             path_name = dirpath.split("/")[-1]
             f.write(f"# {path_name} \n\n")
         for filename in sorted([f for f in filenames if f.endswith(".json")]):
             path = os.path.join(dirpath, filename)
-            with open(path, "r") as f:
+            with open(path, "r", encoding="utf-8") as f:
                 result_dict = json.load(f)
-            with open(path_readme, "a") as f:
+            with open(path_readme, "a", encoding="utf-8") as f:
                 f.write(f"## {filename} \n")
                 f.write(f"{make_table(result_dict)} \n")
@@ -50,5 +50,5 @@ if __name__ == "__main__":
         values.append(v)
     writer.value_matrix = values
     table = writer.dumps()
-    with open(args.output, "w") as f:
+    with open(args.output, "w", encoding="utf-8") as f:
         f.write(table)
@@ -94,7 +94,11 @@ def eval_models(args, branch=None):
         ret = os.system(command)
-        results[model] = json.load(open(output_path)) if ret == 0 else {"results": {}}
+        results[model] = (
+            json.load(open(output_path, encoding="utf-8"))
+            if ret == 0
+            else {"results": {}}
+        )
         end_time = time.time()
......
@@ -53,7 +53,7 @@ def main():
     os.makedirs(args.output_base_path, exist_ok=True)
     for task_name, task in task_dict.items():
-        if type(task) == tuple:
+        if isinstance(task, tuple):
             group_name, task = task
         rnd = random.Random()
         rnd.seed(args.seed)
......
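The type(task) == tuple to isinstance(task, tuple) swap above is behavior-preserving for plain tuples and additionally accepts tuple subclasses; it is also the form linters such as flake8 (E721) push toward. A small standalone illustration, using a hypothetical namedtuple that is not a real harness type:

from collections import namedtuple

# Hypothetical stand-in for a (group_name, task) pair; illustrative only.
GroupedTask = namedtuple("GroupedTask", ["group_name", "task"])
item = GroupedTask("my_group", object())

print(type(item) == tuple)      # False: exact-type check rejects subclasses
print(isinstance(item, tuple))  # True: isinstance also accepts subclasses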
@@ -69,18 +69,20 @@ def main():
         model_args = re.sub(
             "/|=",
             "__",
-            json.load(open(Path(args.data_path, model, "results.json")))["config"][
-                "model_args"
-            ],
+            json.load(
+                open(Path(args.data_path, model, "results.json"), encoding="utf-8")
+            )["config"]["model_args"],
         )
         with open(
-            Path(args.data_path, model, f"{model_args}_{task}.jsonl"), "r"
+            Path(args.data_path, model, f"{model_args}_{task}.jsonl"),
+            "r",
+            encoding="utf-8",
         ) as file:
             data = json.loads(file.read())
-        configs = json.load(open(Path(args.data_path, model, "results.json")))[
-            "configs"
-        ]
+        configs = json.load(
+            open(Path(args.data_path, model, "results.json"), encoding="utf-8")
+        )["configs"]
         config = configs[task]
         if model_index == 0:  # Only need to assemble data for the first model
@@ -124,7 +126,9 @@ def tasks_for_model(model: str, data_path: str):
         list: A list of tasks for the model.
     """
     dir_path = Path(data_path, model)
-    config = (json.load(open(Path(dir_path, "results.json")))["configs"],)
+    config = (
+        json.load(open(Path(dir_path, "results.json"), encoding="utf-8"))["configs"],
+    )
     return list(config[0].keys())
......
import random
import tempfile

import pytest
from optimum.intel import OVModelForCausalLM
from transformers import AutoTokenizer

import lm_eval.evaluator as evaluator
import lm_eval.tasks as tasks
from lm_eval.api.registry import get_model


tasks.initialize_tasks()

SUPPORTED_ARCHITECTURES_TASKS = {
    "facebook/opt-125m": "lambada_openai",
    "hf-internal-testing/tiny-random-gpt2": "wikitext",
}


@pytest.mark.parametrize("model_id,task", SUPPORTED_ARCHITECTURES_TASKS.items())
def test_evaluator(model_id, task):
    with tempfile.TemporaryDirectory() as tmpdirname:
        model = OVModelForCausalLM.from_pretrained(
            model_id, export=True, use_cache=True
        )
        model.save_pretrained(tmpdirname)
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        tokenizer.save_pretrained(tmpdirname)

        lm = get_model("openvino").create_from_arg_string(
            f"pretrained={tmpdirname}",
            {
                "batch_size": 1,
                "device": "cpu",
            },
        )

        def ll_fn(reqs):
            for ctx, cont in [req.args for req in reqs]:
                if len(ctx) == 0:
                    continue
                # space convention
                assert ctx[-1] != " "
                assert cont[0] == " " or ctx[-1] == "\n"

            res = []
            random.seed(42)
            for _ in reqs:
                res.append((-random.random(), False))

            return res

        def ll_perp_fn(reqs):
            for (string,) in [req.args for req in reqs]:
                assert isinstance(string, str)

            res = []
            random.seed(42)
            for _ in reqs:
                res.append(-random.random())

            return res

        lm.loglikelihood = ll_fn
        lm.loglikelihood_rolling = ll_perp_fn

        limit = 10
        evaluator.simple_evaluate(
            model=lm,
            tasks=[task],
            num_fewshot=0,
            limit=limit,
            bootstrap_iters=10,
        )
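For orientation, a hedged sketch of exercising the same OpenVINO backend through the public API rather than pytest; the pretrained path is a placeholder, and the registry name "openvino" mirrors the get_model("openvino") call in the test above:

import lm_eval.evaluator as evaluator
import lm_eval.tasks as tasks

tasks.initialize_tasks()

# Placeholder path: point this at a locally exported OpenVINO model directory.
results = evaluator.simple_evaluate(
    model="openvino",
    model_args="pretrained=/path/to/exported-ov-model",
    tasks=["lambada_openai"],
    num_fewshot=0,
    limit=10,
    bootstrap_iters=10,
)
print(results["results"])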