Merge branch 'master' into json-task

4de8a74e · Stella Biderman · GitHub · 3226ed64 · bda68845 · 4de8a74e
Unverified commit 4de8a74e, authored May 21, 2023 by Stella Biderman; committed by GitHub May 21, 2023
20 changed files
--- a/lm_eval/datasets/bigbench_resources/logical_deduction_three_objects.json
+++ b/lm_eval/datasets/bigbench_resources/logical_deduction_three_objects.json
--- a/lm_eval/datasets/bigbench_resources/movie_recommendation.json
+++ b/lm_eval/datasets/bigbench_resources/movie_recommendation.json
--- a/lm_eval/datasets/bigbench_resources/navigate.json
+++ b/lm_eval/datasets/bigbench_resources/navigate.json
--- a/lm_eval/datasets/bigbench_resources/reasoning_about_colored_objects.json
+++ b/lm_eval/datasets/bigbench_resources/reasoning_about_colored_objects.json
--- a/lm_eval/datasets/bigbench_resources/ruin_names.json
+++ b/lm_eval/datasets/bigbench_resources/ruin_names.json
--- a/lm_eval/datasets/bigbench_resources/salient_translation_error_detection.json
+++ b/lm_eval/datasets/bigbench_resources/salient_translation_error_detection.json
--- a/lm_eval/datasets/bigbench_resources/snarks.json
+++ b/lm_eval/datasets/bigbench_resources/snarks.json
--- a/lm_eval/datasets/bigbench_resources/sports_understanding.json
+++ b/lm_eval/datasets/bigbench_resources/sports_understanding.json
--- a/lm_eval/datasets/bigbench_resources/temporal_sequences.json
+++ b/lm_eval/datasets/bigbench_resources/temporal_sequences.json
--- a/lm_eval/datasets/bigbench_resources/tracking_shuffled_objects_five_objects.json
+++ b/lm_eval/datasets/bigbench_resources/tracking_shuffled_objects_five_objects.json
--- a/lm_eval/datasets/bigbench_resources/tracking_shuffled_objects_seven_objects.json
+++ b/lm_eval/datasets/bigbench_resources/tracking_shuffled_objects_seven_objects.json
--- a/lm_eval/datasets/bigbench_resources/tracking_shuffled_objects_three_objects.json
+++ b/lm_eval/datasets/bigbench_resources/tracking_shuffled_objects_three_objects.json
--- a/lm_eval/datasets/wikitext/__init__.py
+++ b/lm_eval/datasets/wikitext/__init__.py
--- a/lm_eval/datasets/wikitext/dataset_infos.json
+++ b/lm_eval/datasets/wikitext/dataset_infos.json
-{"wikitext-103-v1": {"description": " The WikiText language modeling dataset is a collection of over 100 million tokens extracted from the set of verified\n Good and Featured articles on Wikipedia. The dataset is available under the Creative Commons Attribution-ShareAlike\n License.\n", "citation": "@misc{merity2016pointer,\n      title={Pointer Sentinel Mixture Models},\n      author={Stephen Merity and Caiming Xiong and James Bradbury and Richard Socher},\n      year={2016},\n      eprint={1609.07843},\n      archivePrefix={arXiv},\n      primaryClass={cs.CL}\n}\n", "homepage": "https://blog.einstein.ai/the-wikitext-long-term-dependency-language-modeling-dataset/", "license": "Creative Commons Attribution-ShareAlike 4.0 International (CC BY-SA 4.0)", "features": {"page": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "wikitext", "config_name": "wikitext-103-v1", "version": {"version_str": "1.0.0", "description": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 1281262, "num_examples": 62, "dataset_name": "wikitext"}, "train": {"name": "train", "num_bytes": 539297488, "num_examples": 29444, "dataset_name": "wikitext"}, "validation": {"name": "validation", "num_bytes": 1142488, "num_examples": 60, "dataset_name": "wikitext"}}, "download_checksums": {"https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-103-v1.zip": {"num_bytes": 190229076, "checksum": "242ba0f20b329cfdf1ccc61e9e9e5b59becf189db7f7a81cd2a0e2fc31539590"}}, "download_size": 190229076, "post_processing_size": null, "dataset_size": 541721238, "size_in_bytes": 731950314}, "wikitext-2-v1": {"description": " The WikiText language modeling dataset is a collection of over 100 million tokens extracted from the set of verified\n Good and Featured articles on Wikipedia. 
The dataset is available under the Creative Commons Attribution-ShareAlike\n License.\n", "citation": "@misc{merity2016pointer,\n      title={Pointer Sentinel Mixture Models},\n      author={Stephen Merity and Caiming Xiong and James Bradbury and Richard Socher},\n      year={2016},\n      eprint={1609.07843},\n      archivePrefix={arXiv},\n      primaryClass={cs.CL}\n}\n", "homepage": "https://blog.einstein.ai/the-wikitext-long-term-dependency-language-modeling-dataset/", "license": "Creative Commons Attribution-ShareAlike 4.0 International (CC BY-SA 4.0)", "features": {"page": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "wikitext", "config_name": "wikitext-2-v1", "version": {"version_str": "1.0.0", "description": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 1256634, "num_examples": 62, "dataset_name": "wikitext"}, "train": {"name": "train", "num_bytes": 10799034, "num_examples": 629, "dataset_name": "wikitext"}, "validation": {"name": "validation", "num_bytes": 1121860, "num_examples": 60, "dataset_name": "wikitext"}}, "download_checksums": {"https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-v1.zip": {"num_bytes": 4475746, "checksum": "92675f1d63015c1c8b51f1656a52d5bdbc33aafa60cc47a218a66e7ee817488c"}}, "download_size": 4475746, "post_processing_size": null, "dataset_size": 13177528, "size_in_bytes": 17653274}, "wikitext-103-raw-v1": {"description": " The WikiText language modeling dataset is a collection of over 100 million tokens extracted from the set of verified\n Good and Featured articles on Wikipedia. 
The dataset is available under the Creative Commons Attribution-ShareAlike\n License.\n", "citation": "@misc{merity2016pointer,\n      title={Pointer Sentinel Mixture Models},\n      author={Stephen Merity and Caiming Xiong and James Bradbury and Richard Socher},\n      year={2016},\n      eprint={1609.07843},\n      archivePrefix={arXiv},\n      primaryClass={cs.CL}\n}\n", "homepage": "https://blog.einstein.ai/the-wikitext-long-term-dependency-language-modeling-dataset/", "license": "Creative Commons Attribution-ShareAlike 4.0 International (CC BY-SA 4.0)", "features": {"page": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "wikitext", "config_name": "wikitext-103-raw-v1", "version": {"version_str": "1.0.0", "description": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 1290775, "num_examples": 62, "dataset_name": "wikitext"}, "train": {"name": "train", "num_bytes": 540656522, "num_examples": 29444, "dataset_name": "wikitext"}, "validation": {"name": "validation", "num_bytes": 1147025, "num_examples": 60, "dataset_name": "wikitext"}}, "download_checksums": {"https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-103-raw-v1.zip": {"num_bytes": 191984949, "checksum": "91c00ae287f0d699e18605c84afc9e45c192bc6b7797ff8837e5474655a33794"}}, "download_size": 191984949, "post_processing_size": null, "dataset_size": 543094322, "size_in_bytes": 735079271}, "wikitext-2-raw-v1": {"description": " The WikiText language modeling dataset is a collection of over 100 million tokens extracted from the set of verified\n Good and Featured articles on Wikipedia. 
The dataset is available under the Creative Commons Attribution-ShareAlike\n License.\n", "citation": "@misc{merity2016pointer,\n      title={Pointer Sentinel Mixture Models},\n      author={Stephen Merity and Caiming Xiong and James Bradbury and Richard Socher},\n      year={2016},\n      eprint={1609.07843},\n      archivePrefix={arXiv},\n      primaryClass={cs.CL}\n}\n", "homepage": "https://blog.einstein.ai/the-wikitext-long-term-dependency-language-modeling-dataset/", "license": "Creative Commons Attribution-ShareAlike 4.0 International (CC BY-SA 4.0)", "features": {"page": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "wikitext", "config_name": "wikitext-2-raw-v1", "version": {"version_str": "1.0.0", "description": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 1290775, "num_examples": 62, "dataset_name": "wikitext"}, "train": {"name": "train", "num_bytes": 10942633, "num_examples": 629, "dataset_name": "wikitext"}, "validation": {"name": "validation", "num_bytes": 1147025, "num_examples": 60, "dataset_name": "wikitext"}}, "download_checksums": {"https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-raw-v1.zip": {"num_bytes": 4721645, "checksum": "ef7edb566e3e2b2d31b29c1fdb0c89a4cc683597484c3dc2517919c615435a11"}}, "download_size": 4721645, "post_processing_size": null, "dataset_size": 13380433, "size_in_bytes": 18102078}}
--- a/lm_eval/datasets/wikitext/wikitext.py
+++ b/lm_eval/datasets/wikitext/wikitext.py
--- a/lm_eval/evaluator.py
+++ b/lm_eval/evaluator.py
--- a/lm_eval/models/__init__.py
+++ b/lm_eval/models/__init__.py
 from . import gpt2
 from . import gpt3
+from . import huggingface
 from . import textsynth
 from . import dummy

 MODEL_REGISTRY = {
    "hf": gpt2.HFLM,
+    "hf-causal": gpt2.HFLM,
+    "hf-causal-experimental": huggingface.AutoCausalLM,
+    "hf-seq2seq": huggingface.AutoSeq2SeqLM,
    "gpt2": gpt2.GPT2LM,
    "gpt3": gpt3.GPT3LM,
    "textsynth": textsynth.TextSynthLM,

--- a/lm_eval/models/gpt2.py
+++ b/lm_eval/models/gpt2.py
--- a/lm_eval/models/huggingface.py
+++ b/lm_eval/models/huggingface.py
--- a/lm_eval/models/textsynth.py
+++ b/lm_eval/models/textsynth.py
@@ -123,7 +123,8 @@ class TextSynthLM(BaseLM):
        res = []
        for request in tqdm(requests):
            inp = request[0]
-            until = request[1]
+            request_args = request[1]
+            until = request_args["until"]
            response = textsynth_completion(
                url=self.api_url + "/v1/engines/" + self.engine + "/completions",
                headers={"Authorization": "Bearer " + self.api_key},