gaoqiong / lm-evaluation-harness

Commit 9907e0a7, authored Jul 04, 2023 by FarzanehNakhaee
Merge branch 'big-refactor' into add-qa4mre-config
Parents: 649a7f95, 070b6b9c
Changes: 29 files in total; this page shows 9 changed files with 36 additions and 31 deletions (+36 -31):

lm_eval/tasks/super_glue/record/t5-prompt.yaml      +1  -2
lm_eval/tasks/super_glue/wic/default.yaml           +14 -0
lm_eval/tasks/super_glue/wic/promptsource-00.yaml   +0  -14
lm_eval/tasks/super_glue/wic/promptsource-01.yaml   +0  -5
lm_eval/tasks/super_glue/wic/promptsource-02.yaml   +0  -5
lm_eval/tasks/super_glue/wic/utils.py               +13 -0
lm_eval/tasks/super_glue/wsc/t5-prompt.yaml         +1  -2
lm_eval/utils.py                                    +6  -2
setup.py                                            +1  -1
lm_eval/tasks/super_glue/record/t5-prompt.yaml

 group:
   - super-glue-t5-prompt
-task: t5-prompt
-reference: "From Raffel et. al. 2019"
+task: super_glue-record-t5-prompt
 dataset_path: super_glue
 dataset_name: record
 training_split: train
 ...
lm_eval/tasks/super_glue/wic/default.yaml (new file, mode 0 → 100644)

group:
  - super-glue-lm-eval-v1
task: "wic"
dataset_path: super_glue
dataset_name: wic
output_type: multiple_choice
training_split: train
validation_split: validation
doc_to_text: !function utils.doc_to_text
doc_to_target: !function utils.doc_to_target
gold_alias: "{{label}}" # this will be cast to an int.
template_aliases: "{% set answer_choices = ['no', 'yes'] %}"
metric_list:
  - metric: acc
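Note on the new config: output_type is multiple_choice and template_aliases defines answer_choices = ['no', 'yes'], so the integer label rendered by gold_alias (the "cast to an int" mentioned in the comment) presumably indexes into those two choices, i.e. 0 selects "no" and 1 selects "yes". The selection logic itself lives in the harness, not in this file.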
lm_eval/tasks/super_glue/wic/promptsource-00.yaml (deleted, mode 100644 → 0)

group:
  - super-glue-promptsource
task: "GPT-3-prompt"
dataset_path: super_glue
dataset_name: wic
training_split: train
validation_split: validation
use_prompt: "promptsource:GPT-3-prompt"
metric_list:
  - metric: exact_match
    aggregation: mean
    higher_is_better: true
    ignore_case: true
    ignore_punctuation: true
lm_eval/tasks/super_glue/wic/promptsource-01.yaml (deleted, mode 100644 → 0)

include: promptsource-00.yaml
group:
  - super-glue-promptsource
task: "GPT-3-prompt-with-label"
use_prompt: "promptsource:GPT-3-prompt-with-label"
lm_eval/tasks/super_glue/wic/promptsource-02.yaml (deleted, mode 100644 → 0)

include: promptsource-00.yaml
group:
  - super-glue-promptsource
task: "affirmation_true_or_false"
use_prompt: "promptsource:affirmation_true_or_false"
lm_eval/tasks/super_glue/wic/utils.py (new file, mode 0 → 100644)

def doc_to_text(doc):
    return (
        "Sentence 1: {}\nSentence 2: {}\nQuestion: Is the word '{}' used in the same way in the"
        " two sentences above?\nAnswer:".format(
            doc["sentence1"],
            doc["sentence2"],
            doc["sentence1"][doc["start1"] : doc["end1"]],
        )
    )


def doc_to_target(doc):
    return " {}".format({0: "no", 1: "yes"}[doc["label"]])
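A quick illustration of what these two helpers produce, using a made-up WiC-style record (hypothetical values, not an actual dataset row; field names follow the super_glue/wic schema used above):

doc = {
    "sentence1": "He played the piano beautifully.",
    "sentence2": "The children played in the park.",
    "start1": 3,   # character span of the target word in sentence1
    "end1": 9,
    "label": 0,    # 0 = different sense ("no"), 1 = same sense ("yes")
}

print(doc_to_text(doc))
# Sentence 1: He played the piano beautifully.
# Sentence 2: The children played in the park.
# Question: Is the word 'played' used in the same way in the two sentences above?
# Answer:

print(repr(doc_to_target(doc)))
# ' no'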
lm_eval/tasks/super_glue/wsc/t5-prompt.yaml

 group:
   - super-glue-t5-prompt
-task: t5-prompt
-reference: "From Raffel et. al. 2019"
+task: super_glue-wsc-t5-prompt
 dataset_path: super_glue
 dataset_name: wsc
 training_split: train
 ...
lm_eval/utils.py

...
@@ -10,7 +10,7 @@ import collections
 import importlib.util
 import fnmatch
-from typing import List, Union
+from typing import List, Literal, Union
 import gc
 import torch
...
@@ -453,7 +453,11 @@ def create_iterator(raw_iterator, rank, world_size, limit=None):
     return islice(raw_iterator, rank, limit, world_size)


-def pad_and_concat(max_length: int, tensors: List[torch.Tensor], padding_side="right"):
+def pad_and_concat(
+    max_length: int,
+    tensors: List[torch.Tensor],
+    padding_side: Literal["right", "left"] = "right",
+):
     """
     Method for padding a list of tensors given the maximum tensor
     length in the batch. Used for batching inputs and continuations in
...
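The second hunk only changes the signature: padding_side gains an explicit Literal["right", "left"] annotation, which is why Literal is added to the typing import in the first hunk. For intuition, here is a minimal sketch of what right- versus left-padding means in this context; it is an illustrative re-implementation, not the harness's actual function body, and it assumes 1-D integer tensors padded with zeros:

import torch
import torch.nn.functional as F
from typing import List, Literal


def pad_and_concat_sketch(
    max_length: int,
    tensors: List[torch.Tensor],
    padding_side: Literal["right", "left"] = "right",
) -> torch.Tensor:
    # Pad each 1-D tensor out to max_length, then stack into a (batch, max_length) tensor.
    padded = []
    for t in tensors:
        pad_len = max_length - t.size(0)
        if padding_side == "right":
            padded.append(F.pad(t, (0, pad_len)))  # zeros appended after the sequence
        else:
            padded.append(F.pad(t, (pad_len, 0)))  # zeros prepended before the sequence
    return torch.stack(padded)


batch = [torch.tensor([1, 2, 3]), torch.tensor([4, 5])]
print(pad_and_concat_sketch(3, batch, padding_side="left"))
# tensor([[1, 2, 3],
#         [0, 4, 5]])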
setup.py

...
@@ -55,7 +55,7 @@ setuptools.setup(
         "promptsource": [
             "promptsource @ git+https://github.com/bigscience-workshop/promptsource.git#egg=promptsource"
         ],
-        "auto-gptq": ["auto-gptq[triton] @ git+https://github.com/PanQiWei/AutoGPTQ"],
+        "gptq": ["auto-gptq[triton] @ git+https://github.com/PanQiWei/AutoGPTQ"],
         "anthropic": ["anthropic"],
     },
 )
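Practical effect of the rename: the AutoGPTQ dependency is now exposed under the "gptq" extra rather than "auto-gptq", so from a source checkout it would presumably be installed with something like pip install -e ".[gptq]".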