format fix

4eeb8715 · lintangsutawika · 3e1301bb · 4eeb8715 · 4eeb8715 · 4eeb8715
Commit 4eeb8715 authored Jun 04, 2024 by lintangsutawika
Showing with 32 additions and 35 deletions

lm_eval/evaluator.py +0 -1

lm_eval/evaluator_utils.py +2 -2

tests/testyamls/test-01.yaml +30 -32

No files found.
--- a/lm_eval/evaluator.py
+++ b/lm_eval/evaluator.py
@@ -584,7 +584,6 @@ def evaluate(
                    task_aggregation_list = {}
                for group_or_task, group_or_task_info in task_dict.items():
                    # Convert to string
                    if isinstance(group_or_task, ConfigurableGroup):
                        group_config = group_or_task.config

--- a/lm_eval/evaluator_utils.py
+++ b/lm_eval/evaluator_utils.py
@@ -232,13 +232,13 @@ def prepare_print_tasks(
    for task_or_group_name, task_or_group_obj in task_dict.items():
        tab_string = " " * task_depth + "- " if task_depth > 0 else ""
        if isinstance(task_or_group_name, ConfigurableGroup):
-            string_name = task_or_group_name.group_name
+            # string_name = task_or_group_name.group_name
            name = task_or_group_name.task_id
            from_configurable_group = True
        elif isinstance(task_or_group_name, str):
            name = task_or_group_name
            if isinstance(task_or_group_obj, ConfigurableTask):
-                string_name = task_or_group_obj.task_name
+                # string_name = task_or_group_obj.task_name
                name = task_or_group_obj.task_id
            from_configurable_group = False

--- a/tests/testyamls/test-01.yaml
+++ b/tests/testyamls/test-01.yaml
@@ -3,12 +3,12 @@ group_alias: test 1
 task:
  - piqa # string task
  - ai2_arc # string tag
-  # - task: super-glue-lm-eval-v1 # Should this be spread out?
+  - task: super-glue-lm-eval-v1 # Should this be spread out?
-  #   num_fewshot: 3
+    num_fewshot: 3
  - task: swag # dict registered task
    num_fewshot: 2
-  # - task: mmlu
+  - task: mmlu
-  #   num_fewshot: 5
+    num_fewshot: 5
  - group: nli-tasks # dict group
    task:
      - anli
@@ -17,31 +17,29 @@ task:
    num_fewshot: 4
    metric_list:
      - metric: brier_score
-    aggregate_metric: true
+  - task: sciq # dict registered task duplicate
+    task_alias: sciq 2-shot
-  # - task: sciq # dict registered task duplicate
+    num_fewshot: 2
-  #   task_alias: sciq 2-shot
+  - task: sciq # dict registered task duplicate
-  #   num_fewshot: 2
+    task_alias: sciq 4-shot
-  # - task: sciq # dict registered task duplicate
+    num_fewshot: 4
-  #   task_alias: sciq 4-shot
+  - task: sciq # dict registered task duplicate
-  #   num_fewshot: 4
+    task_alias: sciq 6-shot
-  # - task: sciq # dict registered task duplicate
+    num_fewshot: 6
-  #   task_alias: sciq 6-shot
+  - task: siqa_custom # dict task
-  #   num_fewshot: 6
+    dataset_path: social_i_qa
-  # - task: siqa_custom # dict task
+    dataset_name: null
-  #   dataset_path: social_i_qa
+    output_type: multiple_choice
-  #   dataset_name: null
+    training_split: train
-  #   output_type: multiple_choice
+    validation_split: validation
-  #   training_split: train
+    doc_to_text: "Question: {{context}} {{question}}\nAnswer:"
-  #   validation_split: validation
+    target_delimiter: " "
-  #   doc_to_text: "Question: {{context}} {{question}}\nAnswer:"
+    doc_to_choice:
-  #   target_delimiter: " "
+      - "{{answerA}}"
-  #   doc_to_choice:
+      - "{{answerB}}"
-  #     - "{{answerA}}"
+      - "{{answerC}}"
-  #     - "{{answerB}}"
+    doc_to_target: "{{ (label|int) - 1 }}"
-  #     - "{{answerC}}"
+    metric_list:
-  #   doc_to_target: "{{ (label|int) - 1 }}"
+      - metric: acc
-  #   metric_list:
+        aggregation: mean
-  #     - metric: acc
+        higher_is_better: true
-  #       aggregation: mean
-  #       higher_is_better: true