Commit 42401fa2 authored by lintangsutawika

revert parts back to main

parent eb9f6788
@@ -256,7 +256,7 @@ def mcc_fn(items): # This is a passthrough function
 @register_metric(
     metric="f1",
     higher_is_better=True,
-    output_type=["multiple_choice"],
+    output_type="multiple_choice",
     aggregation="f1",
 )
 def f1_fn(items): # This is a passthrough function
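For context, the hunk above keeps `output_type` in the `@register_metric` call as a plain string rather than a one-element list. A minimal sketch of the same registration pattern, assuming `register_metric` is importable from `lm_eval.api.registry` and using a hypothetical passthrough metric named `my_metric_fn` (the metric name and aggregation below are illustrative, not taken from this diff):

    from lm_eval.api.registry import register_metric

    @register_metric(
        metric="my_metric",             # hypothetical metric name, for illustration only
        higher_is_better=True,
        output_type="multiple_choice",  # a single string, matching the reverted form above
        aggregation="mean",             # assumed aggregation key; not part of this diff
    )
    def my_metric_fn(items):  # passthrough, mirroring f1_fn in the hunk above
        return items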
@@ -1190,6 +1190,7 @@ class ConfigurableTask(Task):
                 eval_logger.warning("Applied prompt returns empty string")
                 return self.config.fewshot_delimiter
         else:
+            print(type(doc_to_text))
             raise TypeError

     def doc_to_target(self, doc: Mapping) -> Union[int, str, list]:
@@ -1279,6 +1280,7 @@ class ConfigurableTask(Task):
             else:
                 # Otherwise they are placed in the continuation
                 arguments = [(ctx, f"{target_delimiter}{cont}") for cont in choices]
+
             request_list = [
                 Instance(
                     request_type="loglikelihood",
@@ -1432,6 +1434,7 @@ class ConfigurableTask(Task):
                 ]
                 acc_mutual_info = 1.0 if np.argmax(lls_mutual_info) == gold else 0.0
                 result_dict["acc_mutual_info"] = acc_mutual_info
+
         elif self.OUTPUT_TYPE == "generate_until":
             gold = self.doc_to_target(doc)
             result = results[0]
@@ -1455,6 +1458,7 @@ class ConfigurableTask(Task):
                 scores = []
                 if not isinstance(gold, list):
                     # sometimes, a multiple_target dataset has exceptions where one doc has only one string answer
+                    # print(gold)
                     gold = [gold]
                 if metric == "exact_match":
                     result = [result for _ in range(len(gold))]
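The restored `# print(gold)` comment sits in the multiple-target scoring path, where a single string answer is wrapped in a list and the generated result is broadcast so it can be compared against every target. A minimal standalone sketch of that pattern, under the assumption that the per-target exact-match scores are then reduced by taking the best match; the function and variable names below are hypothetical and do not come from the harness:

    def exact_match_any(gold, result):
        # Some multiple_target docs carry a single string answer; normalize to a list.
        if not isinstance(gold, list):
            gold = [gold]
        # Broadcast the single generated string across all gold targets.
        results = [result for _ in range(len(gold))]
        # Score each (target, prediction) pair and keep the best match.
        scores = [1.0 if r == g else 0.0 for g, r in zip(gold, results)]
        return max(scores)

    # Example: a doc with several acceptable answers.
    exact_match_any(["Paris", "paris"], "Paris")  # -> 1.0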