add qasper in scrolls

020abc86 · lintangsutawika · c87703f3 · 020abc86 · 020abc86 · 020abc86
Commit 020abc86 authored Sep 13, 2023 by lintangsutawika
3 changed files
--- a/lm_eval/tasks/scrolls/metrics.py
+++ b/lm_eval/tasks/scrolls/metrics.py
+import evaluate
+
+# Shared ROUGE metric object, loaded once when this module is imported and
+# reused by the rouge1 / rouge2 / rougeL helpers in this file.
+rouge_fn = evaluate.load('rouge')
+
+def rouge1(predictions, references):
+    results = rouge_fn.compute(predictions=predictions, references=references)
+    return results['rouge1']
+
+def rouge2(predictions, references):
+    results = rouge_fn.compute(predictions=predictions, references=references)
+    return results['rouge2']
+
+def rougeL(predictions, references):
+    results = rouge_fn.compute(predictions=predictions, references=references)
+    return results['rougeL']
+
+# "squad_v2" metric object, loaded once when this module is imported; agg_f1
+# reads its "f1" result.
+squad_metric = evaluate.load("squad_v2")
+
+def agg_f1(samples):
+    predictions, references = zip(*samples)  # unzip, if you will
+    computed = squad_metric.compute(predictions=predictions, references=references)
+    return computed["f1"]
+
+
+def _download_metric():
+    import os
+    import shutil
+    from huggingface_hub import hf_hub_download
+    scrolls_metric_path = hf_hub_download(repo_id="tau/scrolls", repo_type="dataset", filename="metrics/scrolls.py")
+    updated_scrolls_metric_path = (
+        os.path.dirname(scrolls_metric_path) + os.path.basename(scrolls_metric_path).replace(".", "_") + ".py"
+    )
+    shutil.copy(scrolls_metric_path, updated_scrolls_metric_path)
+    return updated_scrolls_metric_path
\ No newline at end of file
--- a/lm_eval/tasks/scrolls/qasper/boolean_task.yaml
+++ b/lm_eval/tasks/scrolls/qasper/boolean_task.yaml
+# Task config: the "qasper" subset of tau/scrolls, scored as a two-way
+# ("yes" / "no") multiple-choice problem.
+# NOTE(review): nothing visible in this config restricts the documents to
+# yes/no questions; confirm that process_docs (or a filter elsewhere) does.
+group: scrolls
+task: scrolls_qasper_boolean
+dataset_path: tau/scrolls
+dataset_name: qasper
+output_type: multiple_choice
+training_split: train
+validation_split: validation
+process_docs: !function ../preprocessors.process_docs_prepended_question
+# NOTE(review): the "Hypothesis:" / "Conclusion:" wording reads like an
+# NLI-style prompt; confirm it is intended for Qasper questions.
+doc_to_text: "{{text}}\n\nHypothesis: {{question}}\nConclusion:"
+# Only the first entry of `outputs` is used as the target.
+doc_to_target: "{{outputs[0]}}"
+doc_to_choice: ["yes", "no"]
+should_decontaminate: true
+doc_to_decontamination_query: input
+metric_list:
+  - metric: f1
--- a/lm_eval/tasks/scrolls/qasper/freeform_task.yaml
+++ b/lm_eval/tasks/scrolls/qasper/freeform_task.yaml
+# Task config: the "qasper" subset of tau/scrolls, answered by free-form
+# generation (output_type: greedy_until).
+# NOTE(review): nothing visible in this config excludes yes/no questions;
+# confirm that process_docs (or a filter elsewhere) does, if that is intended.
+group: scrolls
+task: scrolls_qasper_freeform
+dataset_path: tau/scrolls
+dataset_name: qasper
+output_type: greedy_until
+training_split: train
+validation_split: validation
+process_docs: !function ../preprocessors.process_docs_prepended_question
+# NOTE(review): the "Hypothesis:" / "Conclusion:" wording reads like an
+# NLI-style prompt; confirm it is intended for Qasper questions.
+doc_to_text: "{{text}}\n\nHypothesis: {{question}}\nConclusion:"
+# Only the first entry of `outputs` is used as the target.
+doc_to_target: "{{outputs[0]}}"
+should_decontaminate: true
+doc_to_decontamination_query: input
+# NOTE(review): confirm that "f1" resolves to a metric suited to generated
+# text for this output_type.
+metric_list:
+  - metric: f1