Commit 0d1ef037 authored by lintangsutawika

solved merge conflict

parents aa44be3f ada4a31d
include: _hellaswag_yaml
task: hellaswag_sr
dataset_path: alexandrainst/m_hellaswag
dataset_name: sr
training_split: null
validation_split: val

include: _hellaswag_yaml
task: hellaswag_sv
dataset_path: alexandrainst/m_hellaswag
dataset_name: sv
training_split: null
validation_split: val

include: _hellaswag_yaml
task: hellaswag_ta
dataset_path: alexandrainst/m_hellaswag
dataset_name: ta
training_split: null
validation_split: val

include: _hellaswag_yaml
task: hellaswag_te
dataset_path: alexandrainst/m_hellaswag
dataset_name: te
training_split: null
validation_split: val

include: _hellaswag_yaml
task: hellaswag_uk
dataset_path: alexandrainst/m_hellaswag
dataset_name: uk
training_split: null
validation_split: val

include: _hellaswag_yaml
task: hellaswag_vi
dataset_path: alexandrainst/m_hellaswag
dataset_name: vi
training_split: null
validation_split: val
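Each per-language config above only overrides the task name and dataset_name; everything else (prompting, metrics, split handling) is pulled in from the shared base config referenced by `include: _hellaswag_yaml`. Below is a minimal sketch of that composition, assuming a simple one-level merge where the including file's keys win; the file names and the merge rule are illustrative, not the harness's actual include resolver.

import yaml

BASE_CONFIG = "_hellaswag_yaml"    # shared defaults (illustrative path)
TASK_CONFIG = "hellaswag_sr.yaml"  # per-language override (illustrative path)

def load_task_config(path: str) -> dict:
    """Load a task YAML and resolve a single-level `include` key."""
    with open(path) as f:
        cfg = yaml.safe_load(f) or {}
    base_path = cfg.pop("include", None)
    if base_path:
        with open(base_path) as f:
            base = yaml.safe_load(f) or {}
        # Included values act as defaults; keys set in the task file take precedence.
        cfg = {**base, **cfg}
    return cfg

if __name__ == "__main__":
    print(load_task_config(TASK_CONFIG))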
import re

import datasets


def preprocess(text):
    text = text.strip()
    # NOTE: Brackets are artifacts of the WikiHow dataset portion of HellaSwag.
    text = text.replace(" [title]", ". ")
    text = re.sub(r"\[.*?\]", "", text)
    text = text.replace("  ", " ")  # collapse double spaces left behind by the removals
    return text

def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:
    def _process_doc(doc):
        # Join the two context halves; ctx_b is capitalized so the sentence reads cleanly.
        ctx = doc["ctx_a"] + " " + doc["ctx_b"].capitalize()
        out_doc = {
            "query": preprocess(doc["activity_label"] + ": " + ctx),
            "choices": [preprocess(ending) for ending in doc["endings"]],
            "gold": int(doc["label"]),
        }
        return out_doc

    return dataset.map(_process_doc)
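For a quick sanity check of what process_docs emits, here is a self-contained run over a single invented HellaSwag-style record; only the field names match the real schema, the values are placeholders rather than rows from alexandrainst/m_hellaswag.

# Invented example row, for illustration only.
sample = {
    "activity_label": "Removing ice from car",
    "ctx_a": "Then, the man writes over the snow covering the window of a car, and",
    "ctx_b": "a woman brushes the snow off with a broom.",
    "endings": [
        "[substeps] drives away without clearing the window.",
        "keeps writing until the window is clear.",
        "starts the engine and waits inside.",
        "scrapes the remaining ice off with a card.",
    ],
    "label": "1",
}

ds = datasets.Dataset.from_list([sample])
processed = process_docs(ds)
print(processed[0]["query"])    # activity label + joined context, cleaned by preprocess()
print(processed[0]["choices"])  # endings with the WikiHow-style bracket tags stripped
print(processed[0]["gold"])     # 1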
@@ -18,4 +18,4 @@ metric_list:
     aggregation: mean
     higher_is_better: true
 metadata:
-  - version: 1.0
+  version: 1.0
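All of the metadata hunks in this commit (the one above and the ones below) apply the same mechanical change: assuming the removed lines were the old single-element list form, version moves from a list entry under metadata to a plain mapping key. The difference matters to any code that reads the parsed config, as this small comparison shows.

import yaml

old = yaml.safe_load("metadata:\n  - version: 1.0\n")
new = yaml.safe_load("metadata:\n  version: 1.0\n")

print(old)  # {'metadata': [{'version': 1.0}]} -> version is buried in a list of dicts
print(new)  # {'metadata': {'version': 1.0}}   -> new['metadata']['version'] works directly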
import argparse
from typing import Dict, List
import yaml
......
@@ -17,4 +17,4 @@ metric_list:
     aggregation: mean
     higher_is_better: true
 metadata:
-  - version: 0.0
+  version: 0.0
@@ -20,4 +20,4 @@ metric_list:
     aggregation: bits_per_byte
     higher_is_better: false
 metadata:
-  - version: 2.0
+  version: 2.0
@@ -18,4 +18,4 @@ metric_list:
     aggregation: mean
     higher_is_better: true
 metadata:
-  - version: 1.0
+  version: 1.0
@@ -42,4 +42,4 @@ metric_list:
     aggregation: mean
     higher_is_better: true
 metadata:
-  - version: 0.0
+  version: 1.0
@@ -16,4 +16,4 @@ metric_list:
     aggregation: mean
     higher_is_better: true
 metadata:
-  - version: 1.0
+  version: 1.0
 def doc_to_text(doc) -> str:
     ctxs = "\n".join(doc["CONTEXTS"])
     return "Abstract: {}\nQuestion: {}\nAnswer:".format(
-        ctxs, doc["QUESTION"], doc["final_decision"]
+        ctxs,
+        doc["QUESTION"],
     )
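The doc_to_text change drops the stray doc["final_decision"] argument: the format string only has two placeholders, so the gold answer was silently ignored rather than leaking into the prompt, and removing it just tidies the call. A quick illustration with an invented PubMedQA-style record, using the updated function; only the field names follow the schema seen above.

# Invented record, for illustration only.
doc = {
    "CONTEXTS": ["Background sentence one.", "Background sentence two."],
    "QUESTION": "Does the intervention improve outcomes?",
    "final_decision": "yes",  # still available to the target side, just not part of the prompt
}
print(doc_to_text(doc))
# Abstract: Background sentence one.
# Background sentence two.
# Question: Does the intervention improve outcomes?
# Answer: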
@@ -13,4 +13,4 @@ metric_list:
     aggregation: mean
     higher_is_better: true
 metadata:
-  - version: 1.0
+  version: 1.0
@@ -19,4 +19,4 @@ metric_list:
     aggregation: mean
     higher_is_better: true
 metadata:
-  - version: 1.0
+  version: 1.0
@@ -11,4 +11,4 @@ doc_to_choice: ["no", "yes"]
 metric_list:
   - metric: f1
 metadata:
-  - version: 1.0
+  version: 1.0
@@ -15,4 +15,4 @@ metric_list:
     aggregation: mean
     higher_is_better: true
 metadata:
-  - version: 1.0
+  version: 2.0
@@ -3,7 +3,6 @@ from functools import partial
def process_docs(dataset, set_answer_type="bool"):
FEATURES = ["title", "abstract", "question", "answer", "answer_type"]
def _categorise_answer(answer_blob):
......