common.py 1.55 KB
Newer Older
1
import datasets
Jason Phang's avatar
checkin  
Jason Phang committed
2
import numpy as np
Jason Phang's avatar
Jason Phang committed
3
import random
Jason Phang's avatar
checkin  
Jason Phang committed
4
5
6
from ..base import Dataset


sdtblck's avatar
sdtblck committed
7
class HFTask(Dataset):
    """Task whose documents come from ``datasets.load_dataset``.

    The dataset identified by ``DATASET_PATH`` / ``DATASET_NAME`` is loaded
    once in ``__init__`` and stored on ``self.data``; the split accessors
    below read the ``"train"``, ``"validation"`` and ``"test"`` entries of
    that object.
    """

    # Forwarded to ``datasets.load_dataset`` as ``path`` and ``name``.
    # Both default to None here; concrete tasks presumably override them.
    DATASET_PATH = None
    DATASET_NAME = None

    def __init__(self):
        """Load the dataset and reset the training-docs cache."""
        super().__init__()
        # Filled lazily by training_docs() on first use.
        self._training_docs = None
        self.data = datasets.load_dataset(
            path=self.DATASET_PATH,
            name=self.DATASET_NAME,
        )

    def has_training_docs(self):
        """Whether the task has a training set"""
        return "train" in self.data

    def has_validation_docs(self):
        """Whether the task has a validation set"""
        return "validation" in self.data

    def has_test_docs(self):
        """Whether the task has a test set"""
        return "test" in self.data

    def training_docs(self):
        """Return the training documents as a list, or None if there is no
        ``"train"`` split.

        The list is built on the first call and reused afterwards.
        """
        # Cache training for faster few-shot.
        # If data is too large to fit in memory, override this method.
        if not self.has_training_docs():
            return None
        if self._training_docs is None:
            self._training_docs = list(self.data["train"])
        return self._training_docs

    def validation_docs(self):
        """Return the ``"validation"`` split, or None if it does not exist."""
        if not self.has_validation_docs():
            return None
        return self.data["validation"]

    def test_docs(self):
        """Return the ``"test"`` split, or None if it does not exist."""
        if not self.has_test_docs():
            return None
        return self.data["test"]
Jason Phang's avatar
checkin  
Jason Phang committed
43
44
45
46
47
48
49
50
51


def simple_accuracy_metric(preds, golds):
    """Compute exact-match accuracy of ``preds`` against ``golds``.

    Args:
        preds: Predicted labels (any array-like accepted by NumPy).
        golds: Reference labels; must have the same shape as ``preds``.

    Returns:
        A dict with the accuracy as a float under ``"major"`` and under
        ``"minor"["acc"]``, plus ``"higher_is_better": True``. Empty inputs
        yield NaN, since the mean of an empty array is undefined.

    Raises:
        ValueError: If ``preds`` and ``golds`` differ in shape.
    """
    pred_arr = np.asarray(preds)
    gold_arr = np.asarray(golds)
    # Guard against NumPy broadcasting: comparing e.g. one prediction with
    # N golds would otherwise succeed and report a meaningless accuracy.
    if pred_arr.shape != gold_arr.shape:
        raise ValueError(
            "preds and golds must have the same shape, got "
            f"{pred_arr.shape} and {gold_arr.shape}"
        )
    acc = float((pred_arr == gold_arr).mean())
    return {
        "major": acc,
        "minor": {"acc": acc},
        "higher_is_better": True,
    }
Jason Phang's avatar
Jason Phang committed
52
53
54
55
56
57
58


def yesno(x):
    """Map a value to the string 'yes' if it is truthy, otherwise 'no'."""
    return 'yes' if x else 'no'