LabelSentenceReader.py 1.23 KB
Newer Older
Rayyyyy's avatar
Rayyyyy committed
1
2
import os

Rayyyyy's avatar
Rayyyyy committed
3
4
from . import InputExample

Rayyyyy's avatar
Rayyyyy committed
5
6
7
8

class LabelSentenceReader:
    """Reads in a file that has at least two columns: a label and a sentence.
    This reader can for example be used with the BatchHardTripletLoss.
Rayyyyy's avatar
Rayyyyy committed
9
10
    Maps labels automatically to integers
    """
Rayyyyy's avatar
Rayyyyy committed
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39

    def __init__(self, folder, label_col_idx=0, sentence_col_idx=1, separator="\t"):
        self.folder = folder
        self.label_map = {}
        self.label_col_idx = label_col_idx
        self.sentence_col_idx = sentence_col_idx
        self.separator = separator

    def get_examples(self, filename, max_examples=0):
        examples = []

        id = 0
        for line in open(os.path.join(self.folder, filename), encoding="utf-8"):
            splits = line.strip().split(self.separator)
            label = splits[self.label_col_idx]
            sentence = splits[self.sentence_col_idx]

            if label not in self.label_map:
                self.label_map[label] = len(self.label_map)

            label_id = self.label_map[label]
            guid = "%s-%d" % (filename, id)
            id += 1
            examples.append(InputExample(guid=guid, texts=[sentence], label=label_id))

            if 0 < max_examples <= id:
                break

        return examples