Commit 57f4a8a9 authored by Neel Kant
Browse files

Remove unused code

parent 70174ae3
...@@ -327,8 +327,8 @@ class json_dataset(data.Dataset): ...@@ -327,8 +327,8 @@ class json_dataset(data.Dataset):
all_strs (list): list of all strings from the dataset all_strs (list): list of all strings from the dataset
all_labels (list): list of all labels from the dataset (if they have it) all_labels (list): list of all labels from the dataset (if they have it)
""" """
def __init__(self, path, tokenizer=None, preprocess_fn=None, binarize_sent=False, def __init__(self, path, tokenizer=None, preprocess_fn=None,
text_key='sentence', label_key='label', loose_json=False, **kwargs): text_key='sentence', label_key='label', loose_json=False, **kwargs):
self.is_lazy = False self.is_lazy = False
self.preprocess_fn = preprocess_fn self.preprocess_fn = preprocess_fn
self.path = path self.path = path
...@@ -344,9 +344,6 @@ class json_dataset(data.Dataset): ...@@ -344,9 +344,6 @@ class json_dataset(data.Dataset):
self.X.append(s) self.X.append(s)
self.Y.append(j[label_key]) self.Y.append(j[label_key])
if binarize_sent:
self.Y = binarize_labels(self.Y, hard=binarize_sent)
def SetTokenizer(self, tokenizer): def SetTokenizer(self, tokenizer):
if tokenizer is None: if tokenizer is None:
self.using_tokenizer = False self.using_tokenizer = False
...@@ -453,6 +450,7 @@ class json_dataset(data.Dataset): ...@@ -453,6 +450,7 @@ class json_dataset(data.Dataset):
j[self.label_key] = -1 j[self.label_key] = -1
yield j yield j
class GPT2Dataset(data.Dataset): class GPT2Dataset(data.Dataset):
def __init__(self, ds, def __init__(self, ds,
...@@ -629,10 +627,8 @@ class bert_sentencepair_dataset(data.Dataset): ...@@ -629,10 +627,8 @@ class bert_sentencepair_dataset(data.Dataset):
np_rng = np.random.RandomState(seed=[rng.randint(0, 2**32-1) for _ in range(16)]) np_rng = np.random.RandomState(seed=[rng.randint(0, 2**32-1) for _ in range(16)])
# get seq length # get seq length
target_seq_length = self.max_seq_len target_seq_length = self.max_seq_len
short_seq = False
if rng.random() < self.short_seq_prob: if rng.random() < self.short_seq_prob:
target_seq_length = rng.randint(2, target_seq_length) target_seq_length = rng.randint(2, target_seq_length)
short_seq = True
# get sentence pair and label # get sentence pair and label
is_random_next = None is_random_next = None
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment