Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
Megatron-LM
Commits
57f4a8a9
Commit
57f4a8a9
authored
Mar 23, 2020
by
Neel Kant
Browse files
Remove unused code
parent
70174ae3
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
3 additions
and
7 deletions
+3
-7
megatron/data_utils/datasets.py
megatron/data_utils/datasets.py
+3
-7
No files found.
megatron/data_utils/datasets.py
View file @
57f4a8a9
...
...
@@ -327,8 +327,8 @@ class json_dataset(data.Dataset):
all_strs (list): list of all strings from the dataset
all_labels (list): list of all labels from the dataset (if they have it)
"""
def
__init__
(
self
,
path
,
tokenizer
=
None
,
preprocess_fn
=
None
,
binarize_sent
=
False
,
text_key
=
'sentence'
,
label_key
=
'label'
,
loose_json
=
False
,
**
kwargs
):
def
__init__
(
self
,
path
,
tokenizer
=
None
,
preprocess_fn
=
None
,
text_key
=
'sentence'
,
label_key
=
'label'
,
loose_json
=
False
,
**
kwargs
):
self
.
is_lazy
=
False
self
.
preprocess_fn
=
preprocess_fn
self
.
path
=
path
...
...
@@ -344,9 +344,6 @@ class json_dataset(data.Dataset):
self
.
X
.
append
(
s
)
self
.
Y
.
append
(
j
[
label_key
])
if
binarize_sent
:
self
.
Y
=
binarize_labels
(
self
.
Y
,
hard
=
binarize_sent
)
def
SetTokenizer
(
self
,
tokenizer
):
if
tokenizer
is
None
:
self
.
using_tokenizer
=
False
...
...
@@ -453,6 +450,7 @@ class json_dataset(data.Dataset):
j
[
self
.
label_key
]
=
-
1
yield
j
class
GPT2Dataset
(
data
.
Dataset
):
def
__init__
(
self
,
ds
,
...
...
@@ -629,10 +627,8 @@ class bert_sentencepair_dataset(data.Dataset):
np_rng
=
np
.
random
.
RandomState
(
seed
=
[
rng
.
randint
(
0
,
2
**
32
-
1
)
for
_
in
range
(
16
)])
# get seq length
target_seq_length
=
self
.
max_seq_len
short_seq
=
False
if
rng
.
random
()
<
self
.
short_seq_prob
:
target_seq_length
=
rng
.
randint
(
2
,
target_seq_length
)
short_seq
=
True
# get sentence pair and label
is_random_next
=
None
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment