Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
Megatron-LM
Commits
c6882114
Commit
c6882114
authored
Apr 15, 2020
by
Mohammad
Browse files
added runtime compilation for helpers
parent
8134313d
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
15 additions
and
0 deletions
+15
-0
megatron/data/bert_dataset.py
megatron/data/bert_dataset.py
+3
-0
megatron/data/dataset_utils.py
megatron/data/dataset_utils.py
+9
-0
megatron/data/gpt2_dataset.py
megatron/data/gpt2_dataset.py
+3
-0
No files found.
megatron/data/bert_dataset.py
View file @
c6882114
...
...
@@ -246,6 +246,9 @@ def get_samples_mapping_(indexed_dataset,
start_time
=
time
.
time
()
print_rank_0
(
' > building sapmles index mapping for {} ...'
.
format
(
name
))
# First compile and then import.
from
megatron.data.dataset_utils
import
compile_helper
compile_helper
()
from
megatron.data
import
helpers
samples_mapping
=
helpers
.
build_mapping
(
indexed_dataset
.
doc_idx
,
...
...
megatron/data/dataset_utils.py
View file @
c6882114
...
...
@@ -18,6 +18,15 @@ import collections
import
numpy
as
np
def compile_helper():
    """Compile the helper module at runtime.

    Runs ``make -C <dir>`` where ``<dir>`` is the directory containing this
    file. Make sure this is invoked on a single process.

    Raises:
        RuntimeError: if ``make`` exits with a non-zero status. Failing
            here keeps a broken build from surfacing later as a confusing
            import error when the compiled helpers are loaded.
    """
    import os
    import subprocess

    path = os.path.abspath(os.path.dirname(__file__))
    result = subprocess.run(['make', '-C', path])
    # subprocess.run does not raise on a non-zero exit status unless
    # check=True is passed, so the status must be inspected explicitly.
    if result.returncode != 0:
        raise RuntimeError(
            'compiling dataset helpers with `make -C {}` failed '
            '(exit status {})'.format(path, result.returncode))
def
build_training_sample
(
sample
,
target_seq_length
,
max_seq_length
,
vocab_id_list
,
vocab_id_to_token_dict
,
...
...
megatron/data/gpt2_dataset.py
View file @
c6882114
...
...
@@ -178,6 +178,9 @@ def _build_index_mappings(name, data_prefix, documents, sizes,
# sample-idx.
start_time
=
time
.
time
()
# Use C++ implementation for speed.
# First compile and then import.
from
megatron.data.dataset_utils
import
compile_helper
compile_helper
()
from
megatron.data
import
helpers
assert
doc_idx
.
dtype
==
np
.
int32
assert
sizes
.
dtype
==
np
.
int32
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment