Commit c6882114 authored by Mohammad
Browse files

added runtime compilation for helpers

parent 8134313d
......@@ -246,6 +246,9 @@ def get_samples_mapping_(indexed_dataset,
start_time = time.time()
print_rank_0(' > building sapmles index mapping for {} ...'.format(
name))
# First compile and then import.
from megatron.data.dataset_utils import compile_helper
compile_helper()
from megatron.data import helpers
samples_mapping = helpers.build_mapping(
indexed_dataset.doc_idx,
......
......@@ -18,6 +18,15 @@ import collections
import numpy as np
def compile_helper():
    """Compile the helpers at runtime.

    Runs ``make`` in the directory that contains this module. Make sure
    this is invoked on a single process.

    Raises:
        RuntimeError: if ``make`` exits with a non-zero status.
    """
    import os
    import subprocess
    path = os.path.abspath(os.path.dirname(__file__))
    result = subprocess.run(['make', '-C', path])
    # Fail here with a clear message rather than ignoring the build status
    # and leaving the caller to hit a less obvious error when it imports
    # the helpers right after this call.
    if result.returncode != 0:
        raise RuntimeError(
            'compiling helpers failed: `make -C {}` exited with '
            'status {}'.format(path, result.returncode))
def build_training_sample(sample,
target_seq_length, max_seq_length,
vocab_id_list, vocab_id_to_token_dict,
......
......@@ -178,6 +178,9 @@ def _build_index_mappings(name, data_prefix, documents, sizes,
# sample-idx.
start_time = time.time()
# Use C++ implementation for speed.
# First compile and then import.
from megatron.data.dataset_utils import compile_helper
compile_helper()
from megatron.data import helpers
assert doc_idx.dtype == np.int32
assert sizes.dtype == np.int32
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment