"vscode:/vscode.git/clone" did not exist on "bd78f63a54e439a46f162f191618e3ba554aeef6"
Commit f1f9fa0a authored by Jared Casper
Browse files

Merge branch 'staging_compile_helpers' into 'staging'

runtime compilation of helpers

See merge request ADLR/megatron-lm!61
parents 8134313d 9073c4bd
......@@ -246,6 +246,9 @@ def get_samples_mapping_(indexed_dataset,
start_time = time.time()
print_rank_0(' > building sapmles index mapping for {} ...'.format(
name))
# First compile and then import.
from megatron.data.dataset_utils import compile_helper
compile_helper()
from megatron.data import helpers
samples_mapping = helpers.build_mapping(
indexed_dataset.doc_idx,
......
......@@ -18,6 +18,19 @@ import collections
import numpy as np
def compile_helper():
    """Compile the C++ dataset helpers module at runtime.

    Runs ``make -C <dir>``, where ``<dir>`` is the directory that contains
    this file. Make sure this is invoked on a single process.

    Raises:
        SystemExit: With status 1 when ``make`` cannot be launched or
            finishes with a non-zero return code.
    """
    import os
    import subprocess
    import sys

    path = os.path.abspath(os.path.dirname(__file__))
    try:
        build_ok = subprocess.run(['make', '-C', path]).returncode == 0
    except OSError as err:
        # `make` missing or not executable: report it through the same
        # failure path instead of surfacing a raw traceback.
        print("Unable to run make: {}".format(err))
        build_ok = False
    if not build_ok:
        print("Making C++ dataset helpers module failed, exiting.")
        sys.exit(1)
def build_training_sample(sample,
target_seq_length, max_seq_length,
vocab_id_list, vocab_id_to_token_dict,
......
......@@ -178,6 +178,9 @@ def _build_index_mappings(name, data_prefix, documents, sizes,
# sample-idx.
start_time = time.time()
# Use C++ implementation for speed.
# First compile and then import.
from megatron.data.dataset_utils import compile_helper
compile_helper()
from megatron.data import helpers
assert doc_idx.dtype == np.int32
assert sizes.dtype == np.int32
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment