"git@developer.sourcefind.cn:OpenDAS/pytorch3d.git" did not exist on "b2b0c5a4426bb907517452a6fe643eda39dd73c8"
Commit f1f9fa0a authored by Jared Casper's avatar Jared Casper
Browse files

Merge branch 'staging_compile_helpers' into 'staging'

runtime compilation of helpers

See merge request ADLR/megatron-lm!61
parents 8134313d 9073c4bd
...@@ -246,6 +246,9 @@ def get_samples_mapping_(indexed_dataset, ...@@ -246,6 +246,9 @@ def get_samples_mapping_(indexed_dataset,
start_time = time.time() start_time = time.time()
print_rank_0(' > building sapmles index mapping for {} ...'.format( print_rank_0(' > building sapmles index mapping for {} ...'.format(
name)) name))
# First compile and then import.
from megatron.data.dataset_utils import compile_helper
compile_helper()
from megatron.data import helpers from megatron.data import helpers
samples_mapping = helpers.build_mapping( samples_mapping = helpers.build_mapping(
indexed_dataset.doc_idx, indexed_dataset.doc_idx,
......
...@@ -18,6 +18,19 @@ import collections ...@@ -18,6 +18,19 @@ import collections
import numpy as np import numpy as np
def compile_helper():
    """Build the C++ dataset helpers at runtime by running ``make``.

    ``make -C <dir>`` is run in the directory that contains this file.
    If ``make`` returns a non-zero status, a failure message is printed
    and the process exits with status 1.

    Make sure this is invoked on a single process.
    """
    import os
    import subprocess
    import sys

    # Directory of this module; `make -C` is pointed at it.
    source_dir = os.path.abspath(os.path.dirname(__file__))
    result = subprocess.run(['make', '-C', source_dir])
    if result.returncode == 0:
        return

    # The build failed; report it and stop the process.
    print("Making C++ dataset helpers module failed, exiting.")
    sys.exit(1)
def build_training_sample(sample, def build_training_sample(sample,
target_seq_length, max_seq_length, target_seq_length, max_seq_length,
vocab_id_list, vocab_id_to_token_dict, vocab_id_list, vocab_id_to_token_dict,
......
...@@ -178,6 +178,9 @@ def _build_index_mappings(name, data_prefix, documents, sizes, ...@@ -178,6 +178,9 @@ def _build_index_mappings(name, data_prefix, documents, sizes,
# sample-idx. # sample-idx.
start_time = time.time() start_time = time.time()
# Use C++ implementation for speed. # Use C++ implementation for speed.
# First compile and then import.
from megatron.data.dataset_utils import compile_helper
compile_helper()
from megatron.data import helpers from megatron.data import helpers
assert doc_idx.dtype == np.int32 assert doc_idx.dtype == np.int32
assert sizes.dtype == np.int32 assert sizes.dtype == np.int32
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment