Commit 46a536cc authored by Mohammad Shoeybi
Browse files

Merge branch 'mmap_data_samples' into 'master'

mmap sample map

See merge request ADLR/megatron-lm!88
parents cfe35fd3 dd76c914
......@@ -279,7 +279,7 @@ def get_samples_mapping_(indexed_dataset,
print_rank_0(' > loading indexed mapping from {}'.format(
indexmap_filename))
start_time = time.time()
samples_mapping = np.load(indexmap_filename, allow_pickle=True)
samples_mapping = np.load(indexmap_filename, allow_pickle=True, mmap_mode='r')
print_rank_0(' loaded indexed file in {:3.3f} seconds'.format(
time.time() - start_time))
print_rank_0(' total number of samples: {}'.format(
......
......@@ -212,13 +212,13 @@ def _build_index_mappings(name, data_prefix, documents, sizes,
start_time = time.time()
print_rank_0(' > loading doc-idx mapping from {}'.format(
doc_idx_filename))
doc_idx = np.load(doc_idx_filename, allow_pickle=True)
doc_idx = np.load(doc_idx_filename, allow_pickle=True, mmap_mode='r')
print_rank_0(' > loading sample-idx mapping from {}'.format(
sample_idx_filename))
sample_idx = np.load(sample_idx_filename, allow_pickle=True)
sample_idx = np.load(sample_idx_filename, allow_pickle=True, mmap_mode='r')
print_rank_0(' > loading shuffle-idx mapping from {}'.format(
shuffle_idx_filename))
shuffle_idx = np.load(shuffle_idx_filename, allow_pickle=True)
shuffle_idx = np.load(shuffle_idx_filename, allow_pickle=True, mmap_mode='r')
print_rank_0(' loaded indexed file in {:3.3f} seconds'.format(
time.time() - start_time))
print_rank_0(' total number of samples: {}'.format(
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment