Commit 76e9a71e authored by Mohammad Shoeybi
Browse files

Merge branch 'indexed_dataset_nocast' into 'staging_gpt2_dataloader'

Do not cast return type of indexed dataset

See merge request ADLR/megatron-lm!48
parents d8bb57ad 71cf1866
......@@ -470,8 +470,6 @@ class MMapIndexedDataset(torch.utils.data.Dataset):
ptr, size = self._index[idx]
np_array = np.frombuffer(self._bin_buffer, dtype=self._index.dtype,
count=size, offset=ptr)
if self._index.dtype != np.int64:
np_array = np_array.astype(np.int64)
return np_array
elif isinstance(idx, slice):
start, stop, step = idx.indices(len(self))
......@@ -498,8 +496,6 @@ class MMapIndexedDataset(torch.utils.data.Dataset):
ptr += offset * np.dtype(self._index.dtype).itemsize
np_array = np.frombuffer(self._bin_buffer, dtype=self._index.dtype,
count=length, offset=ptr)
if self._index.dtype != np.int64:
np_array = np_array.astype(np.int64)
return np_array
@property
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment