Commit 61f29f7f authored by Myle Ott, committed by Facebook Github Bot
Browse files

Better error message for incorrect --dataset-impl

Summary: Pull Request resolved: https://github.com/pytorch/fairseq/pull/723

Differential Revision: D15260870

Pulled By: myleott

fbshipit-source-id: 73d9b138b9ab44f96824076258f1a6319193d0f7
parent bd6e5c4f
......@@ -83,15 +83,18 @@ class IndexedDataset(torch.utils.data.Dataset):
def __init__(self, path, fix_lua_indexing=False):
super().__init__()
self.path = path
self.fix_lua_indexing = fix_lua_indexing
self.read_index(path)
self.data_file = None
self.path = path
self.read_index(path)
def read_index(self, path):
with open(index_file_path(path), 'rb') as f:
magic = f.read(8)
assert magic == b'TNTIDX\x00\x00'
assert magic == b'TNTIDX\x00\x00', (
'Index file doesn\'t match expected format. '
'Make sure that --dataset-impl is configured properly.'
)
version = f.read(8)
assert struct.unpack('<Q', version) == (1,)
code, self.element_size = struct.unpack('<QQ', f.read(16))
......@@ -350,7 +353,10 @@ class MMapIndexedDataset(torch.utils.data.Dataset):
def __init__(self, path):
with open(path, 'rb') as stream:
magic_test = stream.read(9)
assert self._HDR_MAGIC == magic_test
assert self._HDR_MAGIC == magic_test, (
'Index file doesn\'t match expected format. '
'Make sure that --dataset-impl is configured properly.'
)
version = struct.unpack('<Q', stream.read(8))
assert (1,) == version
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment