Commit d928dd51 authored by Mohammad Shoeybi
Browse files

Merge branch 'staging_dataset_error_reporting' into 'staging'

Better error reporting when a dataset doesn't exist.

See merge request ADLR/megatron-lm!63
parents c6b5c137 f2779b18
......@@ -43,7 +43,8 @@ def infer_dataset_impl(path):
else:
return None
else:
print(f"Dataset path does not exist: {path}")
print(f"Dataset does not exist: {path}")
print("Path should be a basename that both .idx and .bin can be appended to get full filenames.")
return None
......@@ -55,6 +56,10 @@ def make_builder(out_file, impl, vocab_size=None):
def make_dataset(path, impl, skip_warmup=False):
if not IndexedDataset.exists(path):
print(f"Dataset does not exist: {path}")
print("Path should be a basename that both .idx and .bin can be appended to get full filenames.")
return None
if impl == 'infer':
impl = infer_dataset_impl(path)
if impl == 'lazy' and IndexedDataset.exists(path):
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment