"tests/experimental/vscode:/vscode.git/clone" did not exist on "2e544bd77afe019c4bb9d8c6882879c48d3ac65f"
Commit 3207c19a authored by Jared Casper
Browse files

Missed some changes from next-best-lm branch.

parent 46ffb75c
...@@ -89,7 +89,7 @@ def set_global_variables(args): ...@@ -89,7 +89,7 @@ def set_global_variables(args):
set_args(args) set_args(args)
_build_num_microbatches_calculator(args) _build_num_microbatches_calculator(args)
if args.vocab_file: if args.vocab_file or args.tokenizer_model:
_ = _build_tokenizer(args) _ = _build_tokenizer(args)
_set_tensorboard_writer(args) _set_tensorboard_writer(args)
_set_adlr_autoresume(args) _set_adlr_autoresume(args)
......
...@@ -104,6 +104,8 @@ def get_args(): ...@@ -104,6 +104,8 @@ def get_args():
help='Append an <eod> token to the end of a document.') help='Append an <eod> token to the end of a document.')
group.add_argument('--lang', type=str, default='english', group.add_argument('--lang', type=str, default='english',
help='Language to use for NLTK-powered sentence splitting.') help='Language to use for NLTK-powered sentence splitting.')
group.add_argument('--tokenizer-model', type=str, default=None,
help='sentencepeice tokenizer model.')
group = parser.add_argument_group(title='output data') group = parser.add_argument_group(title='output data')
......
...@@ -326,6 +326,9 @@ def main(): ...@@ -326,6 +326,9 @@ def main():
for p in processes: for p in processes:
p.join() p.join()
if args.partitions == 1:
return
# encode partition files in parallel # encode partition files in parallel
processes = [] processes = []
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment