Commit edab45d4 authored by liangjing
Browse files

Update preprocess_data.py

parent 4b255099
Pipeline #1895 passed with stage
......@@ -203,7 +203,7 @@ def get_args():
choices=['BertWordPieceLowerCase','BertWordPieceCase',
'GPT2BPETokenizer', 'SentencePieceTokenizer',
'GPTSentencePieceTokenizer', 'Llama2Tokenizer',
'Llama3Tokenizer', 'MistralTokenizer', 'NullTokenizer'],
'Llama3Tokenizer', 'MistralTokenizer', 'QwenTokenizer', 'NullTokenizer'],
help='What type of tokenizer to use.')
group.add_argument('--tokenizer-model', type=str, default=None,
help='YTTM tokenizer model.')
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment