up (#14079)

53dc39d8 · Patrick von Platen · GitHub · 0bc2e54f · 53dc39d8
Unverified Commit 53dc39d8 authored Oct 20, 2021 by Patrick von Platen Committed by GitHub Oct 20, 2021
Show whitespace changes
Inline Side-by-side

Showing with 6 additions and 1 deletion

examples/pytorch/speech-recognition/run_speech_recognition_ctc.py .../pytorch/speech-recognition/run_speech_recognition_ctc.py +6 -1

No files found.
--- a/examples/pytorch/speech-recognition/run_speech_recognition_ctc.py
+++ b/examples/pytorch/speech-recognition/run_speech_recognition_ctc.py
@@ -410,10 +410,15 @@ def main():
    # load config
    config = AutoConfig.from_pretrained(model_args.model_name_or_path, cache_dir=model_args.cache_dir)

+    # tokenizer is defined by `tokenizer_class` if present in config else by `model_type`
+    config_for_tokenizer = config if config.tokenizer_class is not None else None
+    tokenizer_type = config.model_type if config.tokenizer_class is None else None
+
    # load feature_extractor, tokenizer and create processor
    tokenizer = AutoTokenizer.from_pretrained(
        training_args.output_dir,
-        tokenizer_type=config.model_type,
+        config=config_for_tokenizer,
+        tokenizer_type=tokenizer_type,
        unk_token="[UNK]",
        pad_token="[PAD]",
        word_delimiter_token="|",