Modified from https://github.com/facebookresearch/fairseq/tree/272c4c5197250997148fb12c0db6306035f166a4/examples/wav2vec/unsupervised/scripts/phonemize_with_sil.py
print(f"| Error downloading {args.lexicon}, please download it from https://drive.google.com/file/d/1QVeyCpLXLnujBUAickpo-jaSVY-vKLnT/view?usp=sharing")
"BLEU score is being computed by splitting detokenized string on spaces, this is probably not what you want. Use --sacrebleu for standard 13a BLEU tokenization"
)
else:
logger.warning(
"If you are using BPE on the target side, the BLEU score is computed on BPE tokens, not on proper words. Use --sacrebleu for standard 13a BLEU tokenization"
)
# use print to be consistent with other main outputs: S-, H-, T-, D- and so on