"include/vscode:/vscode.git/clone" did not exist on "334361cbde76a2566fb215a64a6652205b0d2336"
Commit aca3f5e4 authored by sanchit-gandhi's avatar sanchit-gandhi
Browse files

[training] compute normalised wer

parent c2b90bdc
...@@ -29,8 +29,10 @@ def wer(asr_model_name_or_path, prompts, audios, device, per_device_eval_batch_s ...@@ -29,8 +29,10 @@ def wer(asr_model_name_or_path, prompts, audios, device, per_device_eval_batch_s
batch_size=int(per_device_eval_batch_size), batch_size=int(per_device_eval_batch_size),
) )
word_error = 100 * metric.compute( normalizer = asr_pipeline.tokenizer.normalize
predictions=[t["text"].lower() for t in transcriptions], references=[t.lower() for t in prompts] normalized_predictions = [normalizer(t["text"]) for t in transcriptions]
) normalized_references = [normalizer(t) for t in prompts]
word_error = 100 * metric.compute(predictions=normalized_predictions, references=normalized_references)
return word_error, [t["text"] for t in transcriptions] return word_error, [t["text"] for t in transcriptions]
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment