#!/bin/bash
# Copyright (c) 2017  Johns Hopkins University (Author: Yenda Trmal <jtrmal@gmail.com>)
# Apache 2.0


## Filter for scoring of the STT results. Convert everything to lowercase
## and add some ad-hoc fixes for the hesitations

perl -e '
   while(<STDIN>) {
     @A  = split(" ", $_);
     $id = shift @A; print "$id ";
     foreach $a (@A) {
       print lc($a) . " " unless $a =~ /\[.*\]/;
     }
     print "\n";
    }' | \
sed -e '
    s/\<mhm\>/hmm/g;
    s/\<mm\>/hmm/g;
    s/\<mmm\>/hmm/g;
'

#| uconv -f  utf-8  -t utf-8 -x Latin-ASCII
