Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
ASR_onnxruntime
Commits
d08c1882
Commit
d08c1882
authored
Sep 08, 2023
by
chenxj
Browse files
main commit
parent
5dc02166
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
249 additions
and
0 deletions
+249
-0
main.py
main.py
+64
-0
src/silero/__pycache__/utils.cpython-37.pyc
src/silero/__pycache__/utils.cpython-37.pyc
+0
-0
src/silero/utils.py
src/silero/utils.py
+185
-0
No files found.
main.py
0 → 100644
View file @
d08c1882
import
onnx
import
torch
import
onnxruntime
from
src.silero.utils
import
prepare_model_input
,
prepare_model_input_warmup
,
Decoder
,
get_wav_file_list
import
time
import
argparse
def str2bool(v):
    """Parse a command-line flag string as a boolean.

    Accepts "true", "t" or "1" (case-insensitive) as True; anything
    else is False.
    """
    return v.lower() in {"true", "t", "1"}
# Command-line interface for the ASR demo.  Arguments are parsed at import
# time so the resulting `args` namespace is visible to main() below.
parser = argparse.ArgumentParser()
# --warmup: when true, run dummy inference passes over every input shape
# before the timed run (see prepare_model_input_warmup).
parser.add_argument("--warmup", type=str2bool, default=False)
# --wav_dir: a single .wav file or a directory containing .wav files.
parser.add_argument("--wav_dir", type=str)
# --model_dir: path to the ONNX model file (despite the name, a file path).
parser.add_argument("--model_dir", type=str)
args = parser.parse_args()
def main():
    """Run ASR inference over the wav files given by args.wav_dir.

    Loads and validates the ONNX model from args.model_dir, creates an
    onnxruntime session (ROCm with CPU fallback), optionally warms the
    session up, then runs batched inference and prints the decoded text
    for each input file.
    """
    # Validate the model file before building the inference session.
    onnx_model = onnx.load(args.model_dir)
    onnx.checker.check_model(onnx_model)
    # NOTE(review): the ROCm device id is hard-coded to '4' — confirm this
    # matches the target machine; CPUExecutionProvider is the fallback.
    ort_session = onnxruntime.InferenceSession(
        args.model_dir,
        providers=[('ROCMExecutionProvider', {'device_id': '4'}),
                   'CPUExecutionProvider'])
    # Subword token table; its length must match the model's output
    # dimension (asserted in Decoder.process).  '_' is the CTC blank,
    # ' ' the word separator, '2' a "repeat previous token" marker.
    labels = [
        '_', 'th', 'the', 'in', 'an', 're', 'er', 'on', 'at', 'ou',
        'is', 'en', 'to', 'and', 'ed', 'al', 'as', 'it', 'ing', 'or',
        'of', 'es', 'ar', 'he', 'le', 'st', 'se', 'om', 'ic', 'be',
        'we', 'ly', 'that', 'no', 'wh', 've', 'ha', 'you', 'ch', 'ion',
        'il', 'ent', 'ro', 'me', 'id', 'ac', 'gh', 'for', 'was', 'lo',
        'ver', 'ut', 'li', 'ld', 'ay', 'ad', 'so', 'ir', 'im', 'un',
        'wi', 'ter', 'are', 'with', 'ke', 'ge', 'do', 'ur', 'all', 'ce',
        'ab', 'mo', 'go', 'pe', 'ne', 'this', 'ri', 'ght', 'de', 'one',
        'us', 'am', 'out', 'fe', 'but', 'po', 'his', 'te', 'ho', 'ther',
        'not', 'con', 'com', 'll', 'they', 'if', 'ould', 'su', 'have', 'ct',
        'ain', 'our', 'ation', 'fr', 'ill', 'now', 'sa', 'had', 'tr', 'her',
        'per', 'ant', 'oun', 'my', 'ul', 'ca', 'by', 'what', 'red', 'res',
        'od', 'ome', 'ess', 'man', 'ex', 'she', 'pl', 'co', 'wor', 'pro',
        'up', 'thing', 'there', 'ple', 'ag', 'can', 'qu', 'art', 'ally', 'ok',
        'from', 'ust', 'very', 'sh', 'ind', 'est', 'some', 'ate', 'wn', 'ti',
        'fo', 'ard', 'ap', 'him', 'were', 'ich', 'here', 'bo', 'ity', 'um',
        'ive', 'ous', 'way', 'end', 'ig', 'pr', 'which', 'ma', 'ist', 'them',
        'like', 'who', 'ers', 'when', 'act', 'use', 'about', 'ound', 'gr', 'et',
        'ide', 'ight', 'ast', 'king', 'would', 'ci', 'their', 'other', 'see', 'ment',
        'ong', 'wo', 'ven', 'know', 'how', 'said', 'ine', 'ure', 'more', 'der',
        'sel', 'br', 'ren', 'ack', 'ol', 'ta', 'low', 'ough', 'then', 'peo',
        'ye', 'ace', 'people', 'ink', 'ort', 'your', 'will', 'than', 'pp', 'any',
        'ish', 'look', 'la', 'just', 'tor', 'ice', 'itt', 'af', 'these', 'sp',
        'has', 'gre', 'been', 'ty', 'ies', 'ie', 'get', 'able', 'day', 'could',
        'bl', 'two', 'time', 'beca', 'into', 'age', 'ans', 'mis', 'new', 'ree',
        'ble', 'ite', 'si', 'urn', 'ass', 'cl', 'ber', 'str', 'think', 'dis',
        'mar', 'ence', 'pt', 'self', 'ated', 'did', 'el', 'don', 'ck', 'ph',
        'ars', 'ach', 'fore', 'its', 'part', 'ang', 'cre', 'well', 'ions', 'where',
        'ves', 'ved', 'em', 'good', 'because', 'over', 'ud', 'ts', 'off', 'turn',
        'cr', 'right', 'ress', 'most', 'every', 'pre', 'fa', 'fir', 'ild', 'pos',
        'down', 'work', 'ade', 'say', 'med', 'also', 'litt', 'little', 'ance', 'come',
        'ving', 'only', 'ful', 'ought', 'want', 'going', 'spe', 'ps', 'ater', 'first',
        'after', 'ue', 'ose', 'mu', 'iz', 'ire', 'int', 'rest', 'ser', 'coun',
        'des', 'light', 'son', 'let', 'ical', 'ick', 'ra', 'back', 'mon', 'ase',
        'ign', 'ep', 'world', 'may', 'read', 'form', 'much', 'even', 'should', 'again',
        'make', 'long', 'sto', 'cont', 'put', 'thr', 'under', 'cor', 'bet', 'jo',
        'car', 'ile', 'went', 'yes', 'ually', 'row', 'hand', 'ak', 'call', 'ary',
        'ia', 'many', 'cho', 'things', 'try', 'gl', 'ens', 'really', 'happ', 'great',
        'dif', 'bu', 'hi', 'made', 'room', 'ange', 'cent', 'ious', 'je', 'three',
        'ward', 'op', 'gen', 'those', 'life', 'tal', 'pa', 'through', 'und', 'cess',
        'before', 'du', 'side', 'need', 'less', 'inter', 'ting', 'ry', 'ise', 'na',
        'men', 'ave', 'fl', 'ction', 'pres', 'old', 'something', 'miss', 'never', 'got',
        'feren', 'imp', 'sy', 'ations', 'tain', 'ning', 'ked', 'sm', 'take', 'ten',
        'ted', 'ip', 'col', 'own', 'stand', 'add', 'min', 'wer', 'ms', 'ces',
        'gu', 'land', 'bod', 'log', 'cour', 'ob', 'vo', 'ition', 'hu', 'came',
        'comp', 'cur', 'being', 'comm', 'years', 'ily', 'wom', 'cer', 'kind', 'thought',
        'such', 'tell', 'child', 'nor', 'bro', 'ial', 'pu', 'does', 'head', 'clo',
        'ear', 'led', 'llow', 'ste', 'ness', 'too', 'start', 'mor', 'used', 'par',
        'play', 'ents', 'tri', 'upon', 'tim', 'num', 'ds', 'ever', 'cle', 'ef',
        'wr', 'vis', 'ian', 'sur', 'same', 'might', 'fin', 'differen', 'sho', 'why',
        'body', 'mat', 'beg', 'vers', 'ouse', 'actually', 'ft', 'ath', 'hel', 'sha',
        'ating', 'ual', 'found', 'ways', 'must', 'four', 'gi', 'val', 'di', 'tre',
        'still', 'tory', 'ates', 'high', 'set', 'care', 'ced', 'last', 'find', 'au',
        'inte', 'ev', 'ger', 'thank', 'ss', 'ict', 'ton', 'cal', 'nat', 'les',
        'bed', 'away', 'place', 'house', 'che', 'ject', 'sol', 'another', 'ited', 'att',
        'face', 'show', 'ner', 'ken', 'far', 'ys', 'lect', 'lie', 'tem', 'ened',
        'night', 'while', 'looking', 'ah', 'wal', 'dr', 'real', 'cha', 'exp', 'war',
        'five', 'pol', 'fri', 'wa', 'cy', 'fect', 'xt', 'left', 'give', 'soci',
        'cond', 'char', 'bor', 'point', 'number', 'mister', 'called', 'six', 'bre', 'vi',
        'without', 'person', 'air', 'different', 'lot', 'bit', 'pass', 'ular', 'youn', 'won',
        'main', 'cri', 'ings', 'den', 'water', 'human', 'around', 'quest', 'ters', 'ities',
        'feel', 'each', 'friend', 'sub', 'though', 'saw', 'ks', 'hund', 'hundred', 'times',
        'lar', 'ff', 'amer', 'scho', 'sci', 'ors', 'lt', 'arch', 'fact', 'hal',
        'himself', 'gener', 'mean', 'vol', 'school', 'ason', 'fam', 'ult', 'mind', 'itch',
        'ped', 'home', 'young', 'took', 'big', 'love', 'reg', 'eng', 'sure', 'vent',
        'ls', 'ot', 'ince', 'thous', 'eight', 'thousand', 'better', 'mom', 'appe', 'once',
        'ied', 'mus', 'stem', 'sing', 'ident', 'als', 'uh', 'mem', 'produ', 'stud',
        'power', 'atch', 'bas', 'father', 'av', 'nothing', 'gir', 'pect', 'unt', 'few',
        'kes', 'eyes', 'sk', 'always', 'ared', 'toge', 'stru', 'together', 'ics', 'bus',
        'fort', 'ween', 'rep', 'ically', 'small', 'ga', 'mer', 'ned', 'ins', 'between',
        'yet', 'stre', 'hard', 'system', 'course', 'year', 'cept', 'publ', 'sim', 'sou',
        'ready', 'follow', 'present', 'rel', 'turned', 'sw', 'possi', 'mother', 'io', 'bar',
        'ished', 'dec', 'ments', 'pri', 'next', 'ross', 'both', 'ship', 'ures', 'americ',
        'eas', 'asked', 'iness', 'serv', 'ists', 'ash', 'uni', 'build', 'phone', 'lau',
        'ctor', 'belie', 'change', 'interest', 'peri', 'children', 'thir', 'lear', 'plan', 'import',
        'ational', 'har', 'ines', 'dist', 'selves', 'city', 'sen', 'run', 'law', 'ghter',
        'proble', 'woman', 'done', 'heart', 'book', 'aut', 'ris', 'lim', 'looked', 'vid',
        'fu', 'bab', 'ately', 'ord', 'ket', 'oc', 'doing', 'area', 'tech', 'win',
        'name', 'second', 'certain', 'pat', 'lad', 'quite', 'told', 'ific', 'ative', 'uring',
        'gg', 'half', 'reason', 'moment', 'ility', 'ution', 'shall', 'aur', 'enough', 'idea',
        'open', 'understand', 'vie', 'contin', 'mal', 'hor', 'qui', 'address', 'heard', 'help',
        'inst', 'oney', 'whole', 'gover', 'commun', 'exam', 'near', 'didn', 'logy', 'oh',
        'tru', 'lang', 'restaur', 'restaurant', 'design', 'ze', 'talk', 'leg', 'line', 'ank',
        'ond', 'country', 'ute', 'howe', 'hold', 'live', 'comple', 'however', 'ized', 'ush',
        'seen', 'bye', 'fer', 'ital', 'women', 'net', 'state', 'bur', 'fac', 'whe',
        'important', 'dar', 'nine', 'sat', 'fic', 'known', 'having', 'against', 'soon', 'ety',
        'langu', 'public', 'sil', 'best', 'az', 'knew', 'black', 'velo', 'sort', 'seven',
        'imag', 'lead', 'cap', 'ask', 'alth', 'ature', 'nam', 'began', 'white', 'sent',
        'sound', 'vir', 'days', 'anything', 'yeah', 'ub', 'blo', 'sun', 'story', 'dire',
        'money', 'trans', 'mil', 'org', 'grow', 'cord', 'pped', 'cus', 'spo', 'sign',
        'beaut', 'goodbye', 'inde', 'large', 'question', 'often', 'hour', 'que', 'pur', 'town',
        'ield', 'coming', 'door', 'lig', 'ling', 'incl', 'partic', 'keep', 'engl', 'move',
        'later', 'ants', 'food', 'lights', 'bal', 'words', 'list', 'aw', 'allow', 'aren',
        'pret', 'tern', 'today', 'believe', 'almost', 'bir', 'word', 'possible', 'ither', 'case',
        'ried', 'ural', 'round', 'twent', 'develo', 'plain', 'ended', 'iting', 'chang', 'sc',
        'boy', 'gy', 'since', 'ones', 'suc', 'cas', 'national', 'plac', 'teen', 'pose',
        'started', 'mas', 'fi', 'fif', 'afr', 'fully', 'grou', 'wards', 'girl',
        'e', 't', 'a', 'o', 'i', 'n', 's', 'h', 'r', 'l',
        'd', 'u', 'c', 'm', 'w', 'f', 'g', 'y', 'p', 'b',
        'v', 'k', "'", 'x', 'j', 'q', 'z', '-', '2', ' ']
    decoder = Decoder(labels)
    if args.warmup:
        # Warm the session with synthetic batches built from a fixed clip,
        # covering the input shapes the real run may produce.
        test_file = 'speech_orig.wav'
        inputs = prepare_model_input_warmup(test_file)
        # actual ONNX inference and decoding
        T1 = time.perf_counter()
        for i in range(len(inputs)):
            onnx_input = inputs[i].detach().cpu().numpy()
            ort_inputs = {'input': onnx_input}
            ort_outs = ort_session.run(None, ort_inputs)
        T2 = time.perf_counter()
        print("warm up time:", T2 - T1)
    # test_files = ['speech_orig.wav']
    test_files = get_wav_file_list(args.wav_dir)
    print(test_files)
    inputs = prepare_model_input(test_files)
    # actual ONNX inference and decoding
    file_num = 0
    for i in range(len(inputs)):
        onnx_input = inputs[i].detach().cpu().numpy()
        print(onnx_input.shape)
        ort_inputs = {'input': onnx_input}
        t1 = time.perf_counter()
        ort_outs = ort_session.run(None, ort_inputs)
        t2 = time.perf_counter()
        print(t2 - t1)
        # Batches are zero-padded up to a multiple of 4 rows; stop decoding
        # once every real file has been handled so padding rows are skipped.
        for bn in range(onnx_input.shape[0]):
            file_num = file_num + 1
            if file_num > len(test_files):
                break
            decoded = decoder(torch.Tensor(ort_outs[0])[bn])
            print(decoded)
# Script entry point: parse args happens at import time above; inference
# runs only when executed directly.
if __name__ == "__main__":
    main()
src/silero/__pycache__/utils.cpython-37.pyc
0 → 100644
View file @
d08c1882
File added
src/silero/utils.py
0 → 100644
View file @
d08c1882
import
os
import
torch
import
warnings
import
torchaudio
from
typing
import
List
from
itertools
import
groupby
import
math
def read_batch(audio_paths: List[str]):
    """Load each file in *audio_paths* via read_audio and return the list
    of 1-D waveform tensors, in the same order."""
    waveforms = []
    for audio_path in audio_paths:
        waveforms.append(read_audio(audio_path))
    return waveforms
def split_into_batches(lst: List[str], batch_size: int = 10):
    """Chunk *lst* into consecutive slices of at most *batch_size* items.

    The final batch may be shorter; an empty list yields no batches.
    """
    batches = []
    start = 0
    while start < len(lst):
        batches.append(lst[start:start + batch_size])
        start += batch_size
    return batches
def read_audio(path: str, target_sr: int = 16000):
    """Load *path* as a mono waveform at *target_sr* Hz, capped in length.

    Returns a 1-D float tensor of at most 8 * 2**18 samples (the model's
    maximum input length); longer audio is resampled down to fit.
    """
    wav, sr = torchaudio.load(path)
    # Down-mix multi-channel audio to a single channel.
    if wav.size(0) > 1:
        wav = wav.mean(dim=0, keepdim=True)
    if sr != target_sr:
        transform = torchaudio.transforms.Resample(orig_freq=sr, new_freq=target_sr)
        wav = transform(wav)
        sr = target_sr
    # If still longer than 8 * 2**18 samples, resample down so it just fits.
    # NOTE(review): this second Resample hard-codes 16000 instead of using
    # target_sr — confirm intent before calling with a different target_sr.
    if len(wav.squeeze(0)) > 8 * pow(2, 18):
        transform = torchaudio.transforms.Resample(orig_freq=16000, new_freq=int(16000 * 8 * pow(2, 18) / len(wav.squeeze(0))))
        wav = transform(wav)
    # Sanity check only; stripped when Python runs with -O.
    assert len(wav.squeeze(0)) <= 8 * pow(2, 18)
    return wav.squeeze(0)
def
_check_wav_file
(
path
):
img_end
=
{
'wav'
}
return
any
([
path
.
lower
().
endswith
(
e
)
for
e
in
img_end
])
def get_wav_file_list(wav_file):
    """Collect .wav paths from *wav_file* (a file or a directory).

    Returns the paths sorted; raises Exception when the path does not
    exist or no wav file is found.
    """
    if wav_file is None or not os.path.exists(wav_file):
        raise Exception("not found any wav file in {}".format(wav_file))
    found = []
    if os.path.isfile(wav_file) and _check_wav_file(wav_file):
        found.append(wav_file)
    elif os.path.isdir(wav_file):
        for entry in os.listdir(wav_file):
            candidate = os.path.join(wav_file, entry)
            if os.path.isfile(candidate) and _check_wav_file(candidate):
                found.append(candidate)
    if not found:
        raise Exception("not found any wav file in {}".format(wav_file))
    return sorted(found)
def prepare_model_input(lst: List[str], batch_size: int = 24, device=torch.device('cpu')):
    """Read the wav files in *lst* and pack them into padded batch tensors.

    Files are grouped into batches of at most *batch_size*.  Each batch is
    zero-padded so that its sequence length is a multiple of 2**18 (with a
    12800-sample floor before rounding) and its row count a multiple of 4,
    then moved to *device*.  Returns the list of batch tensors.

    Fixes: the original's inner loop reused the outer loop's index name
    `i` (shadowing) and named its buffer `input`, shadowing the builtin.
    """
    batches = split_into_batches(lst, batch_size)
    inputs = []
    for batch in batches:
        tensors = read_batch(batch)
        # Round the longest waveform up to a multiple of 2**18 samples.
        max_seqlength = max(max(len(t) for t in tensors), 12800)
        max_seqlength = math.ceil(max_seqlength / pow(2, 18)) * pow(2, 18)
        # Pad the batch dimension up to a multiple of 4 rows.
        max_batch = math.ceil(len(tensors) / 4) * 4
        padded = torch.zeros(max_batch, max_seqlength)
        for row, wav in enumerate(tensors):
            padded[row, :len(wav)].copy_(wav)
        inputs.append(padded.to(device))
    return inputs
def prepare_model_input_warmup(path: str, device=torch.device('cpu')):
    """Build synthetic warm-up inputs covering every shape the model may see.

    The clip at *path* is tiled into rows of length k * 2**18 for
    k = 1..8; for each length, tensors with batch sizes 4, 8, ..., 24 are
    produced (48 tensors total), each moved to *device*.
    """
    audio_tensor = read_audio(path)
    inputs = []
    max_batnum = 24
    min_batnum = 4
    for l in range(8):
        max_seqlength = (l + 1) * pow(2, 18)
        # Repeat the clip end-to-end until the row is (almost) full; any
        # remainder shorter than one clip stays zero.
        single_input = torch.zeros(1, max_seqlength)
        for i in range(int(max_seqlength / len(audio_tensor))):
            single_input[0, i * len(audio_tensor):(i + 1) * len(audio_tensor)].copy_(audio_tensor)
        # Duplicate the filled row across batch sizes 4, 8, ..., 24.
        for bn in range(int(max_batnum / min_batnum)):
            input = torch.zeros(min_batnum * (bn + 1), max_seqlength)
            for h in range(min_batnum * (bn + 1)):
                input[h, :single_input.shape[1]].copy_(single_input.squeeze(0))
            input = input.to(device)
            inputs.append(input)
    return inputs
class Decoder():
    """Greedy CTC-style decoder for the silero ASR model.

    Takes the per-frame argmax over label probabilities, collapses
    consecutive repeats, drops the blank token, and optionally returns
    per-word timing information.

    Fixes: the bare `except:` around the repeat-token lookup now catches
    only the intended IndexError, and the warning message typo
    ("detected a the beginning") is corrected.
    """

    def __init__(self, labels: List[str]):
        """Store the label table and cache the blank and space indices.

        *labels* must contain '_' (blank) and ' ' (word separator); the
        token '2' is treated as a "repeat previous token" marker by
        process().
        """
        self.labels = labels
        self.blank_idx = self.labels.index('_')
        self.space_idx = self.labels.index(' ')

    def process(self, probs, wav_len, word_align):
        """Decode a (frames, num_labels) probability matrix.

        Returns the decoded string; when *wav_len* and *word_align* are
        truthy and at least one word was produced, returns
        (string, align_dicts) where each dict has 'word', 'start_ts' and
        'end_ts' (timestamps in the same unit as wav_len).
        """
        assert len(self.labels) == probs.shape[1]
        for_string = []
        argm = torch.argmax(probs, axis=1)
        # Index of the "repeat previous token" marker, hoisted out of the
        # per-frame loop (loop-invariant).
        repeat_idx = self.labels.index('2')
        align_list = [[]]
        for j, i in enumerate(argm):
            if i == repeat_idx:
                try:
                    prev = for_string[-1]
                    # '$' separates the duplicate so groupby() below does
                    # not collapse it with the original occurrence.
                    for_string.append('$')
                    for_string.append(prev)
                    align_list[-1].append(j)
                    continue
                except IndexError:
                    # A repeat marker with nothing before it: emit a space.
                    for_string.append(' ')
                    warnings.warn('Token "2" detected at the beginning of sentence, omitting')
                    align_list.append([])
                    continue
            if i != self.blank_idx:
                for_string.append(self.labels[i])
                if i == self.space_idx:
                    align_list.append([])
                else:
                    align_list[-1].append(j)
        # Collapse consecutive repeats, then strip the '$' separators.
        string = ''.join([x[0] for x in groupby(for_string)]).replace('$', '').strip()
        align_list = list(filter(lambda x: x, align_list))
        if align_list and wav_len and word_align:
            align_dicts = []
            # Frames-to-time scale factor.
            linear_align_coeff = wav_len / len(argm)
            # Pad word boundaries outward by up to 1.5 frames.
            to_move = min(align_list[0][0], 1.5)
            for i, align_word in enumerate(align_list):
                if len(align_word) == 1:
                    align_word.append(align_word[0])
                align_word[0] = align_word[0] - to_move
                if i == (len(align_list) - 1):
                    to_move = min(1.5, len(argm) - i)
                    align_word[-1] = align_word[-1] + to_move
                else:
                    to_move = min(1.5, (align_list[i + 1][0] - align_word[-1]) / 2)
                    align_word[-1] = align_word[-1] + to_move
            for word, timing in zip(string.split(), align_list):
                align_dicts.append({'word': word,
                                    'start_ts': round(timing[0] * linear_align_coeff, 2),
                                    'end_ts': round(timing[-1] * linear_align_coeff, 2)})
            return string, align_dicts
        return string

    def __call__(self, probs: torch.Tensor, wav_len: float = 0, word_align: bool = False):
        """Convenience wrapper: decode *probs* via process()."""
        return self.process(probs, wav_len, word_align)
def init_jit_model(model_url: str, device: torch.device = torch.device('cpu')):
    """Download (if not cached) and load a TorchScript ASR model.

    The file is cached under a "model" directory next to this module.
    Returns (model, Decoder) where the decoder is built from the model's
    own label table.  Also disables autograd globally.
    """
    torch.set_grad_enabled(False)
    cache_dir = os.path.join(os.path.dirname(__file__), "model")
    os.makedirs(cache_dir, exist_ok=True)
    local_path = os.path.join(cache_dir, os.path.basename(model_url))
    if not os.path.isfile(local_path):
        torch.hub.download_url_to_file(model_url, local_path, progress=True)
    jit_model = torch.jit.load(local_path, map_location=device)
    jit_model.eval()
    return jit_model, Decoder(jit_model.labels)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment