ModelZoo / Paraformer_FunASR_pytorch · Commits · Commit 70a8a9e0
authored Oct 03, 2024 by wangwei990215

initial commit

Pipeline #1738 failed with stages in 0 seconds · Changes: 827 · Pipelines: 1
Showing 20 changed files with 1259 additions and 0 deletions (+1259 −0)
FunASR/examples/industrial_data_pretraining/fsmn_kws/conf/fsmn_4e_l10r2_250_128_fdim80_t2599.yaml  +95 −0
FunASR/examples/industrial_data_pretraining/fsmn_kws/conf/fsmn_4e_l10r2_280_200_fdim40_t2602.yaml  +95 −0
FunASR/examples/industrial_data_pretraining/fsmn_kws/convert.py  +134 −0
FunASR/examples/industrial_data_pretraining/fsmn_kws/convert.sh  +26 −0
FunASR/examples/industrial_data_pretraining/fsmn_kws/demo.py  +18 −0
FunASR/examples/industrial_data_pretraining/fsmn_kws/finetune.sh  +173 −0
FunASR/examples/industrial_data_pretraining/fsmn_kws/funasr  +2 −0
FunASR/examples/industrial_data_pretraining/fsmn_kws/infer.sh  +20 −0
FunASR/examples/industrial_data_pretraining/fsmn_kws/infer_from_local.sh  +41 −0
FunASR/examples/industrial_data_pretraining/fsmn_kws/path.sh  +5 −0
FunASR/examples/industrial_data_pretraining/fsmn_kws_mt/conf/fsmn_4e_l10r2_250_128_fdim80_t2599_t4.yaml  +103 −0
FunASR/examples/industrial_data_pretraining/fsmn_kws_mt/conf/fsmn_4e_l10r2_280_200_fdim40_t2602_t4.yaml  +103 −0
FunASR/examples/industrial_data_pretraining/fsmn_kws_mt/convert.py  +137 −0
FunASR/examples/industrial_data_pretraining/fsmn_kws_mt/convert.sh  +36 −0
FunASR/examples/industrial_data_pretraining/fsmn_kws_mt/demo.py  +18 −0
FunASR/examples/industrial_data_pretraining/fsmn_kws_mt/finetune.sh  +184 −0
FunASR/examples/industrial_data_pretraining/fsmn_kws_mt/funasr  +2 −0
FunASR/examples/industrial_data_pretraining/fsmn_kws_mt/infer.sh  +20 −0
FunASR/examples/industrial_data_pretraining/fsmn_kws_mt/infer_from_local.sh  +42 −0
FunASR/examples/industrial_data_pretraining/fsmn_kws_mt/path.sh  +5 −0
FunASR/examples/industrial_data_pretraining/fsmn_kws/conf/fsmn_4e_l10r2_250_128_fdim80_t2599.yaml (new file, mode 100644)

# network architecture
model: FsmnKWS
model_conf:
    ctc_weight: 1.0

# encoder related
encoder: FSMN
encoder_conf:
    input_dim: 400
    input_affine_dim: 140
    fsmn_layers: 4
    linear_dim: 250
    proj_dim: 128
    lorder: 10
    rorder: 2
    lstride: 1
    rstride: 1
    output_affine_dim: 140
    output_dim: 2599
    use_softmax: false

frontend: WavFrontend
frontend_conf:
    fs: 16000
    window: hamming
    n_mels: 80
    frame_length: 25
    frame_shift: 10
    lfr_m: 5
    lfr_n: 3

specaug: SpecAugLFR
specaug_conf:
    apply_time_warp: false
    time_warp_window: 5
    time_warp_mode: bicubic
    apply_freq_mask: true
    freq_mask_width_range:
    - 0
    - 30
    lfr_rate: 3
    num_freq_mask: 1
    apply_time_mask: true
    time_mask_width_range:
    - 0
    - 12
    num_time_mask: 1

train_conf:
    accum_grad: 1
    grad_clip: 5
    max_epoch: 100
    keep_nbest_models: 10
    avg_nbest_model: 10
    avg_keep_nbest_models_type: loss
    validate_interval: 50000
    save_checkpoint_interval: 50000
    avg_checkpoint_interval: 1000
    log_interval: 50

optim: adam
optim_conf:
    lr: 0.0005
scheduler: warmuplr
scheduler_conf:
    warmup_steps: 10000

dataset: AudioDataset
dataset_conf:
    index_ds: IndexDSJsonl
    batch_sampler: EspnetStyleBatchSampler
    batch_type: length # example or length
    # if batch_type is example, batch_size is the number of samples;
    # if length, batch_size is source_token_len+target_token_len
    batch_size: 32000
    # filter samples if source_token_len+target_token_len > max_token_length
    max_token_length: 1600
    buffer_size: 2048
    shuffle: true
    num_workers: 8
    preprocessor_speech: SpeechPreprocessSpeedPerturb
    preprocessor_speech_conf:
        speed_perturb: [0.9, 1.0, 1.1]

tokenizer: CharTokenizer
tokenizer_conf:
    unk_symbol: <unk>
    split_with_space: true

ctc_conf:
    dropout_rate: 0.0
    ctc_type: builtin
    reduce: true
    ignore_nan_grad: true
    extra_linear: false

normalize: null
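A note on how the frontend and encoder settings fit together: with low frame rate (LFR) stacking, lfr_m consecutive log-mel frames are concatenated into one input vector (and frames are decimated by lfr_n), so the encoder's input_dim should equal n_mels × lfr_m. A minimal consistency-check sketch; this relationship is my reading of the values in these configs, not a helper provided by FunASR:

    # Inferred relationship between frontend and encoder dims (assumption
    # based on the YAML values in this commit, not FunASR code).
    def lfr_input_dim(n_mels: int, lfr_m: int) -> int:
        return n_mels * lfr_m

    assert lfr_input_dim(80, 5) == 400  # this config: fdim80, input_dim 400
    assert lfr_input_dim(40, 9) == 360  # the fdim40 config below: input_dim 360
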
FunASR/examples/industrial_data_pretraining/fsmn_kws/conf/fsmn_4e_l10r2_280_200_fdim40_t2602.yaml (new file, mode 100644)

# network architecture
model: FsmnKWS
model_conf:
    ctc_weight: 1.0

# encoder related
encoder: FSMN
encoder_conf:
    input_dim: 360
    input_affine_dim: 280
    fsmn_layers: 4
    linear_dim: 280
    proj_dim: 200
    lorder: 10
    rorder: 2
    lstride: 1
    rstride: 1
    output_affine_dim: 400
    output_dim: 2602
    use_softmax: false

frontend: WavFrontend
frontend_conf:
    fs: 16000
    window: hamming
    n_mels: 40
    frame_length: 25
    frame_shift: 10
    lfr_m: 9
    lfr_n: 3

specaug: SpecAugLFR
specaug_conf:
    apply_time_warp: false
    time_warp_window: 5
    time_warp_mode: bicubic
    apply_freq_mask: true
    freq_mask_width_range:
    - 0
    - 30
    lfr_rate: 3
    num_freq_mask: 1
    apply_time_mask: true
    time_mask_width_range:
    - 0
    - 12
    num_time_mask: 1

train_conf:
    accum_grad: 1
    grad_clip: 5
    max_epoch: 100
    keep_nbest_models: 10
    avg_nbest_model: 10
    avg_keep_nbest_models_type: loss
    validate_interval: 50000
    save_checkpoint_interval: 50000
    avg_checkpoint_interval: 1000
    log_interval: 50

optim: adam
optim_conf:
    lr: 0.0005
scheduler: warmuplr
scheduler_conf:
    warmup_steps: 10000

dataset: AudioDataset
dataset_conf:
    index_ds: IndexDSJsonl
    batch_sampler: EspnetStyleBatchSampler
    batch_type: length # example or length
    # if batch_type is example, batch_size is the number of samples;
    # if length, batch_size is source_token_len+target_token_len
    batch_size: 32000
    # filter samples if source_token_len+target_token_len > max_token_length
    max_token_length: 1600
    buffer_size: 2048
    shuffle: true
    num_workers: 8
    preprocessor_speech: SpeechPreprocessSpeedPerturb
    preprocessor_speech_conf:
        speed_perturb: [0.9, 1.0, 1.1]

tokenizer: CharTokenizer
tokenizer_conf:
    unk_symbol: <unk>
    split_with_space: true

ctc_conf:
    dropout_rate: 0.0
    ctc_type: builtin
    reduce: true
    ignore_nan_grad: true
    extra_linear: false

normalize: null
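The two configs above target the same FsmnKWS recipe at different operating points: the fdim80 variant stacks five 80-dim log-mel frames (input_dim 400) into a 250/128 FSMN with a 2599-token output, while the fdim40 variant stacks nine 40-dim frames (input_dim 360) into a wider 280/200 FSMN with larger affine layers and a 2602-token output; the training hyperparameters are otherwise identical.
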
FunASR/examples/industrial_data_pretraining/fsmn_kws/convert.py (new file, mode 100644)

from __future__ import print_function

import argparse
import copy
import logging
import os
from shutil import copyfile

import torch
import yaml
from typing import Union

from funasr.models.fsmn_kws.model import FsmnKWSConvert


def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)


def get_args():
    parser = argparse.ArgumentParser(
        description='load and convert network to each other between kaldi/pytorch format')
    parser.add_argument('--config', required=True, help='config file')
    parser.add_argument('--network_file', default='', required=True,
                        help='input network, support kaldi.txt/pytorch.pt')
    parser.add_argument('--model_dir', required=True, help='save model dir')
    parser.add_argument('--model_name', required=True, help='save model name')
    parser.add_argument('--convert_to', default='kaldi', required=True,
                        help='target network type, kaldi/pytorch')
    args = parser.parse_args()
    return args


def convert_to_kaldi(configs, network_file, model_dir, model_name="convert.kaldi.txt"):
    copyfile(network_file, os.path.join(model_dir, 'origin.torch.pt'))
    model = FsmnKWSConvert(
        vocab_size=configs['encoder_conf']['output_dim'],
        encoder='FSMNConvert',
        encoder_conf=configs['encoder_conf'],
        ctc_conf=configs['ctc_conf'],
    )
    print(model)
    num_params = count_parameters(model)
    print('the number of model params: {}'.format(num_params))

    states = torch.load(network_file, map_location='cpu')
    model.load_state_dict(states["state_dict"])

    # export the network as a kaldi-format text description
    kaldi_text = os.path.join(model_dir, model_name)
    with open(kaldi_text, 'w', encoding='utf8') as fout:
        fout.write(model.to_kaldi_net())


def convert_to_pytorch(configs, network_file, model_dir, model_name="convert.torch.pt"):
    model = FsmnKWSConvert(
        vocab_size=configs['encoder_conf']['output_dim'],
        frontend=None,
        specaug=None,
        normalize=None,
        encoder='FSMNConvert',
        encoder_conf=configs['encoder_conf'],
        ctc_conf=configs['ctc_conf'],
    )
    num_params = count_parameters(model)
    print('the number of model params: {}'.format(num_params))

    copyfile(network_file, os.path.join(model_dir, 'origin.kaldi.txt'))
    model.to_pytorch_net(network_file)
    save_model_path = os.path.join(model_dir, model_name)
    torch.save({"model": model.state_dict()}, save_model_path)

    # round-trip check: export the loaded network back to kaldi text format
    print('convert torch format back to kaldi')
    kaldi_text = os.path.join(model_dir, 'convert.kaldi.txt')
    with open(kaldi_text, 'w', encoding='utf8') as fout:
        fout.write(model.to_kaldi_net())
    print('Done!')


def main():
    args = get_args()
    logging.basicConfig(level=logging.DEBUG,
                        format='%(asctime)s %(levelname)s %(message)s')
    print(args)

    with open(args.config, 'r') as fin:
        configs = yaml.load(fin, Loader=yaml.FullLoader)

    if args.convert_to == 'pytorch':
        print('convert kaldi net to pytorch...')
        convert_to_pytorch(configs, args.network_file, args.model_dir, args.model_name)
    elif args.convert_to == 'kaldi':
        print('convert pytorch net to kaldi...')
        convert_to_kaldi(configs, args.network_file, args.model_dir, args.model_name)
    else:
        print('unsupported target network type: {}'.format(args.convert_to))


if __name__ == '__main__':
    main()
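Since convert_to_pytorch stores the converted weights under the "model" key, a quick way to sanity-check its output is to reload the checkpoint and inspect a few tensors. A minimal sketch, not part of the commit; the path assumes the defaults used by convert.sh below:

    # Hypothetical spot-check: reload the file written by
    # torch.save({"model": model.state_dict()}, ...) above.
    import torch

    ckpt = torch.load("exp/finetune_outputs/convert.torch.pt", map_location="cpu")
    for name, tensor in list(ckpt["model"].items())[:5]:
        print(name, tuple(tensor.shape))
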
FunASR/examples/industrial_data_pretraining/fsmn_kws/convert.sh (new file, mode 100644)

workspace=`pwd`

# download model
local_path_root=${workspace}/modelscope_models_kws
mkdir -p ${local_path_root}
local_path=${local_path_root}/speech_charctc_kws_phone-xiaoyun
if [ ! -d "$local_path" ]; then
    git clone https://www.modelscope.cn/iic/speech_charctc_kws_phone-xiaoyun.git ${local_path}
fi

export PATH=${local_path}/runtime:$PATH
export LD_LIBRARY_PATH=${local_path}/runtime:$LD_LIBRARY_PATH

config=./conf/fsmn_4e_l10r2_250_128_fdim80_t2599.yaml
torch_nnet=exp/finetune_outputs/model.pt.avg10
out_dir=exp/finetune_outputs
if [ ! -d "$out_dir" ]; then
    mkdir -p $out_dir
fi

python convert.py --config $config --network_file $torch_nnet --model_dir $out_dir --model_name "convert.kaldi.txt" --convert_to kaldi

nnet-copy --binary=true ${out_dir}/convert.kaldi.txt ${out_dir}/convert.kaldi.net
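A note on the last step: nnet-copy is a Kaldi-style tool, presumably shipped in the model repository's runtime/ directory since the script prepends that directory to PATH and LD_LIBRARY_PATH before calling it; it converts the exported text network into the binary convert.kaldi.net consumed by the runtime decoder.
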
FunASR/examples/industrial_data_pretraining/fsmn_kws/demo.py (new file, mode 100644)

#!/usr/bin/env python3
# -*- encoding: utf-8 -*-
# Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
# MIT License (https://opensource.org/licenses/MIT)

from funasr import AutoModel

model = AutoModel(
    model="iic/speech_charctc_kws_phone-xiaoyun",
    keywords="小云小云",
    output_dir="./outputs/debug",
    device='cpu',
)

test_wav = "https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/KWS/pos_testset/kws_xiaoyunxiaoyun.wav"

res = model.generate(input=test_wav, cache={})
print(res)
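The demo streams a positive test utterance from a URL; per the "more input types" note in infer.sh below, generate should also accept a local file path. A hedged continuation of the demo above; the wav filename is a placeholder:

    # Assumption based on the "more input types" note in infer.sh and
    # FunASR's readme.md; "./my_recording.wav" is a hypothetical local file.
    res = model.generate(input="./my_recording.wav", cache={})
    print(res)
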
FunASR/examples/industrial_data_pretraining/fsmn_kws/finetune.sh (new file, mode 100755)

#!/usr/bin/env bash
# Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
# MIT License (https://opensource.org/licenses/MIT)

# Set bash to 'debug' mode: it will exit on
# -e 'error', -u 'undefined variable', -o ... 'error in pipeline', -x 'print commands'
set -e
set -u
set -o pipefail

. ./path.sh
workspace=`pwd`

CUDA_VISIBLE_DEVICES="0,1"
stage=2
stop_stage=3

inference_device="cuda" # "cpu"
inference_checkpoint="model.pt.avg10"
inference_scp="wav.scp"
inference_batch_size=32
nj=32
test_sets="test"

# model_name from model_hub, or model_dir in local path
## option 1, download model automatically, unsupported currently
model_name_or_model_dir="iic/speech_charctc_kws_phone-xiaoyun"
## option 2, download model by git
local_path_root=${workspace}/modelscope_models
model_name_or_model_dir=${local_path_root}/${model_name_or_model_dir}
if [ ! -d $model_name_or_model_dir ]; then
    mkdir -p ${model_name_or_model_dir}
    git clone https://www.modelscope.cn/iic/speech_charctc_kws_phone-xiaoyun.git ${model_name_or_model_dir}
fi

config=fsmn_4e_l10r2_250_128_fdim80_t2599.yaml
token_list=${model_name_or_model_dir}/funasr/tokens_2599.txt
lexicon_list=${model_name_or_model_dir}/funasr/lexicon.txt
cmvn_file=${model_name_or_model_dir}/funasr/am.mvn.dim80_l2r2
init_param="${model_name_or_model_dir}/funasr/basetrain_fsmn_4e_l10r2_250_128_fdim80_t2599.pt"

# data prepare
# data dir, which contains: train.jsonl, val.jsonl
data_dir=../../data
train_data="${data_dir}/train.jsonl"
val_data="${data_dir}/val.jsonl"

if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    echo "stage 1: Generate audio json list"
    # generate train.jsonl and val.jsonl from wav.scp and text.txt
    python $FUNASR_DIR/funasr/datasets/audio_datasets/scp2jsonl.py \
        ++scp_file_list='['''${data_dir}/train_wav.scp''', '''${data_dir}/train_text.txt''']' \
        ++data_type_list='["source", "target"]' \
        ++jsonl_file_out="${train_data}"

    python $FUNASR_DIR/funasr/datasets/audio_datasets/scp2jsonl.py \
        ++scp_file_list='['''${data_dir}/val_wav.scp''', '''${data_dir}/val_text.txt''']' \
        ++data_type_list='["source", "target"]' \
        ++jsonl_file_out="${val_data}"
fi

# exp output dir
output_dir="${workspace}/exp/finetune_outputs"

# Training Stage
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    echo "stage 2: KWS Training"
    mkdir -p ${output_dir}
    current_time=$(date "+%Y-%m-%d_%H-%M")
    log_file="${output_dir}/train.log.txt.${current_time}"
    echo "log_file: ${log_file}"
    echo "finetune use basetrain model: ${init_param}"

    export CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES
    gpu_num=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')

    torchrun --nnodes 1 --nproc_per_node ${gpu_num} \
        ../../../funasr/bin/train.py \
        --config-path "${workspace}/conf" \
        --config-name "${config}" \
        ++init_param="${init_param}" \
        ++disable_update=true \
        ++train_data_set_list="${train_data}" \
        ++valid_data_set_list="${val_data}" \
        ++tokenizer_conf.token_list="${token_list}" \
        ++tokenizer_conf.seg_dict="${lexicon_list}" \
        ++frontend_conf.cmvn_file="${cmvn_file}" \
        ++output_dir="${output_dir}" &> ${log_file}
fi

# Testing Stage
if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
    echo "stage 3: Inference"

    keywords=(小云小云)
    keywords_string=$(IFS=,; echo "${keywords[*]}")
    echo "keywords: $keywords_string"

    if [ ${inference_device} == "cuda" ]; then
        nj=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
    else
        inference_batch_size=1
        CUDA_VISIBLE_DEVICES=""
        for JOB in $(seq ${nj}); do
            CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES"-1,"
        done
    fi

    for dset in ${test_sets}; do
        inference_dir="${output_dir}/inference-${inference_checkpoint}/${dset}"
        _logdir="${inference_dir}/logdir"
        echo "inference_dir: ${inference_dir}"
        mkdir -p "${_logdir}"

        test_data_dir="${data_dir}/${dset}"
        key_file=${test_data_dir}/${inference_scp}

        # split the key file into one scp per job
        split_scps=
        for JOB in $(seq "${nj}"); do
            split_scps+=" ${_logdir}/keys.${JOB}.scp"
        done
        $FUNASR_DIR/examples/aishell/paraformer/utils/split_scp.pl "${key_file}" ${split_scps}

        gpuid_list_array=(${CUDA_VISIBLE_DEVICES//,/ })
        for JOB in $(seq ${nj}); do
            {
                id=$((JOB-1))
                gpuid=${gpuid_list_array[$id]}
                echo "${output_dir}"
                export CUDA_VISIBLE_DEVICES=${gpuid}
                python ../../../funasr/bin/inference.py \
                    --config-path="${output_dir}" \
                    --config-name="config.yaml" \
                    ++init_param="${output_dir}/${inference_checkpoint}" \
                    ++tokenizer_conf.token_list="${token_list}" \
                    ++tokenizer_conf.seg_dict="${lexicon_list}" \
                    ++frontend_conf.cmvn_file="${cmvn_file}" \
                    ++keywords="\"$keywords_string\"" \
                    ++input="${_logdir}/keys.${JOB}.scp" \
                    ++output_dir="${inference_dir}/${JOB}" \
                    ++device="${inference_device}" \
                    ++ncpu=1 \
                    ++disable_log=true \
                    ++batch_size="${inference_batch_size}" &> ${_logdir}/log.${JOB}.txt
            } &
        done
        wait

        # merge per-job score files
        for f in detect; do
            if [ -f "${inference_dir}/${JOB}/${f}" ]; then
                for JOB in $(seq "${nj}"); do
                    cat "${inference_dir}/${JOB}/${f}"
                done | sort -k1 > "${inference_dir}/${f}"
            fi
        done

        python funasr/utils/compute_det_ctc.py \
            --keywords ${keywords_string} \
            --test_data ${test_data_dir}/wav.scp \
            --trans_data ${test_data_dir}/text \
            --score_file ${inference_dir}/detect \
            --stats_dir ${inference_dir}
    done
fi
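Stage 1 expects Kaldi-style inputs: train_wav.scp maps utterance IDs to audio paths, and train_text.txt maps the same IDs to transcripts; scp2jsonl.py pairs the two into train.jsonl. A hypothetical helper for producing those files; the directory layout and transcript are placeholders, with characters space-separated to match the CharTokenizer's split_with_space: true:

    # Hypothetical data-prep helper, not part of this commit. Writes the
    # Kaldi-style "utt_id /path/to/wav" and "utt_id transcript" pairs
    # that scp2jsonl.py consumes; paths and transcripts are placeholders.
    from pathlib import Path

    wav_dir = Path("../../data/wavs")  # assumed layout
    with open("../../data/train_wav.scp", "w", encoding="utf8") as wav_scp, \
         open("../../data/train_text.txt", "w", encoding="utf8") as text:
        for wav in sorted(wav_dir.glob("*.wav")):
            utt_id = wav.stem
            wav_scp.write(f"{utt_id} {wav.resolve()}\n")
            text.write(f"{utt_id} 小 云 小 云\n")  # space-separated chars
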
FunASR/examples/industrial_data_pretraining/fsmn_kws/funasr (new symlink, mode 120000)

../../../funasr (no newline at end of file)
FunASR/examples/industrial_data_pretraining/fsmn_kws/infer.sh (new file, mode 100644)

# Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
# MIT License (https://opensource.org/licenses/MIT)

# method1, inference from model hub
model="iic/speech_charctc_kws_phone-xiaoyun"

# for more input types, please refer to readme.md
input="https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/KWS/pos_testset/kws_xiaoyunxiaoyun.wav"

keywords=(小云小云)
keywords_string=$(IFS=,; echo "${keywords[*]}")
echo "keywords: $keywords_string"

python funasr/bin/inference.py \
    +model=${model} \
    +input=${input} \
    +output_dir="./outputs/debug" \
    +device="cpu" \
    ++keywords="\"$keywords_string\""
FunASR/examples/industrial_data_pretraining/fsmn_kws/infer_from_local.sh (new file, mode 100644)

# Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
# MIT License (https://opensource.org/licenses/MIT)

# method2, inference from local model
# for more input types, please refer to readme.md
input="https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/KWS/pos_testset/kws_xiaoyunxiaoyun.wav"
output_dir="./outputs/debug"

workspace=`pwd`

# download model
local_path_root=${workspace}/modelscope_models
mkdir -p ${local_path_root}
local_path=${local_path_root}/speech_charctc_kws_phone-xiaoyun
git clone https://www.modelscope.cn/iic/speech_charctc_kws_phone-xiaoyun.git ${local_path}

device="cuda:0" # "cuda:0" for gpu0, "cuda:1" for gpu1, "cpu"

config="inference_fsmn_4e_l10r2_250_128_fdim80_t2599.yaml"
tokens="${local_path}/funasr/tokens_2599.txt"
seg_dict="${local_path}/funasr/lexicon.txt"
init_param="${local_path}/funasr/finetune_fsmn_4e_l10r2_250_128_fdim80_t2599_xiaoyun_xiaoyun.pt"
cmvn_file="${local_path}/funasr/am.mvn.dim80_l2r2"

keywords=(小云小云)
keywords_string=$(IFS=,; echo "${keywords[*]}")
echo "keywords: $keywords_string"

python -m funasr.bin.inference \
    --config-path "${local_path}/funasr" \
    --config-name "${config}" \
    ++init_param="${init_param}" \
    ++frontend_conf.cmvn_file="${cmvn_file}" \
    ++tokenizer_conf.token_list="${tokens}" \
    ++tokenizer_conf.seg_dict="${seg_dict}" \
    ++input="${input}" \
    ++output_dir="${output_dir}" \
    ++device="${device}" \
    ++keywords="\"$keywords_string\""
FunASR/examples/industrial_data_pretraining/fsmn_kws/path.sh (new file, mode 100755)

export FUNASR_DIR=$PWD/../../..

# NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
export PYTHONIOENCODING=UTF-8
export PATH=$FUNASR_DIR/funasr/bin:$PATH
FunASR/examples/industrial_data_pretraining/fsmn_kws_mt/conf/fsmn_4e_l10r2_250_128_fdim80_t2599_t4.yaml (new file, mode 100644)

# network architecture
model: FsmnKWSMT
model_conf:
    ctc_weight: 1.0

# encoder related
encoder: FSMNMT
encoder_conf:
    input_dim: 400
    input_affine_dim: 140
    fsmn_layers: 4
    linear_dim: 250
    proj_dim: 128
    lorder: 10
    rorder: 2
    lstride: 1
    rstride: 1
    output_affine_dim: 140
    output_dim: 2599
    output_dim2: 4
    use_softmax: false

frontend: WavFrontend
frontend_conf:
    fs: 16000
    window: hamming
    n_mels: 80
    frame_length: 25
    frame_shift: 10
    lfr_m: 5
    lfr_n: 3

specaug: SpecAugLFR
specaug_conf:
    apply_time_warp: false
    time_warp_window: 5
    time_warp_mode: bicubic
    apply_freq_mask: true
    freq_mask_width_range:
    - 0
    - 30
    lfr_rate: 3
    num_freq_mask: 1
    apply_time_mask: true
    time_mask_width_range:
    - 0
    - 12
    num_time_mask: 1

train_conf:
    accum_grad: 1
    grad_clip: 5
    max_epoch: 100
    keep_nbest_models: 100
    avg_nbest_model: 10
    avg_keep_nbest_models_type: loss
    log_interval: 50

optim: adam
optim_conf:
    lr: 0.001
scheduler: warmuplr
scheduler_conf:
    warmup_steps: 10000

dataset: KwsMTDataset
dataset_conf:
    index_ds: IndexDSJsonl
    batch_sampler: EspnetStyleBatchSampler
    batch_type: length # example or length
    # if batch_type is example, batch_size is the number of samples;
    # if length, batch_size is source_token_len+target_token_len
    batch_size: 64000
    # filter samples if source_token_len+target_token_len > max_token_length
    max_token_length: 1600
    buffer_size: 2048
    shuffle: true
    num_workers: 8
    preprocessor_speech: SpeechPreprocessSpeedPerturb
    preprocessor_speech_conf:
        speed_perturb: [0.9, 1.0, 1.1]

dataloader: DataloaderMapStyle

tokenizer:
  - CharTokenizer
  - CharTokenizer
tokenizer_conf:
  - unk_symbol: <unk>
    split_with_space: true
    token_list: null
    seg_dict: null
  - unk_symbol: <unk>
    split_with_space: true
    token_list: null
    seg_dict: null

ctc_conf:
    dropout_rate: 0.0
    ctc_type: builtin # ctc_type: focalctc, builtin
    reduce: true
    ignore_nan_grad: true
    extra_linear: false

normalize: null
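Compared with the single-task fsmn_kws config, this multi-task (MT) variant adds a second output head (output_dim2: 4) on the shared FSMN encoder, switches to KwsMTDataset with DataloaderMapStyle, doubles the batch_size budget to 64000, raises the learning rate to 0.001, and declares two CharTokenizers with one token_list/seg_dict pair per task, both left null here and supplied on the command line by finetune.sh.
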
FunASR/examples/industrial_data_pretraining/fsmn_kws_mt/conf/fsmn_4e_l10r2_280_200_fdim40_t2602_t4.yaml (new file, mode 100644)

# network architecture
model: FsmnKWSMT
model_conf:
    ctc_weight: 1.0

# encoder related
encoder: FSMNMT
encoder_conf:
    input_dim: 360
    input_affine_dim: 280
    fsmn_layers: 4
    linear_dim: 280
    proj_dim: 200
    lorder: 10
    rorder: 2
    lstride: 1
    rstride: 1
    output_affine_dim: 400
    output_dim: 2602
    output_dim2: 4
    use_softmax: false

frontend: WavFrontend
frontend_conf:
    fs: 16000
    window: hamming
    n_mels: 40
    frame_length: 25
    frame_shift: 10
    lfr_m: 9
    lfr_n: 3

specaug: SpecAugLFR
specaug_conf:
    apply_time_warp: false
    time_warp_window: 5
    time_warp_mode: bicubic
    apply_freq_mask: true
    freq_mask_width_range:
    - 0
    - 30
    lfr_rate: 3
    num_freq_mask: 1
    apply_time_mask: true
    time_mask_width_range:
    - 0
    - 12
    num_time_mask: 1

train_conf:
    accum_grad: 1
    grad_clip: 5
    max_epoch: 100
    keep_nbest_models: 100
    avg_nbest_model: 10
    avg_keep_nbest_models_type: loss
    log_interval: 50

optim: adam
optim_conf:
    lr: 0.001
scheduler: warmuplr
scheduler_conf:
    warmup_steps: 10000

dataset: KwsMTDataset
dataset_conf:
    index_ds: IndexDSJsonl
    batch_sampler: EspnetStyleBatchSampler
    batch_type: length # example or length
    # if batch_type is example, batch_size is the number of samples;
    # if length, batch_size is source_token_len+target_token_len
    batch_size: 64000
    # filter samples if source_token_len+target_token_len > max_token_length
    max_token_length: 1600
    buffer_size: 2048
    shuffle: true
    num_workers: 8
    preprocessor_speech: SpeechPreprocessSpeedPerturb
    preprocessor_speech_conf:
        speed_perturb: [0.9, 1.0, 1.1]

dataloader: DataloaderMapStyle

tokenizer:
  - CharTokenizer
  - CharTokenizer
tokenizer_conf:
  - unk_symbol: <unk>
    split_with_space: true
    token_list: null
    seg_dict: null
  - unk_symbol: <unk>
    split_with_space: true
    token_list: null
    seg_dict: null

ctc_conf:
    dropout_rate: 0.0
    ctc_type: builtin # ctc_type: focalctc, builtin
    reduce: true
    ignore_nan_grad: true
    extra_linear: false

normalize: null
FunASR/examples/industrial_data_pretraining/fsmn_kws_mt/convert.py (new file, mode 100644)

from __future__ import print_function

import argparse
import copy
import logging
import os
from shutil import copyfile

import torch
import yaml
from typing import Union

from funasr.models.fsmn_kws_mt.encoder import FSMNMTConvert
from funasr.models.fsmn_kws_mt.model import FsmnKWSMTConvert


def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)


def get_args():
    parser = argparse.ArgumentParser(
        description='load and convert network to each other between kaldi/pytorch format')
    parser.add_argument('--config', required=True, help='config file')
    parser.add_argument('--network_file', default='', required=True,
                        help='input network, support kaldi.txt/pytorch.pt')
    parser.add_argument('--model_dir', required=True, help='save model dir')
    parser.add_argument('--model_name', required=True, help='save model name')
    parser.add_argument('--model_name2', required=True, help='save model name')
    parser.add_argument('--convert_to', default='kaldi', required=True,
                        help='target network type, kaldi/pytorch')
    args = parser.parse_args()
    return args


def convert_to_kaldi(configs, network_file, model_dir,
                     model_name="convert.kaldi.txt", model_name2="convert.kaldi2.txt"):
    copyfile(network_file, os.path.join(model_dir, 'origin.torch.pt'))
    model = FsmnKWSMTConvert(
        encoder='FSMNMTConvert',
        encoder_conf=configs['encoder_conf'],
        ctc_conf=configs['ctc_conf'],
    )
    print(model)
    num_params = count_parameters(model)
    print('the number of model params: {}'.format(num_params))

    states = torch.load(network_file, map_location='cpu')
    model.load_state_dict(states["state_dict"])

    # export the first output head as a kaldi-format text network
    kaldi_text = os.path.join(model_dir, model_name)
    with open(kaldi_text, 'w', encoding='utf8') as fout:
        fout.write(model.to_kaldi_net())

    # export the second output head
    kaldi_text2 = os.path.join(model_dir, model_name2)
    with open(kaldi_text2, 'w', encoding='utf8') as fout:
        fout.write(model.to_kaldi_net2())


def convert_to_pytorch(configs, network_file, model_dir, model_name="convert.torch.pt"):
    model = FsmnKWSMTConvert(
        encoder='FSMNMTConvert',
        encoder_conf=configs['encoder_conf'],
        ctc_conf=configs['ctc_conf'],
    )
    num_params = count_parameters(model)
    print('the number of model params: {}'.format(num_params))

    copyfile(network_file, os.path.join(model_dir, 'origin.kaldi.txt'))
    model.to_pytorch_net(network_file)
    save_model_path = os.path.join(model_dir, model_name)
    torch.save({"model": model.state_dict()}, save_model_path)

    # round-trip check: export the loaded network back to kaldi text format
    print('convert torch format back to kaldi')
    kaldi_text = os.path.join(model_dir, 'convert.kaldi.txt')
    with open(kaldi_text, 'w', encoding='utf8') as fout:
        fout.write(model.to_kaldi_net())
    print('Done!')


def main():
    args = get_args()
    logging.basicConfig(level=logging.DEBUG,
                        format='%(asctime)s %(levelname)s %(message)s')
    print(args)

    with open(args.config, 'r') as fin:
        configs = yaml.load(fin, Loader=yaml.FullLoader)

    if args.convert_to == 'pytorch':
        print('convert kaldi net to pytorch...')
        # convert_to_pytorch writes a single checkpoint; model_name2 is only
        # used when exporting the two kaldi nets
        convert_to_pytorch(configs, args.network_file, args.model_dir, args.model_name)
    elif args.convert_to == 'kaldi':
        print('convert pytorch net to kaldi...')
        convert_to_kaldi(configs, args.network_file, args.model_dir,
                         args.model_name, args.model_name2)
    else:
        print('unsupported target network type: {}'.format(args.convert_to))


if __name__ == '__main__':
    main()
FunASR/examples/industrial_data_pretraining/fsmn_kws_mt/convert.sh (new file, mode 100644)

workspace=`pwd`

# download model
local_path_root=${workspace}/modelscope_models
mkdir -p ${local_path_root}
local_path=${local_path_root}/speech_charctc_kws_phone-xiaoyun_mt
if [ ! -d "$local_path" ]; then
    git clone https://www.modelscope.cn/iic/speech_charctc_kws_phone-xiaoyun_mt.git ${local_path}
fi

export PATH=${local_path}/runtime:$PATH
export LD_LIBRARY_PATH=${local_path}/runtime:$LD_LIBRARY_PATH

# finetune config file
config=./conf/fsmn_4e_l10r2_250_128_fdim80_t2599_t4.yaml
# finetune output checkpoint
torch_nnet=exp/finetune_outputs/model.pt.avg10
out_dir=exp/finetune_outputs
if [ ! -d "$out_dir" ]; then
    mkdir -p $out_dir
fi

python convert.py --config $config \
    --network_file $torch_nnet \
    --model_dir $out_dir \
    --model_name "convert.kaldi.txt" \
    --model_name2 "convert.kaldi2.txt" \
    --convert_to kaldi

nnet-copy --binary=true ${out_dir}/convert.kaldi.txt ${out_dir}/convert.kaldi.net
nnet-copy --binary=true ${out_dir}/convert.kaldi2.txt ${out_dir}/convert.kaldi2.net
FunASR/examples/industrial_data_pretraining/fsmn_kws_mt/demo.py (new file, mode 100644)

#!/usr/bin/env python3
# -*- encoding: utf-8 -*-
# Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
# MIT License (https://opensource.org/licenses/MIT)

from funasr import AutoModel

model = AutoModel(
    model="iic/speech_charctc_kws_phone-xiaoyun_mt",
    keywords="小云小云",
    output_dir="./outputs/debug",
    device='cpu',
)

test_wav = "https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/KWS/pos_testset/kws_xiaoyunxiaoyun.wav"

res = model.generate(input=test_wav, cache={})
print(res)
FunASR/examples/industrial_data_pretraining/fsmn_kws_mt/finetune.sh (new file, mode 100755)

#!/usr/bin/env bash
# Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
# MIT License (https://opensource.org/licenses/MIT)

# Set bash to 'debug' mode: it will exit on
# -e 'error', -u 'undefined variable', -o ... 'error in pipeline', -x 'print commands'
set -e
set -u
set -o pipefail

. ./path.sh
workspace=`pwd`

CUDA_VISIBLE_DEVICES="0,1"
stage=2
stop_stage=3

inference_device="cuda" # "cpu"
inference_checkpoint="model.pt.avg10"
inference_scp="wav.scp"
inference_batch_size=32
nj=32
test_sets="test"

# model_name from model_hub, or model_dir in local path
## option 1, download model automatically, unsupported currently
model_name_or_model_dir="iic/speech_charctc_kws_phone-xiaoyun_mt"
## option 2, download model by git
local_path_root=${workspace}/modelscope_models
model_name_or_model_dir=${local_path_root}/${model_name_or_model_dir}
if [ ! -d $model_name_or_model_dir ]; then
    mkdir -p ${model_name_or_model_dir}
    git clone https://www.modelscope.cn/iic/speech_charctc_kws_phone-xiaoyun_mt.git ${model_name_or_model_dir}
fi

config=fsmn_4e_l10r2_250_128_fdim80_t2599_t4.yaml
token_list=${model_name_or_model_dir}/funasr/tokens_2599.txt
token_list2=${model_name_or_model_dir}/funasr/tokens_xiaoyun.txt
lexicon_list=${model_name_or_model_dir}/funasr/lexicon.txt
cmvn_file=${model_name_or_model_dir}/funasr/am.mvn.dim80_l2r2
init_param="${model_name_or_model_dir}/funasr/basetrain_fsmn_4e_l10r2_250_128_fdim80_t2599.pt"

# data prepare
# data dir, which contains: train.jsonl, val.jsonl
data_dir=../../data
train_data="${data_dir}/train.jsonl"
val_data="${data_dir}/val.jsonl"

if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    echo "stage 1: Generate audio json list"
    # generate train.jsonl and val.jsonl from wav.scp and text.txt
    python $FUNASR_DIR/funasr/datasets/audio_datasets/scp2jsonl.py \
        ++scp_file_list='['''${data_dir}/train_wav.scp''', '''${data_dir}/train_text.txt''']' \
        ++data_type_list='["source", "target"]' \
        ++jsonl_file_out="${train_data}"

    python $FUNASR_DIR/funasr/datasets/audio_datasets/scp2jsonl.py \
        ++scp_file_list='['''${data_dir}/val_wav.scp''', '''${data_dir}/val_text.txt''']' \
        ++data_type_list='["source", "target"]' \
        ++jsonl_file_out="${val_data}"
fi

# exp output dir
output_dir="${workspace}/exp/finetune_outputs"

# Training Stage
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    echo "stage 2: KWS Training"
    mkdir -p ${output_dir}
    current_time=$(date "+%Y-%m-%d_%H-%M")
    log_file="${output_dir}/train.log.txt.${current_time}"
    echo "log_file: ${log_file}"
    echo "finetune use basetrain model: ${init_param}"

    export CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES
    gpu_num=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')

    torchrun --nnodes 1 --nproc_per_node ${gpu_num} \
        ../../../funasr/bin/train.py \
        --config-path "${workspace}/conf" \
        --config-name "${config}" \
        ++init_param="${init_param}" \
        ++token_lists='['''${token_list}''', '''${token_list2}''']' \
        ++seg_dicts='['''${lexicon_list}''', '''${lexicon_list}''']' \
        ++disable_update=true \
        ++train_data_set_list="${train_data}" \
        ++valid_data_set_list="${val_data}" \
        ++frontend_conf.cmvn_file="${cmvn_file}" \
        ++output_dir="${output_dir}" &> ${log_file}
fi

# Testing Stage
if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
    echo "stage 3: Inference"

    keywords=(小云小云)
    keywords_string=$(IFS=,; echo "${keywords[*]}")
    echo "keywords: $keywords_string"

    if [ ${inference_device} == "cuda" ]; then
        nj=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
    else
        inference_batch_size=1
        CUDA_VISIBLE_DEVICES=""
        for JOB in $(seq ${nj}); do
            CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES"-1,"
        done
    fi

    for dset in ${test_sets}; do
        inference_dir="${output_dir}/inference-${inference_checkpoint}/${dset}"
        _logdir="${inference_dir}/logdir"
        echo "inference_dir: ${inference_dir}"
        mkdir -p "${_logdir}"

        test_data_dir="${data_dir}/${dset}"
        key_file=${test_data_dir}/${inference_scp}

        # split the key file into one scp per job
        split_scps=
        for JOB in $(seq "${nj}"); do
            split_scps+=" ${_logdir}/keys.${JOB}.scp"
        done
        $FUNASR_DIR/examples/aishell/paraformer/utils/split_scp.pl "${key_file}" ${split_scps}

        gpuid_list_array=(${CUDA_VISIBLE_DEVICES//,/ })
        for JOB in $(seq ${nj}); do
            {
                id=$((JOB-1))
                gpuid=${gpuid_list_array[$id]}
                echo "${output_dir}"
                export CUDA_VISIBLE_DEVICES=${gpuid}
                python ../../../funasr/bin/inference.py \
                    --config-path="${output_dir}" \
                    --config-name="config.yaml" \
                    ++init_param="${output_dir}/${inference_checkpoint}" \
                    ++token_lists='['''${token_list}''', '''${token_list2}''']' \
                    ++seg_dicts='['''${lexicon_list}''', '''${lexicon_list}''']' \
                    ++frontend_conf.cmvn_file="${cmvn_file}" \
                    ++keywords="\"$keywords_string\"" \
                    ++input="${_logdir}/keys.${JOB}.scp" \
                    ++output_dir="${inference_dir}/${JOB}" \
                    ++device="${inference_device}" \
                    ++ncpu=1 \
                    ++disable_log=true \
                    ++batch_size="${inference_batch_size}" &> ${_logdir}/log.${JOB}.txt
            } &
        done
        wait

        # merge per-job score files, one per output head
        for f in detect detect2; do
            if [ -f "${inference_dir}/${JOB}/${f}" ]; then
                for JOB in $(seq "${nj}"); do
                    cat "${inference_dir}/${JOB}/${f}"
                done | sort -k1 > "${inference_dir}/${f}"
            fi
        done

        mkdir -p ${inference_dir}/task1
        python funasr/utils/compute_det_ctc.py \
            --keywords ${keywords_string} \
            --test_data ${test_data_dir}/wav.scp \
            --trans_data ${test_data_dir}/text \
            --score_file ${inference_dir}/detect \
            --stats_dir ${inference_dir}/task1

        mkdir -p ${inference_dir}/task2
        python funasr/utils/compute_det_ctc.py \
            --keywords ${keywords_string} \
            --test_data ${test_data_dir}/wav.scp \
            --trans_data ${test_data_dir}/text \
            --score_file ${inference_dir}/detect2 \
            --stats_dir ${inference_dir}/task2
    done
fi
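The inference stage here mirrors the single-task script but merges two score files, detect and detect2, one per output head, and scores them separately with compute_det_ctc.py into the task1/ and task2/ stats directories.
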
FunASR/examples/industrial_data_pretraining/fsmn_kws_mt/funasr (new symlink, mode 120000)

../../../funasr (no newline at end of file)
FunASR/examples/industrial_data_pretraining/fsmn_kws_mt/infer.sh (new file, mode 100644)

# Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
# MIT License (https://opensource.org/licenses/MIT)

# method1, inference from model hub
model="iic/speech_charctc_kws_phone-xiaoyun_mt"

# for more input types, please refer to readme.md
input="https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/KWS/pos_testset/kws_xiaoyunxiaoyun.wav"

keywords=(小云小云)
keywords_string=$(IFS=,; echo "${keywords[*]}")
echo "keywords: $keywords_string"

python funasr/bin/inference.py \
    +model=${model} \
    +input=${input} \
    +output_dir="./outputs/debug" \
    +device="cpu" \
    ++keywords="\"$keywords_string\""
FunASR/examples/industrial_data_pretraining/fsmn_kws_mt/infer_from_local.sh (new file, mode 100644)

# Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
# MIT License (https://opensource.org/licenses/MIT)

# method2, inference from local model
# for more input types, please refer to readme.md
input="https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/KWS/pos_testset/kws_xiaoyunxiaoyun.wav"
output_dir="./outputs/debug"

workspace=`pwd`

# download model
local_path_root=${workspace}/modelscope_models
mkdir -p ${local_path_root}
local_path=${local_path_root}/speech_charctc_kws_phone-xiaoyun_mt
git clone https://www.modelscope.cn/iic/speech_charctc_kws_phone-xiaoyun_mt.git ${local_path}

device="cuda:0" # "cuda:0" for gpu0, "cuda:1" for gpu1, "cpu"

config="inference_fsmn_4e_l10r2_280_200_fdim40_t2602_t4.yaml"
tokens="${local_path}/funasr/tokens_2602.txt"
tokens2="${local_path}/funasr/tokens_xiaoyun.txt"
seg_dict="${local_path}/funasr/lexicon.txt"
init_param="${local_path}/funasr/finetune_fsmn_4e_l10r2_280_200_fdim40_t2602_t4_xiaoyun_xiaoyun.pt"
cmvn_file="${local_path}/funasr/am.mvn.dim40_l4r4"

keywords=(小云小云)
keywords_string=$(IFS=,; echo "${keywords[*]}")
echo "keywords: $keywords_string"

python -m funasr.bin.inference \
    --config-path "${local_path}/funasr" \
    --config-name "${config}" \
    ++init_param="${init_param}" \
    ++frontend_conf.cmvn_file="${cmvn_file}" \
    ++token_lists='['''${tokens}''', '''${tokens2}''']' \
    ++seg_dicts='['''${seg_dict}''', '''${seg_dict}''']' \
    ++input="${input}" \
    ++output_dir="${output_dir}" \
    ++device="${device}" \
    ++keywords="\"$keywords_string\""
FunASR/examples/industrial_data_pretraining/fsmn_kws_mt/path.sh (new file, mode 100755)

export FUNASR_DIR=$PWD/../../..

# NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
export PYTHONIOENCODING=UTF-8
export PATH=$FUNASR_DIR/funasr/bin:$PATH