ModelZoo / Paraformer_FunASR_pytorch / Commits

Commit 70a8a9e0, authored Oct 03, 2024 by wangwei990215: "initial commit"
Pipeline #1738: failed (0 seconds)
Changes: 827 files, Pipelines: 1
Showing 20 changed files with 653 additions and 0 deletions

FunASR/examples/industrial_data_pretraining/sanm_kws_streaming/funasr  +2 -0
FunASR/examples/industrial_data_pretraining/sanm_kws_streaming/infer.sh  +34 -0
FunASR/examples/industrial_data_pretraining/sanm_kws_streaming/infer_from_local.sh  +62 -0
FunASR/examples/industrial_data_pretraining/sanm_kws_streaming/path.sh  +5 -0
FunASR/examples/industrial_data_pretraining/scama/demo.py  +46 -0
FunASR/examples/industrial_data_pretraining/scama/demo.sh  +10 -0
FunASR/examples/industrial_data_pretraining/seaco_paraformer/demo.py  +44 -0
FunASR/examples/industrial_data_pretraining/seaco_paraformer/demo.sh  +18 -0
FunASR/examples/industrial_data_pretraining/seaco_paraformer/finetune.sh  +83 -0
FunASR/examples/industrial_data_pretraining/sense_voice/demo.py  +83 -0
FunASR/examples/industrial_data_pretraining/sense_voice/demo_libtorch.py  +18 -0
FunASR/examples/industrial_data_pretraining/sense_voice/demo_onnx.py  +19 -0
FunASR/examples/industrial_data_pretraining/sense_voice/export.py  +16 -0
FunASR/examples/industrial_data_pretraining/sense_voice/finetune.sh  +70 -0
FunASR/examples/industrial_data_pretraining/transducer/demo.py  +17 -0
FunASR/examples/industrial_data_pretraining/uniasr/demo.py  +30 -0
FunASR/examples/industrial_data_pretraining/uniasr/demo.sh  +10 -0
FunASR/examples/industrial_data_pretraining/whisper/demo.py  +30 -0
FunASR/examples/industrial_data_pretraining/whisper/demo_from_openai.py  +34 -0
FunASR/examples/industrial_data_pretraining/whisper/infer.sh  +22 -0
FunASR/examples/industrial_data_pretraining/sanm_kws_streaming/funasr  0 → 120000 (symlink)

../../../funasr
\ No newline at end of file
FunASR/examples/industrial_data_pretraining/sanm_kws_streaming/infer.sh  0 → 100644

# Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
# MIT License (https://opensource.org/licenses/MIT)

# method1, inference from model hub
model="iic/speech_sanm_kws_phone-xiaoyun-commands-online"

# for more input type, please ref to readme.md
input="https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/KWS/pos_testset/kws_xiaoyunxiaoyun.wav"

keywords=(小云小云)
keywords_string=$(IFS=,; echo "${keywords[*]}")
echo "keywords: $keywords_string"

python funasr/bin/inference.py \
    +model=${model} \
    +input=${input} \
    +output_dir="./outputs/debug" \
    ++chunk_size='[4, 8, 4]' \
    ++encoder_chunk_look_back=0 \
    ++decoder_chunk_look_back=0 \
    +device="cpu" \
    ++keywords="\"$keywords_string\""

python funasr/bin/inference.py \
    +model=${model} \
    +input=${input} \
    +output_dir="./outputs/debug" \
    ++chunk_size='[5, 10, 5]' \
    ++encoder_chunk_look_back=0 \
    ++decoder_chunk_look_back=0 \
    +device="cpu" \
    ++keywords="\"$keywords_string\""
FunASR/examples/industrial_data_pretraining/sanm_kws_streaming/infer_from_local.sh  0 → 100644

# Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
# MIT License (https://opensource.org/licenses/MIT)

# method2, inference from local model
# for more input type, please ref to readme.md
input="https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/KWS/pos_testset/kws_xiaoyunxiaoyun.wav"
output_dir="./outputs/debug"

workspace=`pwd`

# download model
local_path_root=${workspace}/modelscope_models
mkdir -p ${local_path_root}
local_path=${local_path_root}/speech_sanm_kws_phone-xiaoyun-commands-online
git clone https://www.modelscope.cn/iic/speech_sanm_kws_phone-xiaoyun-commands-online.git ${local_path}

device="cpu" # "cuda:0" for gpu0, "cuda:1" for gpu1, "cpu"

config="inference_sanm_6e_320_256_fdim40_t2602_online.yaml"
tokens="${local_path}/tokens_2602.txt"
seg_dict="${local_path}/lexicon.txt"
init_param="${local_path}/finetune_sanm_6e_320_256_fdim40_t2602_online_xiaoyun_commands.pt"
cmvn_file="${local_path}/am.mvn.dim40_l3r3"

keywords=(小云小云)
keywords_string=$(IFS=,; echo "${keywords[*]}")
echo "keywords: $keywords_string"

echo "inference sanm streaming with chunk_size=[4, 8, 4]"
python -m funasr.bin.inference \
    --config-path "${local_path}/" \
    --config-name "${config}" \
    ++init_param="${init_param}" \
    ++frontend_conf.cmvn_file="${cmvn_file}" \
    ++tokenizer_conf.token_list="${tokens}" \
    ++tokenizer_conf.seg_dict="${seg_dict}" \
    ++input="${input}" \
    ++output_dir="${output_dir}" \
    ++chunk_size='[4, 8, 4]' \
    ++encoder_chunk_look_back=0 \
    ++decoder_chunk_look_back=0 \
    ++device="${device}" \
    ++keywords="\"$keywords_string\""

echo "inference sanm streaming with chunk_size=[5, 10, 5]"
python -m funasr.bin.inference \
    --config-path "${local_path}/" \
    --config-name "${config}" \
    ++init_param="${init_param}" \
    ++frontend_conf.cmvn_file="${cmvn_file}" \
    ++tokenizer_conf.token_list="${tokens}" \
    ++tokenizer_conf.seg_dict="${seg_dict}" \
    ++input="${input}" \
    ++output_dir="${output_dir}" \
    ++chunk_size='[5, 10, 5]' \
    ++encoder_chunk_look_back=0 \
    ++decoder_chunk_look_back=0 \
    ++device="${device}" \
    ++keywords="\"$keywords_string\""
FunASR/examples/industrial_data_pretraining/sanm_kws_streaming/path.sh  0 → 100755

export FUNASR_DIR=$PWD/../../..

# NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
export PYTHONIOENCODING=UTF-8
export PATH=$FUNASR_DIR/funasr/bin:$PATH
FunASR/examples/industrial_data_pretraining/scama/demo.py  0 → 100644

#!/usr/bin/env python3
# -*- encoding: utf-8 -*-
# Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
# MIT License (https://opensource.org/licenses/MIT)

from funasr import AutoModel

chunk_size = [5, 10, 5]  # [0, 10, 5] 600ms, [0, 8, 4] 480ms
encoder_chunk_look_back = 0  # number of chunks to lookback for encoder self-attention
decoder_chunk_look_back = 0  # number of encoder chunks to lookback for decoder cross-attention

model = AutoModel(
    model="/Users/zhifu/Downloads/modelscope_models/speech_SCAMA_asr-zh-cn-16k-common-vocab8358-streaming"
)

cache = {}
res = model.generate(
    input="https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav",
    chunk_size=chunk_size,
    encoder_chunk_look_back=encoder_chunk_look_back,
    decoder_chunk_look_back=decoder_chunk_look_back,
)
print(res)


import soundfile
import os

wav_file = os.path.join(model.model_path, "example/asr_example.wav")
speech, sample_rate = soundfile.read(wav_file)
chunk_stride = chunk_size[1] * 960  # 600ms, 480ms

cache = {}
total_chunk_num = int((len(speech) - 1) / chunk_stride + 1)
for i in range(total_chunk_num):
    speech_chunk = speech[i * chunk_stride : (i + 1) * chunk_stride]
    is_final = i == total_chunk_num - 1
    res = model.generate(
        input=speech_chunk,
        cache=cache,
        is_final=is_final,
        chunk_size=chunk_size,
        encoder_chunk_look_back=encoder_chunk_look_back,
        decoder_chunk_look_back=decoder_chunk_look_back,
    )
    print(res)
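Why the factor 960 in demo.py: at the model's 16 kHz input rate, each unit of chunk_size covers 60 ms of audio, i.e. 960 samples, so chunk_size = [5, 10, 5] gives a 600 ms center chunk. A minimal sketch of that arithmetic, reusing the names from the demo (the 60 ms-per-unit figure matches the "[0, 10, 5] 600ms" comment above):

# Sketch only: where chunk_stride = chunk_size[1] * 960 comes from.
sample_rate = 16000                                       # Hz
ms_per_unit = 60                                          # one chunk_size unit = 60 ms of audio
samples_per_unit = sample_rate * ms_per_unit // 1000      # = 960 samples
chunk_size = [5, 10, 5]
chunk_stride = chunk_size[1] * samples_per_unit           # = 9600 samples = 600 ms
print(samples_per_unit, chunk_stride)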
FunASR/examples/industrial_data_pretraining/scama/demo.sh  0 → 100644

model="iic/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online"

python funasr/bin/inference.py \
    +model=${model} \
    +input="https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav" \
    +output_dir="./outputs/debug" \
    +device="cpu"
FunASR/examples/industrial_data_pretraining/seaco_paraformer/demo.py  0 → 100644

#!/usr/bin/env python3
# -*- encoding: utf-8 -*-
# Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
# MIT License (https://opensource.org/licenses/MIT)

from funasr import AutoModel

model = AutoModel(
    model="iic/speech_seaco_paraformer_large_asr_nat-zh-cn-16k-common-vocab8404-pytorch",
    # vad_model="iic/speech_fsmn_vad_zh-cn-16k-common-pytorch",
    # punc_model="iic/punc_ct-transformer_zh-cn-common-vocab272727-pytorch",
    # spk_model="iic/speech_campplus_sv_zh-cn_16k-common",
)

# example1
res = model.generate(
    input="https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav",
    hotword="达摩院 魔搭",
    # return_raw_text=True,  # return raw recognition text split by spaces, equal in length to the timestamps
    # preset_spk_num=2,  # preset speaker num for speaker cluster model
    # sentence_timestamp=True,  # return sentence level information when spk_model is not given
)
print(res)

"""
# tensor or numpy as input
# example2
import torchaudio
import os

wav_file = os.path.join(model.model_path, "example/asr_example.wav")
input_tensor, sample_rate = torchaudio.load(wav_file)
input_tensor = input_tensor.mean(0)
res = model.generate(input=[input_tensor], batch_size_s=300, is_final=True)

# example3
import soundfile

wav_file = os.path.join(model.model_path, "example/asr_example.wav")
speech, sample_rate = soundfile.read(wav_file)
res = model.generate(input=[speech], batch_size_s=300, is_final=True)
"""
FunASR/examples/industrial_data_pretraining/seaco_paraformer/demo.sh  0 → 100644

model="iic/speech_seaco_paraformer_large_asr_nat-zh-cn-16k-common-vocab8404-pytorch"
vad_model="iic/speech_fsmn_vad_zh-cn-16k-common-pytorch"
vad_model_revision="master"
punc_model="iic/punc_ct-transformer_zh-cn-common-vocab272727-pytorch"
punc_model_revision="master"

python funasr/bin/inference.py \
    +model=${model} \
    +vad_model=${vad_model} \
    +vad_model_revision=${vad_model_revision} \
    +punc_model=${punc_model} \
    +punc_model_revision=${punc_model_revision} \
    +input="https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav" \
    +output_dir="./outputs/debug" \
    +device="cpu" \
    +"hotword='达摩院 魔搭'"
FunASR/examples/industrial_data_pretraining/seaco_paraformer/finetune.sh  0 → 100644

# Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
# MIT License (https://opensource.org/licenses/MIT)

workspace=`pwd`

# which gpu to train or finetune
export CUDA_VISIBLE_DEVICES="0,1"
gpu_num=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')

# model_name from model_hub, or model_dir in local path
## option 1, download model automatically
model_name_or_model_dir="iic/speech_seaco_paraformer_large_asr_nat-zh-cn-16k-common-vocab8404-pytorch"

## option 2, download model by git
#local_path_root=${workspace}/modelscope_models
#mkdir -p ${local_path_root}/${model_name_or_model_dir}
#git clone https://www.modelscope.cn/${model_name_or_model_dir}.git ${local_path_root}/${model_name_or_model_dir}
#model_name_or_model_dir=${local_path_root}/${model_name_or_model_dir}

# data dir, which contains: train.jsonl, val.jsonl
data_dir="../../../data/list"
train_data="${data_dir}/train.jsonl"
val_data="${data_dir}/val.jsonl"

# generate train.jsonl and val.jsonl from wav.scp and text.txt
scp2jsonl \
    ++scp_file_list='["../../../data/list/train_wav.scp", "../../../data/list/train_text.txt"]' \
    ++data_type_list='["source", "target"]' \
    ++jsonl_file_out="${train_data}"

scp2jsonl \
    ++scp_file_list='["../../../data/list/val_wav.scp", "../../../data/list/val_text.txt"]' \
    ++data_type_list='["source", "target"]' \
    ++jsonl_file_out="${val_data}"

# exp output dir
output_dir="./outputs"
log_file="${output_dir}/log.txt"
mkdir -p ${output_dir}
echo "log_file: ${log_file}"

DISTRIBUTED_ARGS="
    --nnodes ${WORLD_SIZE:-1} \
    --nproc_per_node $gpu_num \
    --node_rank ${RANK:-0} \
    --master_addr ${MASTER_ADDR:-127.0.0.1} \
    --master_port ${MASTER_PORT:-26669}
"

echo $DISTRIBUTED_ARGS

torchrun $DISTRIBUTED_ARGS \
    ../../../funasr/bin/train_ds.py \
    ++model="${model_name_or_model_dir}" \
    ++train_data_set_list="${train_data}" \
    ++valid_data_set_list="${val_data}" \
    ++dataset="AudioDatasetHotword" \
    ++dataset_conf.index_ds="IndexDSJsonl" \
    ++dataset_conf.data_split_num=1 \
    ++dataset_conf.batch_sampler="BatchSampler" \
    ++dataset_conf.batch_size=6000 \
    ++dataset_conf.sort_size=1024 \
    ++dataset_conf.batch_type="token" \
    ++dataset_conf.num_workers=4 \
    ++train_conf.max_epoch=50 \
    ++train_conf.log_interval=1 \
    ++train_conf.resume=true \
    ++train_conf.validate_interval=2000 \
    ++train_conf.save_checkpoint_interval=2000 \
    ++train_conf.avg_keep_nbest_models_type='loss' \
    ++train_conf.keep_nbest_models=20 \
    ++train_conf.avg_nbest_model=10 \
    ++train_conf.use_deepspeed=false \
    ++train_conf.deepspeed_config=${deepspeed_config} \
    ++optim_conf.lr=0.0002 \
    ++output_dir="${output_dir}" &> ${log_file}
\ No newline at end of file
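The scp2jsonl step above consumes a Kaldi-style wav.scp (utterance id followed by the audio path) and a matching text file (utterance id followed by the transcript). A hedged sketch, written in Python for illustration, of what those two inputs could look like; the layout is the usual id-first two-column convention, and every id, path, and transcript below is made up:

# Sketch only: toy train_wav.scp / train_text.txt inputs for scp2jsonl (all values invented).
wav_lines = [
    "ID0001 /data/audio/ID0001.wav",
    "ID0002 /data/audio/ID0002.wav",
]
text_lines = [
    "ID0001 欢迎使用达摩院语音识别",
    "ID0002 魔搭社区提供开源模型",
]
with open("train_wav.scp", "w", encoding="utf-8") as f:
    f.write("\n".join(wav_lines) + "\n")
with open("train_text.txt", "w", encoding="utf-8") as f:
    f.write("\n".join(text_lines) + "\n")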
FunASR/examples/industrial_data_pretraining/sense_voice/demo.py  0 → 100644

#!/usr/bin/env python3
# -*- encoding: utf-8 -*-
# Copyright FunASR (https://github.com/FunAudioLLM/SenseVoice). All Rights Reserved.
# MIT License (https://opensource.org/licenses/MIT)

from funasr import AutoModel
from funasr.utils.postprocess_utils import rich_transcription_postprocess

model_dir = "iic/SenseVoiceSmall"

model = AutoModel(
    model=model_dir,
    vad_model="fsmn-vad",
    vad_kwargs={"max_single_segment_time": 30000},
    device="cuda:0",
)

# en
res = model.generate(
    input=f"{model.model_path}/example/en.mp3",
    cache={},
    language="auto",  # "zh", "en", "yue", "ja", "ko", "nospeech"
    use_itn=True,
    batch_size_s=60,
    merge_vad=True,
    merge_length_s=15,
)
text = rich_transcription_postprocess(res[0]["text"])
print(text)

# zh
res = model.generate(
    input=f"{model.model_path}/example/zh.mp3",
    cache={},
    language="auto",  # "zh", "en", "yue", "ja", "ko", "nospeech"
    use_itn=True,
    batch_size_s=60,
    merge_vad=True,
    merge_length_s=15,
)
text = rich_transcription_postprocess(res[0]["text"])
print(text)

# yue
res = model.generate(
    input=f"{model.model_path}/example/yue.mp3",
    cache={},
    language="auto",  # "zh", "en", "yue", "ja", "ko", "nospeech"
    use_itn=True,
    batch_size_s=60,
    merge_vad=True,
    merge_length_s=15,
)
text = rich_transcription_postprocess(res[0]["text"])
print(text)

# ja
res = model.generate(
    input=f"{model.model_path}/example/ja.mp3",
    cache={},
    language="auto",  # "zh", "en", "yue", "ja", "ko", "nospeech"
    use_itn=True,
    batch_size_s=60,
    merge_vad=True,
    merge_length_s=15,
)
text = rich_transcription_postprocess(res[0]["text"])
print(text)

# ko
res = model.generate(
    input=f"{model.model_path}/example/ko.mp3",
    cache={},
    language="auto",  # "zh", "en", "yue", "ja", "ko", "nospeech"
    use_itn=True,
    batch_size_s=60,
    merge_vad=True,
    merge_length_s=15,
)
text = rich_transcription_postprocess(res[0]["text"])
print(text)
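rich_transcription_postprocess is applied in each block above because SenseVoice's raw output typically interleaves inline tags (language, emotion, and audio-event tokens in the <|...|> style) with the transcript; the helper strips them and normalizes the text. A minimal sketch comparing the two forms, reusing res and the imported helper from the last call above:

# Sketch only: raw model text (with inline tags) vs. the cleaned transcript.
raw_text = res[0]["text"]                               # may contain tags such as <|ko|><|NEUTRAL|>
clean_text = rich_transcription_postprocess(raw_text)   # tags removed, text normalized
print("raw:  ", raw_text)
print("clean:", clean_text)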
FunASR/examples/industrial_data_pretraining/sense_voice/demo_libtorch.py  0 → 100644

#!/usr/bin/env python3
# -*- encoding: utf-8 -*-
# Copyright FunASR (https://github.com/FunAudioLLM/SenseVoice). All Rights Reserved.
# MIT License (https://opensource.org/licenses/MIT)

from pathlib import Path

from funasr_torch import SenseVoiceSmall
from funasr_torch.utils.postprocess_utils import rich_transcription_postprocess

model_dir = "iic/SenseVoiceSmall"
model = SenseVoiceSmall(model_dir, batch_size=10, device="cuda:0")

wav_or_scp = ["{}/.cache/modelscope/hub/{}/example/en.mp3".format(Path.home(), model_dir)]

res = model(wav_or_scp, language="auto", use_itn=True)
print([rich_transcription_postprocess(i) for i in res])
FunASR/examples/industrial_data_pretraining/sense_voice/demo_onnx.py  0 → 100644

#!/usr/bin/env python3
# -*- encoding: utf-8 -*-
# Copyright FunASR (https://github.com/FunAudioLLM/SenseVoice). All Rights Reserved.
# MIT License (https://opensource.org/licenses/MIT)

from pathlib import Path

from funasr_onnx import SenseVoiceSmall
from funasr_onnx.utils.postprocess_utils import rich_transcription_postprocess

model_dir = "iic/SenseVoiceSmall"
model = SenseVoiceSmall(model_dir, batch_size=10, quantize=True)

# inference
wav_or_scp = ["{}/.cache/modelscope/hub/{}/example/en.mp3".format(Path.home(), model_dir)]

res = model(wav_or_scp, language="auto", use_itn=True)
print([rich_transcription_postprocess(i) for i in res])
FunASR/examples/industrial_data_pretraining/sense_voice/export.py  0 → 100644

#!/usr/bin/env python3
# -*- encoding: utf-8 -*-
# Copyright FunASR (https://github.com/FunAudioLLM/SenseVoice). All Rights Reserved.
# MIT License (https://opensource.org/licenses/MIT)

from funasr import AutoModel

model_dir = "iic/SenseVoiceSmall"

model = AutoModel(
    model=model_dir,
    device="cuda:0",
)

res = model.export(type="onnx", quantize=False)
\ No newline at end of file
FunASR/examples/industrial_data_pretraining/sense_voice/finetune.sh  0 → 100644

# Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
# MIT License (https://opensource.org/licenses/MIT)

workspace=`pwd`

# which gpu to train or finetune
export CUDA_VISIBLE_DEVICES="0,1"
gpu_num=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')

# model_name from model_hub, or model_dir in local path
## option 1, download model automatically
model_name_or_model_dir="iic/SenseVoiceCTC"

## option 2, download model by git
#local_path_root=${workspace}/modelscope_models
#mkdir -p ${local_path_root}/${model_name_or_model_dir}
#git clone https://www.modelscope.cn/${model_name_or_model_dir}.git ${local_path_root}/${model_name_or_model_dir}
#model_name_or_model_dir=${local_path_root}/${model_name_or_model_dir}

# data dir, which contains: train_example.jsonl, val_example.jsonl
train_data=${workspace}/data/train_example.jsonl
val_data=${workspace}/data/val_example.jsonl

# exp output dir
output_dir="./outputs"
log_file="${output_dir}/log.txt"

deepspeed_config=${workspace}/../../ds_stage1.json

mkdir -p ${output_dir}
echo "log_file: ${log_file}"

DISTRIBUTED_ARGS="
    --nnodes ${WORLD_SIZE:-1} \
    --nproc_per_node $gpu_num \
    --node_rank ${RANK:-0} \
    --master_addr ${MASTER_ADDR:-127.0.0.1} \
    --master_port ${MASTER_PORT:-26669}
"

echo $DISTRIBUTED_ARGS

# funasr trainer path
train_tool=`dirname $(which funasr)`/train_ds.py

torchrun $DISTRIBUTED_ARGS \
    ${train_tool} \
    ++model="${model_name_or_model_dir}" \
    ++train_data_set_list="${train_data}" \
    ++valid_data_set_list="${val_data}" \
    ++dataset_conf.data_split_num=1 \
    ++dataset_conf.batch_sampler="BatchSampler" \
    ++dataset_conf.batch_size=6000 \
    ++dataset_conf.sort_size=1024 \
    ++dataset_conf.batch_type="token" \
    ++dataset_conf.num_workers=4 \
    ++train_conf.max_epoch=50 \
    ++train_conf.log_interval=1 \
    ++train_conf.resume=true \
    ++train_conf.validate_interval=2000 \
    ++train_conf.save_checkpoint_interval=2000 \
    ++train_conf.keep_nbest_models=20 \
    ++train_conf.avg_nbest_model=10 \
    ++train_conf.use_deepspeed=false \
    ++train_conf.deepspeed_config=${deepspeed_config} \
    ++optim_conf.lr=0.0002 \
    ++output_dir="${output_dir}" &> ${log_file}
\ No newline at end of file
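train_example.jsonl and val_example.jsonl hold one JSON object per line pairing an audio source with its target transcript, in the same layout that the scp2jsonl tool in the seaco_paraformer finetune.sh above produces. The exact field names and all values in the sketch below are an illustrative assumption rather than a guaranteed schema:

# Sketch only: append one illustrative record in the assumed FunASR jsonl layout (all values invented).
import json

record = {
    "key": "ID0001",                     # utterance id (assumed field name)
    "source": "/data/audio/ID0001.wav",  # input audio path
    "source_len": 88,                    # input length, e.g. number of frames (assumed)
    "target": "欢迎使用语音识别",          # transcript
    "target_len": 8,                     # number of target tokens (assumed)
}
with open("train_example.jsonl", "a", encoding="utf-8") as f:
    f.write(json.dumps(record, ensure_ascii=False) + "\n")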
FunASR/examples/industrial_data_pretraining/transducer/demo.py  0 → 100644

#!/usr/bin/env python3
# -*- encoding: utf-8 -*-
# Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
# MIT License (https://opensource.org/licenses/MIT)

from funasr import AutoModel

# Transducer, BAT and RWKV_BAT models are used in the same way; just pick the correct model_revision
# https://modelscope.cn/models?name=transducer&page=1&tasks=auto-speech-recognition&type=audio

model = AutoModel(
    model="iic/speech_bat_asr-zh-cn-16k-aishell1-vocab4234-pytorch",
)

res = model.generate(input="https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav")
print(res)
FunASR/examples/industrial_data_pretraining/uniasr/demo.py  0 → 100644

#!/usr/bin/env python3
# -*- encoding: utf-8 -*-
# Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
# MIT License (https://opensource.org/licenses/MIT)

from funasr import AutoModel

model = AutoModel(
    model="iic/speech_UniASR-large_asr_2pass-zh-cn-16k-common-vocab8358-tensorflow1-offline",
)

res = model.generate(input="https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav")
print(res)

""" cannot be used currently
from funasr import AutoFrontend

frontend = AutoFrontend(model="iic/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch")

fbanks = frontend(input="https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav", batch_size=2)

for batch_idx, fbank_dict in enumerate(fbanks):
    res = model.generate(**fbank_dict)
    print(res)
"""
FunASR/examples/industrial_data_pretraining/uniasr/demo.sh  0 → 100644

model="iic/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch"

python funasr/bin/inference.py \
    +model=${model} \
    +input="https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav" \
    +output_dir="./outputs/debug" \
    +device="cpu"
FunASR/examples/industrial_data_pretraining/whisper/demo.py  0 → 100644

#!/usr/bin/env python3
# -*- encoding: utf-8 -*-
# Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
# MIT License (https://opensource.org/licenses/MIT)

# To install requirements: pip3 install -U openai-whisper

from funasr import AutoModel

model = AutoModel(
    model="iic/Whisper-large-v3",
    vad_model="iic/speech_fsmn_vad_zh-cn-16k-common-pytorch",
    vad_kwargs={"max_single_segment_time": 30000},
)

DecodingOptions = {
    "task": "transcribe",
    "language": None,
    "beam_size": None,
    "fp16": True,
    "without_timestamps": False,
    "prompt": None,
}

res = model.generate(
    DecodingOptions=DecodingOptions,
    batch_size_s=0,
    input="https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav",
)
print(res)
FunASR/examples/industrial_data_pretraining/whisper/demo_from_openai.py  0 → 100644

#!/usr/bin/env python3
# -*- encoding: utf-8 -*-
# Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
# MIT License (https://opensource.org/licenses/MIT)

# To install requirements: pip3 install -U openai-whisper

from funasr import AutoModel

# model = AutoModel(model="Whisper-small", hub="openai")
# model = AutoModel(model="Whisper-medium", hub="openai")
# model = AutoModel(model="Whisper-large-v2", hub="openai")

model = AutoModel(
    model="Whisper-large-v3",
    vad_model="iic/speech_fsmn_vad_zh-cn-16k-common-pytorch",
    vad_kwargs={"max_single_segment_time": 30000},
    hub="openai",
)

DecodingOptions = {
    "task": "transcribe",
    "language": None,
    "beam_size": None,
    "fp16": True,
    "without_timestamps": False,
    "prompt": None,
}

res = model.generate(
    DecodingOptions=DecodingOptions,
    batch_size_s=0,
    input="https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav",
)
print(res)
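The DecodingOptions dict in these two demos mirrors fields of openai-whisper's own whisper.DecodingOptions (task, language, beam_size, fp16, without_timestamps, prompt). For comparison, the same audio can be transcribed directly with the upstream openai-whisper package installed by the comment above; a minimal sketch, assuming the demo wav has been downloaded locally under the hypothetical name asr_example_zh.wav:

# Sketch only: plain openai-whisper transcription of the same audio for comparison.
import whisper

wmodel = whisper.load_model("large-v3")
result = wmodel.transcribe(
    "asr_example_zh.wav",   # local copy of the demo wav (hypothetical path)
    task="transcribe",
    language=None,          # None lets Whisper detect the language
    fp16=True,
)
print(result["text"])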
FunASR/examples/industrial_data_pretraining/whisper/infer.sh  0 → 100644

# Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
# MIT License (https://opensource.org/licenses/MIT)

# To install requirements: pip3 install -U openai-whisper

# method1, inference from model hub
# for more input type, please ref to readme.md
input="https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav"
output_dir="./outputs/debug"
model="iic/speech_whisper-large_asr_multilingual"
device="cuda:0" # "cuda:0" for gpu0, "cuda:1" for gpu1, "cpu"

python -m funasr.bin.inference \
    ++model=${model} \
    ++input="${input}" \
    ++output_dir="${output_dir}" \
    ++device="${device}"