Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
SpeechT5_pytorch
Commits
12c90639
"vscode:/vscode.git/clone" did not exist on "f23f8a0688557e3ca3cf8bbf8e7669eab9912434"
Commit
12c90639
authored
Sep 28, 2024
by
“change”
Browse files
init
parent
417b607b
Changes
350
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
1317 additions
and
0 deletions
+1317
-0
Speech2S/speech2s/scripts copy/tune_speechut_asr/finetune960h_large_edctc.sh
...cripts copy/tune_speechut_asr/finetune960h_large_edctc.sh
+45
-0
Speech2S/speech2s/scripts copy/tune_speechut_asr/finetune_base_edctc.sh
...h2s/scripts copy/tune_speechut_asr/finetune_base_edctc.sh
+45
-0
Speech2S/speech2s/scripts copy/tune_speechut_asr/inference_edctc.sh
...peech2s/scripts copy/tune_speechut_asr/inference_edctc.sh
+61
-0
Speech2S/speech2s/scripts copy/tune_speechut_asr/inference_edctclm.sh
...ech2s/scripts copy/tune_speechut_asr/inference_edctclm.sh
+66
-0
Speech2S/speech2s/scripts copy/tune_speechut_asr/inference_lm_nj.sh
...peech2s/scripts copy/tune_speechut_asr/inference_lm_nj.sh
+74
-0
Speech2S/speech2s/scripts copy/tune_speechut_asr/inference_nj.sh
...S/speech2s/scripts copy/tune_speechut_asr/inference_nj.sh
+69
-0
Speech2S/speech2s/scripts copy/tune_speechut_st/finetune_base_mustc_enxx.sh
...scripts copy/tune_speechut_st/finetune_base_mustc_enxx.sh
+77
-0
Speech2S/speech2s/scripts copy/tune_speechut_st/inference_st.sh
...2S/speech2s/scripts copy/tune_speechut_st/inference_st.sh
+44
-0
Speech2S/speech2s/scripts/__init__.py
Speech2S/speech2s/scripts/__init__.py
+0
-0
Speech2S/speech2s/scripts/average_checkpoints.py
Speech2S/speech2s/scripts/average_checkpoints.py
+160
-0
Speech2S/speech2s/scripts/build_sym_alignment.py
Speech2S/speech2s/scripts/build_sym_alignment.py
+97
-0
Speech2S/speech2s/scripts/compare_namespaces.py
Speech2S/speech2s/scripts/compare_namespaces.py
+46
-0
Speech2S/speech2s/scripts/compound_split_bleu.sh
Speech2S/speech2s/scripts/compound_split_bleu.sh
+20
-0
Speech2S/speech2s/scripts/constraints/extract.py
Speech2S/speech2s/scripts/constraints/extract.py
+90
-0
Speech2S/speech2s/scripts/constraints/validate.py
Speech2S/speech2s/scripts/constraints/validate.py
+34
-0
Speech2S/speech2s/scripts/convert_dictionary.lua
Speech2S/speech2s/scripts/convert_dictionary.lua
+34
-0
Speech2S/speech2s/scripts/convert_model.lua
Speech2S/speech2s/scripts/convert_model.lua
+108
-0
Speech2S/speech2s/scripts/count_docs.py
Speech2S/speech2s/scripts/count_docs.py
+58
-0
Speech2S/speech2s/scripts/read_binarized.py
Speech2S/speech2s/scripts/read_binarized.py
+48
-0
Speech2S/speech2s/scripts/rm_pt.py
Speech2S/speech2s/scripts/rm_pt.py
+141
-0
No files found.
Too many changes to show.
To preserve performance only
350 of 350+
files are displayed.
Plain diff
Email patch
Speech2S/speech2s/scripts copy/tune_speechut_asr/finetune960h_large_edctc.sh
0 → 100644
View file @
12c90639
# ####################################
# SpeechUT Large model #
# ####################################
[
$#
-lt
3
]
&&
echo
"Usage:
$0
<model_path> <data_dir> <cpt_tag> [mount=
${
PWD
}
] [world_size=8] [update_freq=3]"
&&
exit
1
[
${
PWD
##*/
}
!=
SpeechUT
]
&&
echo
"Error: dir not match! Switch to SpeechUT/ and run it again!"
&&
exit
1
w2v_path
=
$1
DATA_DIR
=
$2
cpt
=
$3
mount
=
$4
world_size
=
$5
update_freq
=
$6
[
-z
$mount
]
&&
mount
=
${
PWD
}
[
-z
$world_size
]
&&
world_size
=
8
[
-z
$update_freq
]
&&
update_freq
=
3
CODE_ROOT
=
${
PWD
}
exp_name
=
${
w2v_path
%/*
}
exp_name
=
${
exp_name
##*/
}
MODEL_DIR
=
"
${
mount
}
/exp/finetune_asr/
$exp_name
/960h_edctc80k_from_
${
cpt
}
_bz3.3m_lr1e-5"
[
-d
$MODEL_DIR
]
||
mkdir
-p
$MODEL_DIR
python
$CODE_ROOT
/fairseq/fairseq_cli/hydra_train.py
\
--config-dir
$CODE_ROOT
/speechut/config/finetune_asr
\
--config-name
speechut_large_960h
\
common.user_dir
=
$CODE_ROOT
/speechut
\
\
task.data
=
$DATA_DIR
\
task.label_dir
=
$DATA_DIR
\
model.w2v_path
=
${
w2v_path
}
\
\
optimization.lr
=[
0.00001]
\
optimization.max_update
=
80000
\
dataset.max_tokens
=
1100000
\
optimization.update_freq
=[
${
update_freq
}
]
\
distributed_training.distributed_world_size
=
${
world_size
}
\
\
dataset.train_subset
=
"train_960"
\
dataset.valid_subset
=
"dev_other"
\
\
common.tensorboard_logdir
=
$MODEL_DIR
\
checkpoint.save_dir
=
$MODEL_DIR
\
hydra.run.dir
=
$MODEL_DIR
\
hydra.job.name
=
960h_edctc80k_from_
${
cpt
}
_bz3.3m_lr1e-5
Speech2S/speech2s/scripts copy/tune_speechut_asr/finetune_base_edctc.sh
0 → 100644
View file @
12c90639
# ####################################
# SpeechUT Base model #
# ####################################
# Fine-tune a pretrained SpeechUT Base model on LibriSpeech 100h with the
# encoder-decoder + CTC recipe. Must be launched from the SpeechUT/ directory.
[ $# -lt 3 ] && echo "Usage: $0 <model_path> <data_dir> <cpt_tag> [mount=${PWD}] [world_size=8] [update_freq=2]" && exit 1
[ ${PWD##*/} != SpeechUT ] && echo "Error: dir not match! Switch to SpeechUT/ and run it again!" && exit 1

w2v_path=$1
DATA_DIR=$2
cpt=$3
mount=$4
world_size=$5
update_freq=$6
# Optional args fall back to defaults when unset.
[ -z $mount ] && mount=${PWD}
[ -z $world_size ] && world_size=8
[ -z $update_freq ] && update_freq=2

CODE_ROOT=${PWD}

# Experiment name = name of the directory containing the pretrained checkpoint.
exp_name=${w2v_path%/*}
exp_name=${exp_name##*/}
MODEL_DIR="${mount}/exp/finetune_asr/$exp_name/edctc40k_from_${cpt}_bz2.6m_lr1e-5"
[ -d $MODEL_DIR ] || mkdir -p $MODEL_DIR

python $CODE_ROOT/fairseq/fairseq_cli/hydra_train.py \
    --config-dir $CODE_ROOT/speechut/config/finetune_asr \
    --config-name speechut_base_100h \
    common.user_dir=$CODE_ROOT/speechut \
    \
    task.data=$DATA_DIR \
    task.label_dir=$DATA_DIR \
    model.w2v_path=${w2v_path} \
    \
    optimization.lr=[0.00001] \
    optimization.max_update=40000 \
    dataset.max_tokens=1300000 \
    optimization.update_freq=[${update_freq}] \
    distributed_training.distributed_world_size=${world_size} \
    \
    dataset.train_subset="train_clean_100" \
    dataset.valid_subset="dev_other" \
    \
    common.tensorboard_logdir=$MODEL_DIR \
    checkpoint.save_dir=$MODEL_DIR \
    hydra.run.dir=$MODEL_DIR \
    hydra.job.name=edctc40k_from_${cpt}_bz2.6m_lr1e-5
Speech2S/speech2s/scripts copy/tune_speechut_asr/inference_edctc.sh
0 → 100644
View file @
12c90639
#####################################
# SpeechUT ASR model #
#####################################
# Decode a fine-tuned SpeechUT ASR model (joint attention/CTC decoding),
# one background job per comma-separated subset.
[ $# -lt 2 ] && echo "Usage: $0 <model_path> <data_dir> [gen-set=dev_other] [beam_size=10] [ctc_weight=0.2] [--normalize]" && exit 1
[ ${PWD##*/} != SpeechUT ] && echo "Error: dir not match! Switch to SpeechUT/ and run it again!" && exit 1

model_path=$1
DATA_DIR=$2
gen_set=$3
beam_size=$4
ctc_weight=$5
extra=$6
[ -z $extra ] && echo "Assert decoding base model! If you are decoding large model, please add '--normalize' at the end..."
[ -z $gen_set ] && gen_set="dev_other"
[ -z $beam_size ] && beam_size=10
[ -z $ctc_weight ] && ctc_weight=0.2
# Without CTC fusion a beam is pointless; with it we must decode one utterance at a time.
[ $ctc_weight == 0 ] && [ $beam_size != 1 ] && echo "Change beam size to 1 as no ctc-decoding used..." && beam_size=1
[ $ctc_weight != 0 ] && extra="$extra --batch-size 1"

src_dir=${model_path%/*}
cpt=${model_path##*/}
cpt=${cpt%.*}

CODE_ROOT=${PWD}

for subset in ${gen_set//,/ }; do
    results_path=$src_dir/decode_${cpt}/beam${beam_size}_ctc${ctc_weight}/${subset}_${world_size}_${rank}
    [ ! -d $results_path ] && mkdir -p $results_path

    python $CODE_ROOT/fairseq/fairseq_cli/generate.py $DATA_DIR \
        --user-dir $CODE_ROOT/speechut \
        --label-dir ${DATA_DIR} \
        --labels '["ltr"]' \
        --single-target \
        --post-process letter \
        --gen-subset ${subset} \
        --max-tokens 2000000 \
        \
        --task joint_sc2t_pretraining \
        --add-decoder-target \
        --fine-tuning \
        --pad-audio \
        --random-crop \
        \
        --ctc-weight ${ctc_weight} $extra \
        --beam ${beam_size} \
        \
        --path ${model_path} \
        --results-path $results_path \
        \
        --scoring wer --max-len-a 0.00078125 --max-len-b 200 \
        &
done
wait

# Report the final WER line of every subset decoded above.
for subset in ${gen_set//,/ }; do
    results_path=$src_dir/decode_${cpt}/beam${beam_size}_ctc${ctc_weight}/${subset}_${world_size}_${rank}
    echo $results_path
    tail -n 1 $results_path/generate-*.txt
done
Speech2S/speech2s/scripts copy/tune_speechut_asr/inference_edctclm.sh
0 → 100644
View file @
12c90639
#####################################
# SpeechUT ASR model #
#####################################
# Decode a fine-tuned SpeechUT ASR model with joint attention/CTC decoding
# plus external language-model shallow fusion.
[ $# -lt 2 ] && echo "Usage: $0 <model_path> <data_dir> [gen-set=dev_other] [beam_size=30] [ctc_weight=0.3] [lm_weight=0.7] [lm_path] [--normalize]" && exit 1
[ ${PWD##*/} != SpeechUT ] && echo "Error: dir not match! Switch to SpeechUT/ and run it again!" && exit 1

model_path=$1
DATA_DIR=$2
gen_set=$3
beam_size=$4
ctc_weight=$5
lm_weight=$6
lm_path=$7
extra=$8
[ -z $extra ] && echo "Assert decoding base model! If you are decoding large model, please add '--normalize' at the end..."
[ -z $gen_set ] && gen_set="dev_other"
[ -z $beam_size ] && beam_size=30
[ -z $ctc_weight ] && ctc_weight=0.3
[ -z $lm_weight ] && lm_weight=0.7
[ -z $lm_path ] && lm_path="/mnt/default/v-junyiao/librispeech/lm/lm_ctc_form/checkpoint_best.pt"
# LM fusion only applies together with CTC decoding; disable both otherwise.
[ $ctc_weight == 0 ] && [ $beam_size != 1 ] && echo "Change beam size to 1 and lm_weight to 0 as no ctc-decoding used..." && beam_size=1 && lm_weight=0
[ $ctc_weight != 0 ] && extra="$extra --batch-size 1"

src_dir=${model_path%/*}
cpt=${model_path##*/}
cpt=${cpt%.*}

CODE_ROOT=${PWD}

for subset in ${gen_set//,/ }; do
    results_path=$src_dir/decode_${cpt}/beam${beam_size}_ctc${ctc_weight}_lm${lm_weight}/${subset}_${world_size}_${rank}
    [ ! -d $results_path ] && mkdir -p $results_path

    python $CODE_ROOT/fairseq/fairseq_cli/generate.py $DATA_DIR \
        --user-dir $CODE_ROOT/speechut \
        --label-dir ${DATA_DIR} \
        --labels '["ltr"]' \
        --single-target \
        --post-process letter \
        --gen-subset ${subset} \
        --max-tokens 800000 \
        \
        --task joint_sc2t_pretraining \
        --add-decoder-target \
        --fine-tuning \
        --pad-audio \
        --random-crop \
        \
        --ctc-weight ${ctc_weight} $extra \
        --lm-weight ${lm_weight} --lm-path ${lm_path} \
        --beam ${beam_size} \
        \
        --path ${model_path} \
        --results-path ${results_path} \
        \
        --scoring wer --max-len-a 0.00078125 --max-len-b 200 \
        &
done
wait

# Report the final WER line of every subset decoded above.
for subset in ${gen_set//,/ }; do
    results_path=$src_dir/decode_${cpt}/beam${beam_size}_ctc${ctc_weight}_lm${lm_weight}/${subset}_${world_size}_${rank}
    echo $results_path
    tail -n 1 $results_path/generate-*.txt
done
Speech2S/speech2s/scripts copy/tune_speechut_asr/inference_lm_nj.sh
0 → 100644
View file @
12c90639
#####################################
# SpeechUT ASR model #
#####################################
# Sharded (multi-job) decoding with CTC + LM fusion: the dataset is split into
# `nj` ranks, each pinned round-robin to one of `ngpu` GPUs, then the shard
# outputs are merged per subset.
[ $# -lt 2 ] && echo "Usage: $0 <model_path> <data_dir> [gen-set=dev_other] [beam_size=30] [ctc_weight=0.3] [lm_weight=0.7] [lm_path] [nj=8] [ngpu=8] [--normalize]" && exit 1
[ ${PWD##*/} != SpeechUT ] && echo "Error: dir not match! Switch to SpeechUT/ and run it again!" && exit 1

model_path=$1
DATA_DIR=$2
gen_set=$3
beam_size=$4
ctc_weight=$5
lm_weight=$6
lm_path=$7
nj=$8
ngpu=$9
extra=${10}
[ -z $extra ] && echo "Assert decoding base model! If you are decoding large model, please add '--normalize' at the end..."
[ -z $gen_set ] && gen_set="dev_other"
[ -z $beam_size ] && beam_size=30
[ -z $ctc_weight ] && ctc_weight=0.3
[ -z $lm_weight ] && lm_weight=0.7
[ -z $lm_path ] && lm_path="/mnt/default/v-junyiao/librispeech/lm/lm_ctc_form/checkpoint_best.pt"
# LM fusion only applies together with CTC decoding; disable both otherwise.
[ $ctc_weight == 0 ] && [ $beam_size != 1 ] && echo "Change beam size to 1 and lm_weight to 0 as no ctc-decoding used..." && beam_size=1 && lm_weight=0
[ $ctc_weight != 0 ] && extra="$extra --batch-size 1"
[ -z $nj ] && nj=8
[ -z $ngpu ] && ngpu=8

src_dir=${model_path%/*}
cpt=${model_path##*/}
cpt=${cpt%.*}

CODE_ROOT=${PWD}
world_size=$nj

for rank in $(seq 0 $((nj - 1))); do
    # Round-robin GPU assignment across ranks.
    export CUDA_VISIBLE_DEVICES=$((rank % $ngpu))
    for subset in ${gen_set//,/ }; do
        results_path=$src_dir/decode_${cpt}/beam${beam_size}_ctc${ctc_weight}_lm${lm_weight}/${subset}_${world_size}_${rank}
        [ ! -d $results_path ] && mkdir -p $results_path

        python $CODE_ROOT/fairseq/fairseq_cli/generate.py $DATA_DIR \
            --user-dir $CODE_ROOT/speechut \
            --label-dir ${DATA_DIR} \
            --labels '["ltr"]' \
            --single-target \
            --post-process letter \
            --gen-subset ${subset} \
            --max-tokens 800000 \
            \
            --task joint_sc2t_pretraining \
            --add-decoder-target \
            --fine-tuning \
            --pad-audio \
            --random-crop \
            \
            --ctc-weight ${ctc_weight} $extra \
            --lm-weight ${lm_weight} --lm-path ${lm_path} \
            --beam ${beam_size} \
            \
            --path ${model_path} \
            --results-path $results_path \
            \
            --scoring wer --max-len-a 0.00078125 --max-len-b 200 \
            --distributed-world-size ${world_size} --distributed-rank ${rank} \
            &
    done
done
wait

# Merge per-rank outputs (dropping the per-shard "Generate" summary lines).
for subset in ${gen_set//,/ }; do
    results_dir=$src_dir/decode_${cpt}/beam${beam_size}_ctc${ctc_weight}_lm${lm_weight}
    cat $results_dir/${subset}_${world_size}_*/generate-${subset}.txt | grep -v "^Generate" > $results_dir/generate-${subset}.all.txt
done
Speech2S/speech2s/scripts copy/tune_speechut_asr/inference_nj.sh
0 → 100644
View file @
12c90639
#####################################
# SpeechUT ASR model #
#####################################
# Sharded (multi-job) decoding without LM fusion: the dataset is split into
# `nj` ranks, each pinned round-robin to one of `ngpu` GPUs, then the shard
# outputs are merged per subset.
[ $# -lt 2 ] && echo "Usage: $0 <model_path> <data_dir> [gen-set=dev_other] [beam_size=10] [ctc_weight=0.2] [nj=32] [ngpu=8] [--normalize]" && exit 1
[ ${PWD##*/} != SpeechUT ] && echo "Error: dir not match! Switch to SpeechUT/ and run it again!" && exit 1

model_path=$1
DATA_DIR=$2
gen_set=$3
beam_size=$4
ctc_weight=$5
nj=$6
ngpu=$7
extra=$8
[ -z $extra ] && echo "Assert decoding base model! If you are decoding large model, please add '--normalize' at the end..."
[ -z $gen_set ] && gen_set="dev_other"
[ -z $beam_size ] && beam_size=10
[ -z $ctc_weight ] && ctc_weight=0.2
# Without CTC fusion a beam is pointless; with it we must decode one utterance at a time.
[ $ctc_weight == 0 ] && [ $beam_size != 1 ] && echo "Change beam size to 1 as no ctc-decoding used..." && beam_size=1
[ $ctc_weight != 0 ] && extra="$extra --batch-size 1"
[ -z $nj ] && nj=32
[ -z $ngpu ] && ngpu=8

src_dir=${model_path%/*}
cpt=${model_path##*/}
cpt=${cpt%.*}

CODE_ROOT=${PWD}
world_size=$nj

for rank in $(seq 0 $((nj - 1))); do
    # Round-robin GPU assignment across ranks.
    export CUDA_VISIBLE_DEVICES=$((rank % $ngpu))
    for subset in ${gen_set//,/ }; do
        results_path=$src_dir/decode_${cpt}/beam${beam_size}_ctc${ctc_weight}/${subset}_${world_size}_${rank}
        [ ! -d $results_path ] && mkdir -p $results_path

        python $CODE_ROOT/fairseq/fairseq_cli/generate.py $DATA_DIR \
            --user-dir $CODE_ROOT/speechut \
            --label-dir ${DATA_DIR} \
            --labels '["ltr"]' \
            --single-target \
            --post-process letter \
            --gen-subset ${subset} \
            --max-tokens 2000000 \
            \
            --task joint_sc2t_pretraining \
            --add-decoder-target \
            --fine-tuning \
            --pad-audio \
            --random-crop \
            \
            --ctc-weight ${ctc_weight} $extra \
            --beam ${beam_size} \
            \
            --path ${model_path} \
            --results-path $results_path \
            \
            --scoring wer --max-len-a 0.00078125 --max-len-b 200 \
            --distributed-world-size ${world_size} --distributed-rank ${rank} \
            &
    done
done
wait

# Merge per-rank outputs (dropping the per-shard "Generate" summary lines).
for subset in ${gen_set//,/ }; do
    results_dir=$src_dir/decode_${cpt}/beam${beam_size}_ctc${ctc_weight}
    cat $results_dir/${subset}_${world_size}_*/generate-${subset}.txt | grep -v "^Generate" > $results_dir/generate-${subset}.all.txt
done
Speech2S/speech2s/scripts copy/tune_speechut_st/finetune_base_mustc_enxx.sh
0 → 100644
View file @
12c90639
# ####################################
# SpeechUT Base model #
# ####################################
# Fine-tune a pretrained SpeechUT Base model for MuST-C en->xx speech
# translation. Must be launched from the SpeechUT/ directory.
[ $# -lt 4 ] && echo "Usage: $0 <model_path> <data_dir> <lang> <cpt-tag> [mount=${PWD}] [world_size=8] [update_freq=4/6]" && exit 0
[ ${PWD##*/} != SpeechUT ] && echo "Error: dir not match! Switch to SpeechUT/ and run it again!" && exit 1

w2v_path=$1
DATA_DIR=$2
lang=$3
cpt=$4
mount=$5
world_size=$6
update_freq=$7
# Optional args fall back to defaults when unset.
[ -z $mount ] && mount=${PWD}
[ -z $world_size ] && world_size=8
[ -z $update_freq ] && update_freq=4

CODE_ROOT=${PWD}

# Experiment name = name of the directory containing the pretrained checkpoint.
exp_name=${w2v_path%/*}
exp_name=${exp_name##*/}
MODEL_DIR="$mount/exp/finetune_mustc/$exp_name/legacy_en${lang}_from_${cpt}_bz3.2m_lr3e-5"
[ -d $MODEL_DIR ] || mkdir -p $MODEL_DIR

max_tokens=800000

python $CODE_ROOT/fairseq/fairseq_cli/train.py ${DATA_DIR} \
    --save-dir ${MODEL_DIR} \
    --user-dir $CODE_ROOT/speechut \
    --task speech_to_text \
    --config-yaml config_en${lang}.yaml \
    --train-subset "train_st" \
    --valid-subset "dev_st" \
    --fp16 \
    --seed 1 \
    \
    --ddp-backend no_c10d \
    --distributed-world-size ${world_size} \
    --tensorboard-logdir ${MODEL_DIR} \
    \
    --criterion label_smoothed_cross_entropy --report-accuracy \
    --label-smoothing 0.3 \
    \
    --optimizer adam \
    --clip-norm 1.0 \
    --lr 3e-05 \
    --lr-scheduler polynomial_decay --warmup-updates 5000 \
    --max-update 50000 \
    --total-num-update 50000 \
    --update-freq ${update_freq} \
    \
    --max-tokens ${max_tokens} \
    --max-sentences 16 \
    --max-tokens-valid ${max_tokens} \
    --grouped-shuffling \
    --max-source-positions ${max_tokens} \
    --skip-invalid-size-inputs-valid-test \
    --num-workers 0 \
    --best-checkpoint-metric "accuracy" \
    --maximize-best-checkpoint-metric \
    \
    --arch "speechut_st_legacy" \
    --w2v-path ${w2v_path} \
    --layerdrop 0.1 \
    --activation-dropout 0.1 \
    --attention-dropout 0.1 \
    --feature-grad-mult 1.0 \
    \
    --apply-mask --mask-prob 0.5 \
    \
    --log-format json \
    --log-interval 100 \
    --save-interval 1 \
    --keep-last-epochs 5 \
    --keep-best-checkpoints 5 \
    \
    2>&1 | tee ${MODEL_DIR}/train_en${lang}.log
Speech2S/speech2s/scripts copy/tune_speechut_st/inference_st.sh
0 → 100644
View file @
12c90639
# ####################################
# SpeechUT Base model #
# ####################################
# Decode a fine-tuned SpeechUT speech-translation model and score with
# sacreBLEU. Must be launched from the SpeechUT/ directory.
[ $# -lt 3 ] && echo "Usage: $0 <model_path> <data_dir> <lang> [gen-set=dev] [beam_size=10] [lenpen=1.0]" && exit 0
[ ${PWD##*/} != SpeechUT ] && echo "Error: dir not match! Switch to SpeechUT/ and run it again!" && exit 1

model_path=$1
DATA_DIR=$2
lang=$3
gen_set=$4
beam_size=$5
lenpen=$6
# Optional args fall back to defaults when unset.
[ -z $gen_set ] && gen_set="dev"
[ -z $beam_size ] && beam_size=10
[ -z $lenpen ] && lenpen=1

src_dir=${model_path%/*}
cpt=${model_path##*/}
cpt=${cpt%.*}

CODE_ROOT=${PWD}

results_path=$src_dir/decode_${cpt}_beam${beam_size}/${gen_set}
[ ! -d $results_path ] && mkdir -p $results_path

python $CODE_ROOT/fairseq/fairseq_cli/generate.py $DATA_DIR \
    --gen-subset ${gen_set}_st \
    --max-tokens 2000000 \
    --max-source-positions 2000000 \
    --num-workers 0 \
    \
    --user-dir $CODE_ROOT/speechut \
    --task speech_to_text \
    --config-yaml config_en${lang}.yaml \
    \
    --path ${model_path} \
    --results-path $results_path \
    \
    --scoring sacrebleu --max-len-a 0 --max-len-b 512 \
    --beam ${beam_size} \
    --lenpen $lenpen \
    # --model-overrides "{'model':{'w2v_path':'/path/to/your/pretrained/model.pt'}}" \

echo $results_path
tail -n 1 $results_path/generate-*.txt
sleep 1s
Speech2S/speech2s/scripts/__init__.py
0 → 100644
View file @
12c90639
Speech2S/speech2s/scripts/average_checkpoints.py
0 → 100644
View file @
12c90639
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import
argparse
import
collections
import
os
import
re
import
torch
from
fairseq.file_io
import
PathManager
def average_checkpoints(inputs):
    """Loads checkpoints from inputs and returns a model with averaged weights.

    Args:
      inputs: An iterable of string paths of checkpoints to load from.

    Returns:
      A dict of string keys mapping to various values. The 'model' key
      from the returned dict should correspond to an OrderedDict mapping
      string parameter names to torch Tensors.
    """
    params_dict = collections.OrderedDict()
    params_keys = None
    new_state = None
    num_models = len(inputs)

    for ckpt_path in inputs:
        with PathManager.open(ckpt_path, "rb") as fh:
            state = torch.load(
                f,
                map_location=(
                    lambda s, _: torch.serialization.default_restore_location(s, "cpu")
                ),
            ) if False else torch.load(
                fh,
                map_location=(
                    lambda s, _: torch.serialization.default_restore_location(s, "cpu")
                ),
            )

        # The non-model settings (optimizer state, args, ...) are taken
        # verbatim from the first checkpoint.
        if new_state is None:
            new_state = state

        model_params = state["model"]
        model_params_keys = list(model_params.keys())
        if params_keys is None:
            params_keys = model_params_keys
        elif params_keys != model_params_keys:
            # All checkpoints must expose exactly the same parameter names.
            raise KeyError(
                "For checkpoint {}, expected list of params: {}, "
                "but found: {}".format(f, params_keys, model_params_keys)
            )

        for name in params_keys:
            param = model_params[name]
            if isinstance(param, torch.HalfTensor):
                # Accumulate in fp32 to avoid precision loss while summing.
                param = param.float()
            if name not in params_dict:
                # NOTE: clone() is needed in case of p is a shared parameter
                params_dict[name] = param.clone()
            else:
                params_dict[name] += param

    averaged_params = collections.OrderedDict()
    for name, summed in params_dict.items():
        averaged_params[name] = summed
        if averaged_params[name].is_floating_point():
            averaged_params[name].div_(num_models)
        else:
            # Integer tensors (e.g. counters) use floor division.
            averaged_params[name] //= num_models
    new_state["model"] = averaged_params
    return new_state
def last_n_checkpoints(paths, n, update_based, upper_bound=None):
    """Return the *n* most recent checkpoint files found in ``paths[0]``.

    Args:
        paths: single-element list containing the checkpoint directory.
        update_based: if True, match ``checkpoint_<epoch>_<update>.pt`` and
            sort by update number; otherwise match ``checkpoint<epoch>.pt``
            and sort by epoch.
        upper_bound: optional inclusive cap on the epoch/update number.

    Returns:
        Full paths of the selected checkpoints, newest first.

    Raises:
        Exception: if fewer than *n* matching checkpoints exist.
    """
    assert len(paths) == 1
    path = paths[0]
    if update_based:
        pt_regexp = re.compile(r"checkpoint_\d+_(\d+)\.pt")
    else:
        pt_regexp = re.compile(r"checkpoint(\d+)\.pt")
    files = PathManager.ls(path)

    entries = []
    for f in files:
        m = pt_regexp.fullmatch(f)
        if m is not None:
            sort_key = int(m.group(1))
            if upper_bound is None or sort_key <= upper_bound:
                entries.append((sort_key, m.group(0)))
    if len(entries) < n:
        # BUG FIX: the original passed the counts as extra Exception
        # arguments ("msg", x, y) instead of formatting them into the
        # message, producing an unformatted "{} ... {}" error string.
        raise Exception(
            "Found {} checkpoint files but need at least {}".format(len(entries), n)
        )
    # Sort by epoch/update descending and keep the newest n.
    return [os.path.join(path, x[1]) for x in sorted(entries, reverse=True)[:n]]
def main():
    """CLI entry point: average the last N epoch- or update-based checkpoints."""
    parser = argparse.ArgumentParser(
        description="Tool to average the params of input checkpoints to "
        "produce a new checkpoint",
    )
    # fmt: off
    parser.add_argument('--inputs', required=True, nargs='+',
                        help='Input checkpoint file paths.')
    parser.add_argument('--output', required=True, metavar='FILE',
                        help='Write the new checkpoint containing the averaged weights to this path.')
    num_group = parser.add_mutually_exclusive_group()
    num_group.add_argument('--num-epoch-checkpoints', type=int,
                           help='if set, will try to find checkpoints with names checkpoint_xx.pt in the '
                           'path specified by input, and average last this many of them.')
    num_group.add_argument('--num-update-checkpoints', type=int,
                           help='if set, will try to find checkpoints with names checkpoint_ee_xx.pt in the path specified by'
                           ' input, and average last this many of them.')
    parser.add_argument('--checkpoint-upper-bound', type=int,
                        help='when using --num-epoch-checkpoints, this will set an upper bound on which epoch to use, '
                        'when using --num-update-checkpoints, this will set an upper bound on which update to use'
                        'e.g., with --num-epoch-checkpoints=10 --checkpoint-upper-bound=50, checkpoints 41-50 would be'
                        ' averaged.'
                        'e.g., with --num-update-checkpoints=10 --checkpoint-upper-bound=50000, checkpoints 40500-50000 would'
                        ' be averaged assuming --save-interval-updates 500')
    # fmt: on
    args = parser.parse_args()
    print(args)

    # Work out how many checkpoints to gather and in which naming scheme.
    num = None
    is_update_based = False
    if args.num_update_checkpoints is not None:
        num = args.num_update_checkpoints
        is_update_based = True
    elif args.num_epoch_checkpoints is not None:
        num = args.num_epoch_checkpoints

    assert args.checkpoint_upper_bound is None or (
        args.num_epoch_checkpoints is not None
        or args.num_update_checkpoints is not None
    ), "--checkpoint-upper-bound requires --num-epoch-checkpoints or --num-update-checkpoints"
    assert (
        args.num_epoch_checkpoints is None or args.num_update_checkpoints is None
    ), "Cannot combine --num-epoch-checkpoints and --num-update-checkpoints"

    if num is not None:
        args.inputs = last_n_checkpoints(
            args.inputs,
            num,
            is_update_based,
            upper_bound=args.checkpoint_upper_bound,
        )
        print("averaging checkpoints: ", args.inputs)

    new_state = average_checkpoints(args.inputs)
    with PathManager.open(args.output, "wb") as f:
        torch.save(new_state, f)
    print("Finished writing averaged checkpoint to {}".format(args.output))


if __name__ == "__main__":
    main()
Speech2S/speech2s/scripts/build_sym_alignment.py
0 → 100644
View file @
12c90639
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
"""
Use this script in order to build symmetric alignments for your translation
dataset.
This script depends on fast_align and mosesdecoder tools. You will need to
build those before running the script.
fast_align:
github: http://github.com/clab/fast_align
instructions: follow the instructions in README.md
mosesdecoder:
github: http://github.com/moses-smt/mosesdecoder
instructions: http://www.statmt.org/moses/?n=Development.GetStarted
The script produces the following files under --output_dir:
text.joined - concatenation of lines from the source_file and the
target_file.
align.forward - forward pass of fast_align.
align.backward - backward pass of fast_align.
aligned.sym_heuristic - symmetrized alignment.
"""
import
argparse
import
os
from
itertools
import
zip_longest
def main():
    """Build symmetric word alignments with fast_align + mosesdecoder.

    Runs fast_align in both directions over the joined source/target corpus
    and symmetrizes the two alignments with the configured heuristic.
    All heavy lifting is delegated to external binaries via ``os.system``.
    """
    parser = argparse.ArgumentParser(description="symmetric alignment builer")
    # fmt: off
    parser.add_argument('--fast_align_dir',
                        help='path to fast_align build directory')
    parser.add_argument('--mosesdecoder_dir',
                        help='path to mosesdecoder root directory')
    parser.add_argument('--sym_heuristic',
                        help='heuristic to use for symmetrization',
                        default='grow-diag-final-and')
    parser.add_argument('--source_file',
                        help='path to a file with sentences '
                        'in the source language')
    parser.add_argument('--target_file',
                        help='path to a file with sentences '
                        'in the target language')
    parser.add_argument('--output_dir',
                        help='output directory')
    # fmt: on
    args = parser.parse_args()

    fast_align_bin = os.path.join(args.fast_align_dir, "fast_align")
    symal_bin = os.path.join(args.mosesdecoder_dir, "bin", "symal")
    sym_fast_align_bin = os.path.join(
        args.mosesdecoder_dir, "scripts", "ems", "support", "symmetrize-fast-align.perl"
    )

    # create joined file ("src ||| tgt" per line, fast_align's input format)
    joined_file = os.path.join(args.output_dir, "text.joined")
    with open(args.source_file, "r", encoding="utf-8") as src, open(
        args.target_file, "r", encoding="utf-8"
    ) as tgt:
        with open(joined_file, "w", encoding="utf-8") as joined:
            for s, t in zip_longest(src, tgt):
                print("{} ||| {}".format(s.strip(), t.strip()), file=joined)

    # run forward alignment
    # (FIX: removed a dead duplicate assignment of bwd_align_file that
    # appeared here before the forward pass in the original.)
    fwd_align_file = os.path.join(args.output_dir, "align.forward")
    fwd_fast_align_cmd = "{FASTALIGN} -i {JOINED} -d -o -v > {FWD}".format(
        FASTALIGN=fast_align_bin, JOINED=joined_file, FWD=fwd_align_file
    )
    assert os.system(fwd_fast_align_cmd) == 0

    # run backward alignment
    bwd_align_file = os.path.join(args.output_dir, "align.backward")
    bwd_fast_align_cmd = "{FASTALIGN} -i {JOINED} -d -o -v -r > {BWD}".format(
        FASTALIGN=fast_align_bin, JOINED=joined_file, BWD=bwd_align_file
    )
    assert os.system(bwd_fast_align_cmd) == 0

    # run symmetrization
    sym_out_file = os.path.join(args.output_dir, "aligned")
    sym_cmd = "{SYMFASTALIGN} {FWD} {BWD} {SRC} {TGT} {OUT} {HEURISTIC} {SYMAL}".format(
        SYMFASTALIGN=sym_fast_align_bin,
        FWD=fwd_align_file,
        BWD=bwd_align_file,
        SRC=args.source_file,
        TGT=args.target_file,
        OUT=sym_out_file,
        HEURISTIC=args.sym_heuristic,
        SYMAL=symal_bin,
    )
    assert os.system(sym_cmd) == 0


if __name__ == "__main__":
    main()
Speech2S/speech2s/scripts/compare_namespaces.py
0 → 100644
View file @
12c90639
#!/usr/bin/env python
"""Helper script to compare two argparse.Namespace objects."""
from
argparse
import
Namespace
# noqa
def main():
    """Interactively compare two argparse.Namespace reprs read from stdin."""
    # NOTE(review): eval() on interactive input is only acceptable because
    # this is a developer-facing debugging helper; never feed it untrusted data.
    ns1 = eval(input("Namespace 1: "))
    ns2 = eval(input("Namespace 2: "))

    def keys(ns):
        # Public attribute names only (skip dunders/privates).
        return {k for k in dir(ns) if not k.startswith("_")}

    k1 = keys(ns1)
    k2 = keys(ns2)

    def print_keys(ks, ns1, ns2=None):
        # One line per key; two columns for a single namespace, three when
        # comparing both.
        for k in ks:
            if ns2 is None:
                print("{}\t{}".format(k, getattr(ns1, k, None)))
            else:
                print(
                    "{}\t{}\t{}".format(k, getattr(ns1, k, None), getattr(ns2, k, None))
                )

    print("Keys unique to namespace 1:")
    print_keys(k1 - k2, ns1)
    print()

    print("Keys unique to namespace 2:")
    print_keys(k2 - k1, ns2)
    print()

    print("Overlapping keys with different values:")
    ks = [k for k in k1 & k2 if getattr(ns1, k, "None") != getattr(ns2, k, "None")]
    print_keys(ks, ns1, ns2)
    print()


if __name__ == "__main__":
    main()
Speech2S/speech2s/scripts/compound_split_bleu.sh
0 → 100644
View file @
12c90639
#!/bin/bash
# Score a fairseq generate.py output file with compound-split BLEU:
# hyphenated compounds are split with ##AT## markers before fairseq-score.

if [ $# -ne 1 ]; then
    echo "usage: $0 GENERATE_PY_OUTPUT"
    exit 1
fi

GEN=$1

SYS=$GEN.sys
REF=$GEN.ref

# The last line of a finished generation run contains the BLEU summary.
if [ $(tail -n 1 $GEN | grep BLEU | wc -l) -ne 1 ]; then
    echo "not done generating"
    exit
fi

# H-lines are hypotheses, T-lines are references; split compounds in both.
grep ^H $GEN | awk -F '\t' '{print $NF}' | perl -ple 's{(\S)-(\S)}{$1 ##AT##-##AT## $2}g' > $SYS
grep ^T $GEN | cut -f2- | perl -ple 's{(\S)-(\S)}{$1 ##AT##-##AT## $2}g' > $REF
fairseq-score --sys $SYS --ref $REF
Speech2S/speech2s/scripts/constraints/extract.py
0 → 100644
View file @
12c90639
#!/usr/bin/env python3
#
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
"""Extracts random constraints from reference files."""
import
argparse
import
random
import
sys
def get_phrase(words, index, length):
    """Extract *length* consecutive words starting at *index*.

    Mutates *words* in place (the extracted words are removed) and returns
    the removed words joined by single spaces.
    """
    assert index < len(words) - length + 1

    taken = words[index : index + length]
    # Remove the extracted span from the caller's list in one step.
    del words[index : index + length]
    return " ".join(taken)
def main(args):
    """Read "source\\ttarget" lines from stdin and emit random constraints.

    For each input line, up to ``args.number`` random phrases of length
    ``args.len`` are sampled (without overlap) from the target side and
    printed tab-separated after the source, ordered by their position in
    the original target.
    """
    if args.seed:
        random.seed(args.seed)

    for line in sys.stdin:
        constraints = []

        def add_constraint(constraint):
            constraints.append(constraint)

        source = line.rstrip()
        if "\t" in line:
            source, target = line.split("\t")
            if args.add_sos:
                target = f"<s> {target}"
            if args.add_eos:
                target = f"{target} </s>"

            if len(target.split()) >= args.len:
                # `words` holds the still-available segments of the target;
                # sampling a phrase splits its segment into the leftovers.
                words = [target]

                num = args.number

                choices = {}
                for i in range(num):
                    if len(words) == 0:
                        break
                    segmentno = random.choice(range(len(words)))
                    segment = words.pop(segmentno)
                    tokens = segment.split()
                    phrase_index = random.choice(range(len(tokens)))
                    choice = " ".join(
                        tokens[phrase_index : min(len(tokens), phrase_index + args.len)]
                    )
                    for j in range(
                        phrase_index, min(len(tokens), phrase_index + args.len)
                    ):
                        tokens.pop(phrase_index)
                    if phrase_index > 0:
                        words.append(" ".join(tokens[0:phrase_index]))
                    if phrase_index + 1 < len(tokens):
                        words.append(" ".join(tokens[phrase_index:]))
                    # Key by position in the original target so output order
                    # matches reading order.
                    choices[target.find(choice)] = choice

                    # mask out with spaces
                    target = target.replace(choice, " " * len(choice), 1)

                for key in sorted(choices.keys()):
                    add_constraint(choices[key])

        print(source, *constraints, sep="\t")
if __name__ == "__main__":
    # CLI: sample random constraint phrases from the target side of a
    # tab-separated parallel corpus read on stdin.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--number", "-n", type=int, default=1, help="number of phrases"
    )
    parser.add_argument("--len", "-l", type=int, default=1, help="phrase length")
    parser.add_argument(
        "--add-sos", default=False, action="store_true", help="add <s> token"
    )
    parser.add_argument(
        "--add-eos", default=False, action="store_true", help="add </s> token"
    )
    parser.add_argument("--seed", "-s", default=0, type=int)
    args = parser.parse_args()

    main(args)
Speech2S/speech2s/scripts/constraints/validate.py
0 → 100644
View file @
12c90639
#!/usr/bin/env python3
#
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import sys

"""Reads in a fairseq output file, and verifies that the constraints
(C- lines) are present in the output (the first H- line). Assumes that
constraints are listed prior to the first hypothesis.
"""


def main():
    """Scan fairseq generate output on stdin and count satisfied constraints.

    A ``C-<id>\\t<text>`` line records a constraint; the next ``H-`` line is
    the hypothesis it must appear in. Missing constraints are reported on
    stderr; a summary is printed on stdout.

    Returns:
        (found, total): constraints found in their hypothesis, and the total
        number of constraints checked.
    """
    constraints = []
    found = 0
    total = 0
    for line in sys.stdin:
        if line.startswith("C-"):
            # C-<id>\t<constraint text>
            constraints.append(line.rstrip().split("\t")[1])
        elif line.startswith("H-"):
            # H-<id>\t<score>\t<hypothesis text>
            text = line.split("\t")[2]

            for constraint in constraints:
                total += 1
                if constraint in text:
                    found += 1
                else:
                    print(f"No {constraint} in {text}", file=sys.stderr)

            # Constraints apply only to the first hypothesis that follows.
            constraints = []

    # Fix: avoid ZeroDivisionError when the input contained no constraints.
    denom = total if total > 0 else 1
    print(f"Found {found} / {total} = {100 * found / denom:.1f}%")
    return found, total


# Fix: run only when invoked as a script; the original executed (and read
# stdin) at import time, which made the module unusable as an import.
if __name__ == "__main__":
    main()
Speech2S/speech2s/scripts/convert_dictionary.lua
0 → 100644
View file @
12c90639
-- Copyright (c) Facebook, Inc. and its affiliates.
--
-- This source code is licensed under the MIT license found in the
-- LICENSE file in the root directory of this source tree.
--
-- Usage: convert_dictionary.lua <dict.th7>
-- Convert a Torch-serialized fairseq dictionary (.th7) into a plain-text
-- file with one "<symbol> <frequency>" entry per line.
require 'fairseq'
require 'torch'
require 'paths'

if #arg < 1 then
   print('usage: convert_dictionary.lua <dict.th7>')
   os.exit(1)
end
if not paths.filep(arg[1]) then
   -- NOTE(review): 'exit' looks like a typo for 'exist' in this message.
   print('error: file does not exit: ' .. arg[1])
   os.exit(1)
end

dict = torch.load(arg[1])
-- Output path: input base name with the .th7 suffix replaced by .txt,
-- written to the current working directory.
dst = paths.basename(arg[1]):gsub('.th7', '.txt')
assert(dst:match('.txt$'))

f = io.open(dst, 'w')
-- Write symbols in index order, stopping once past the dictionary cutoff.
for idx, symbol in ipairs(dict.index_to_symbol) do
   if idx > dict.cutoff then
      break
   end
   f:write(symbol)
   f:write(' ')
   f:write(dict.index_to_freq[idx])
   f:write('\n')
end
f:close()
Speech2S/speech2s/scripts/convert_model.lua
0 → 100644
View file @
12c90639
-- Copyright (c) Facebook, Inc. and its affiliates.
--
-- This source code is licensed under the MIT license found in the
-- LICENSE file in the root directory of this source tree.
--
-- Usage: convert_model.lua <model_epoch1.th7>
-- Load the Torch/Lua fairseq model to convert; the helpers below collect
-- its parameters into flat dicts and save a PyTorch-style state dict.
require 'torch'
local fairseq = require 'fairseq'

model = torch.load(arg[1])
-- Return the nn.WeightNorm wrapper inside `container` whose wrapped module
-- is `module`, or nil when the module is not weight-normalized.
function find_weight_norm(container, module)
   for _, wn in ipairs(container:listModules()) do
      if torch.type(wn) == 'nn.WeightNorm' and wn.modules[1] == module then
         return wn
      end
   end
end
-- Store `module`'s parameters into `dict` under keys prefixed by `key`.
-- Linear and TemporalConvolutionTBC modules are assumed to be wrapped in
-- nn.WeightNorm (asserted) and are split into .weight_v/.weight_g; any
-- other module type is stored directly as .weight. A bias, if present, is
-- always stored as .bias. All tensors are converted to float.
function push_state(dict, key, module)
   if torch.type(module) == 'nn.Linear' then
      local wn = find_weight_norm(model.module, module)
      assert(wn)
      dict[key .. '.weight_v'] = wn.v:float()
      dict[key .. '.weight_g'] = wn.g:float()
   elseif torch.type(module) == 'nn.TemporalConvolutionTBC' then
      local wn = find_weight_norm(model.module, module)
      assert(wn)
      -- Reshape/transpose the direction tensor to the layout expected on
      -- the PyTorch side (presumably out_channels-major -- confirm there).
      local v = wn.v:float():view(wn.viewOut):transpose(2, 3)
      dict[key .. '.weight_v'] = v
      dict[key .. '.weight_g'] = wn.g:float():view(module.weight:size(3), 1, 1)
   else
      dict[key .. '.weight'] = module.weight:float()
   end
   if module.bias then
      dict[key .. '.bias'] = module.bias:float()
   end
end
-- Per-component parameter tables; merged into combined_dict at the end.
encoder_dict = {}
decoder_dict = {}
combined_dict = {}
-- Collect the encoder's parameters into encoder_dict: the two lookup
-- tables (token/position embeddings), the first and last Linear layers
-- (fc1/fc2), each convolution, and a projection Linear wherever the
-- channel count changes between convolution blocks. Consumes `fcs`
-- destructively and asserts every Linear was accounted for.
function encoder_state(encoder)
   luts = encoder:findModules('nn.LookupTable')
   push_state(encoder_dict, 'embed_tokens', luts[1])
   push_state(encoder_dict, 'embed_positions', luts[2])

   fcs = encoder:findModules('nn.Linear')
   assert(#fcs >= 2)
   local nInputPlane = fcs[1].weight:size(1)
   push_state(encoder_dict, 'fc1', table.remove(fcs, 1))
   push_state(encoder_dict, 'fc2', table.remove(fcs, #fcs))

   for i, module in ipairs(encoder:findModules('nn.TemporalConvolutionTBC')) do
      push_state(encoder_dict, 'convolutions.' .. tostring(i - 1), module)
      -- A remaining Linear is a residual projection, present only when the
      -- convolution changed the channel count (size(3)/2: GLU halves it).
      if nInputPlane ~= module.weight:size(3) / 2 then
         push_state(encoder_dict, 'projections.' .. tostring(i - 1), table.remove(fcs, 1))
      end
      nInputPlane = module.weight:size(3) / 2
   end
   assert(#fcs == 0)
end
-- Collect the decoder's parameters into decoder_dict: embeddings, fc1 and
-- the two output Linears (fc2/fc3), then per convolution block an optional
-- residual projection, the attention in/out projections, and the
-- convolution itself. Consumes `fcs` destructively and asserts every
-- Linear was accounted for.
function decoder_state(decoder)
   luts = decoder:findModules('nn.LookupTable')
   push_state(decoder_dict, 'embed_tokens', luts[1])
   push_state(decoder_dict, 'embed_positions', luts[2])

   fcs = decoder:findModules('nn.Linear')
   local nInputPlane = fcs[1].weight:size(1)
   push_state(decoder_dict, 'fc1', table.remove(fcs, 1))
   -- fc2/fc3 are the last two Linears; push them, then drop both from fcs.
   push_state(decoder_dict, 'fc2', fcs[#fcs - 1])
   push_state(decoder_dict, 'fc3', fcs[#fcs])
   table.remove(fcs, #fcs)
   table.remove(fcs, #fcs)

   for i, module in ipairs(decoder:findModules('nn.TemporalConvolutionTBC')) do
      -- Residual projection only where the channel count changes
      -- (size(3)/2: GLU halves the convolution's output channels).
      if nInputPlane ~= module.weight:size(3) / 2 then
         push_state(decoder_dict, 'projections.' .. tostring(i - 1), table.remove(fcs, 1))
      end
      nInputPlane = module.weight:size(3) / 2

      local prefix = 'attention.' .. tostring(i - 1)
      push_state(decoder_dict, prefix .. '.in_projection', table.remove(fcs, 1))
      push_state(decoder_dict, prefix .. '.out_projection', table.remove(fcs, 1))
      push_state(decoder_dict, 'convolutions.' .. tostring(i - 1), module)
   end
   assert(#fcs == 0)
end
-- Driver: modules[2]/modules[3] are assumed to be the encoder and decoder
-- submodules of the loaded model. Collect both, merge into one dict with
-- 'encoder.'/'decoder.' prefixes, and save as state_dict.t7 in the cwd.
_encoder = model.module.modules[2]
_decoder = model.module.modules[3]

encoder_state(_encoder)
decoder_state(_decoder)

for k, v in pairs(encoder_dict) do
   combined_dict['encoder.' .. k] = v
end
for k, v in pairs(decoder_dict) do
   combined_dict['decoder.' .. k] = v
end

torch.save('state_dict.t7', combined_dict)
Speech2S/speech2s/scripts/count_docs.py
0 → 100644
View file @
12c90639
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
"""
Count the number of documents and average number of lines and tokens per
document in a large file. Documents should be separated by a single empty line.
"""
import
argparse
import
gzip
import
sys
import
numpy
as
np
def main():
    """Count documents in a (possibly gzipped) text file and report stats.

    Documents are separated by a single empty line. Prints the number of
    documents and the average number of lines and tokens per document to
    stdout, with a lightweight progress indicator on stderr.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("input")
    parser.add_argument("--gzip", action="store_true")
    args = parser.parse_args()

    def gopen():
        # NOTE(review): gzip.open(..., "r") yields bytes lines; the strip/
        # split calls below work on bytes too, so both branches behave alike.
        if args.gzip:
            return gzip.open(args.input, "r")
        else:
            return open(args.input, "r", encoding="utf-8")

    num_lines = []  # lines per completed document
    num_toks = []   # tokens per completed document
    with gopen() as h:
        num_docs = 1
        num_lines_in_doc = 0
        num_toks_in_doc = 0
        for i, line in enumerate(h):
            if len(line.strip()) == 0:
                # empty line indicates new document
                num_docs += 1
                num_lines.append(num_lines_in_doc)
                num_toks.append(num_toks_in_doc)
                num_lines_in_doc = 0
                num_toks_in_doc = 0
            else:
                num_lines_in_doc += 1
                num_toks_in_doc += len(line.rstrip().split())
            # progress: a number every 1M lines, a dot every 100k
            if i % 1000000 == 0:
                print(i, file=sys.stderr, end="", flush=True)
            elif i % 100000 == 0:
                print(".", file=sys.stderr, end="", flush=True)
        # Fix: flush the final document, which was silently dropped when the
        # file did not end with a trailing empty line.
        if num_lines_in_doc > 0:
            num_lines.append(num_lines_in_doc)
            num_toks.append(num_toks_in_doc)
    print(file=sys.stderr, flush=True)

    print("found {} docs".format(num_docs))
    # Guard: np.mean([]) warns and yields nan on a fully empty input.
    if num_lines:
        print("average num lines per doc: {}".format(np.mean(num_lines)))
        print("average num toks per doc: {}".format(np.mean(num_toks)))
# Script entry point.
if __name__ == "__main__":
    main()
Speech2S/speech2s/scripts/read_binarized.py
0 → 100644
View file @
12c90639
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import
argparse
from
fairseq.data
import
Dictionary
,
data_utils
,
indexed_dataset
def get_parser():
    """Build the CLI parser for dumping a binarized dataset as text."""
    p = argparse.ArgumentParser(
        description="writes text from binarized file to stdout"
    )
    # fmt: off
    p.add_argument('--dataset-impl', help='dataset implementation',
                   choices=indexed_dataset.get_available_dataset_impl())
    p.add_argument('--dict', metavar='FP', help='dictionary containing known words', default=None)
    p.add_argument('--input', metavar='FP', required=True, help='binarized file to read')
    # fmt: on
    return p
def main():
    """Decode a binarized fairseq dataset and print one example per line."""
    args = get_parser().parse_args()

    # Without a dictionary we can only print raw integer token ids.
    if args.dict is not None:
        dictionary = Dictionary.load(args.dict)
    else:
        dictionary = None

    dataset = data_utils.load_indexed_dataset(
        args.input,
        dictionary,
        dataset_impl=args.dataset_impl,
        default="lazy",
    )

    for tensor_line in dataset:
        if dictionary is None:
            print(" ".join(str(int(tok)) for tok in tensor_line))
        else:
            print(dictionary.string(tensor_line))
# Script entry point.
if __name__ == "__main__":
    main()
Speech2S/speech2s/scripts/rm_pt.py
0 → 100644
View file @
12c90639
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import
argparse
import
os
import
re
import
shutil
import
sys
# Any checkpoint file name: epoch-based ("checkpoint12.pt"), update-based
# ("checkpoint_3_4000.pt"), or named ("checkpoint_best.pt").
pt_regexp = re.compile(r"checkpoint(\d+|_\d+_\d+|_[a-z]+)\.pt")
# Epoch-based checkpoints only; group(1) captures the epoch number.
pt_regexp_epoch_based = re.compile(r"checkpoint(\d+)\.pt")
# Update-based checkpoints only; group(1) captures the update number.
pt_regexp_update_based = re.compile(r"checkpoint_\d+_(\d+)\.pt")
def parse_checkpoints(files):
    """Return (number, filename) pairs for every epoch- or update-based
    checkpoint name in *files*; other names are ignored."""
    entries = []
    for name in files:
        # Try the epoch pattern first, then the update pattern.
        match = pt_regexp_epoch_based.fullmatch(name) or pt_regexp_update_based.fullmatch(name)
        if match is not None:
            entries.append((int(match.group(1)), match.group(0)))
    return entries
def last_n_checkpoints(files, n):
    """Return the file names of the *n* highest-numbered checkpoints in
    *files*, newest first."""
    ranked = sorted(parse_checkpoints(files), reverse=True)
    return [name for _, name in ranked[:n]]
def every_n_checkpoints(files, n):
    """Return the file names of every *n*-th checkpoint in *files*, counted
    backwards from the newest, in ascending order."""
    chronological = sorted(parse_checkpoints(files))
    kept = chronological[::-n]  # every n-th entry, starting from the newest
    return [name for _, name in sorted(kept)]
def main():
    """Recursively delete checkpoint files under the given roots, preserving
    checkpoint_best.pt, checkpoint_last.pt, and any checkpoints selected by
    --save-last / --save-every. Prints the plan and asks for confirmation
    before deleting anything.
    """
    parser = argparse.ArgumentParser(
        description=(
            "Recursively delete checkpoint files from `root_dir`, "
            "but preserve checkpoint_best.pt and checkpoint_last.pt"
        )
    )
    parser.add_argument("root_dirs", nargs="*")
    parser.add_argument(
        "--save-last", type=int, default=0, help="number of last checkpoints to save"
    )
    parser.add_argument(
        "--save-every", type=int, default=0, help="interval of checkpoints to save"
    )
    parser.add_argument(
        "--preserve-test",
        action="store_true",
        help="preserve checkpoints in dirs that start with test_ prefix (default: delete them)",
    )
    parser.add_argument(
        "--delete-best", action="store_true", help="delete checkpoint_best.pt"
    )
    parser.add_argument(
        "--delete-last", action="store_true", help="delete checkpoint_last.pt"
    )
    parser.add_argument(
        "--no-dereference", action="store_true", help="don't dereference symlinks"
    )
    args = parser.parse_args()

    # Partition every matching checkpoint into one of three buckets.
    files_to_desymlink = []  # preserved symlinks to replace with real copies
    files_to_preserve = []   # kept as-is
    files_to_delete = []     # removed
    for root_dir in args.root_dirs:
        for root, _subdirs, files in os.walk(root_dir):
            # Checkpoints to keep are computed per directory.
            if args.save_last > 0:
                to_save = last_n_checkpoints(files, args.save_last)
            else:
                to_save = []
            if args.save_every > 0:
                to_save += every_n_checkpoints(files, args.save_every)
            for file in files:
                if not pt_regexp.fullmatch(file):
                    continue
                full_path = os.path.join(root, file)
                # Keep the file if (a) it is not in a test_* dir (or those are
                # preserved) and (b) it is best/last (unless deletion of those
                # was requested) or explicitly selected by --save-*.
                if (
                    not os.path.basename(root).startswith("test_")
                    or args.preserve_test
                ) and (
                    (file == "checkpoint_last.pt" and not args.delete_last)
                    or (file == "checkpoint_best.pt" and not args.delete_best)
                    or file in to_save
                ):
                    if os.path.islink(full_path) and not args.no_dereference:
                        files_to_desymlink.append(full_path)
                    else:
                        files_to_preserve.append(full_path)
                else:
                    files_to_delete.append(full_path)

    if len(files_to_desymlink) == 0 and len(files_to_delete) == 0:
        print("Nothing to do.")
        sys.exit(0)

    files_to_desymlink = sorted(files_to_desymlink)
    files_to_preserve = sorted(files_to_preserve)
    files_to_delete = sorted(files_to_delete)

    # Show the full plan before touching anything.
    print("Operations to perform (in order):")
    if len(files_to_desymlink) > 0:
        for file in files_to_desymlink:
            print(" - preserve (and dereference symlink): " + file)
    if len(files_to_preserve) > 0:
        for file in files_to_preserve:
            print(" - preserve: " + file)
    if len(files_to_delete) > 0:
        for file in files_to_delete:
            print(" - delete: " + file)

    # Interactive confirmation; anything other than y/n re-prompts.
    while True:
        resp = input("Continue? (Y/N): ")
        if resp.strip().lower() == "y":
            break
        elif resp.strip().lower() == "n":
            sys.exit(0)

    print("Executing...")
    if len(files_to_desymlink) > 0:
        for file in files_to_desymlink:
            # Replace the symlink with a real copy of its target.
            realpath = os.path.realpath(file)
            print("rm " + file)
            os.remove(file)
            print("cp {} {}".format(realpath, file))
            shutil.copyfile(realpath, file)
    if len(files_to_delete) > 0:
        for file in files_to_delete:
            print("rm " + file)
            os.remove(file)
# Script entry point.
if __name__ == "__main__":
    main()
Prev
1
2
3
4
5
6
7
8
…
18
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment