Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
Torchaudio
Commits
3a599315
"vscode:/vscode.git/clone" did not exist on "d425f007782051ab08e9c0fbfe06b072313f4649"
Unverified
Commit
3a599315
authored
Oct 26, 2021
by
moto
Committed by
GitHub
Oct 26, 2021
Browse files
Add wav2vec2 ASR Spanish pretrained model from voxpopuli (#1924)
parent
56f3b927
Changes
6
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
98 additions
and
0 deletions
+98
-0
docs/source/pipelines.rst
docs/source/pipelines.rst
+8
-0
test/integration_tests/conftest.py
test/integration_tests/conftest.py
+1
-0
test/integration_tests/wav2vec2_pipeline_test.py
test/integration_tests/wav2vec2_pipeline_test.py
+2
-0
torchaudio/pipelines/__init__.py
torchaudio/pipelines/__init__.py
+2
-0
torchaudio/pipelines/_wav2vec2/impl.py
torchaudio/pipelines/_wav2vec2/impl.py
+46
-0
torchaudio/pipelines/_wav2vec2/utils.py
torchaudio/pipelines/_wav2vec2/utils.py
+39
-0
No files found.
docs/source/pipelines.rst
View file @
3a599315
...
@@ -153,6 +153,14 @@ WAV2VEC2_ASR_LARGE_LV60K_960H
...
@@ -153,6 +153,14 @@ WAV2VEC2_ASR_LARGE_LV60K_960H
.. autodata:: WAV2VEC2_ASR_LARGE_LV60K_960H
.. autodata:: WAV2VEC2_ASR_LARGE_LV60K_960H
:no-value:
:no-value:
VOXPOPULI_ASR_BASE_10K_ES
~~~~~~~~~~~~~~~~~~~~~~~~~
.. container:: py attribute
.. autodata:: VOXPOPULI_ASR_BASE_10K_ES
:no-value:
VOXPOPULI_ASR_BASE_10K_FR
VOXPOPULI_ASR_BASE_10K_FR
~~~~~~~~~~~~~~~~~~~~~~~~~
~~~~~~~~~~~~~~~~~~~~~~~~~
...
...
test/integration_tests/conftest.py
View file @
3a599315
...
@@ -34,6 +34,7 @@ def ctc_decoder():
...
@@ -34,6 +34,7 @@ def ctc_decoder():
_FILES
=
{
_FILES
=
{
'en'
:
'Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.flac'
,
'en'
:
'Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.flac'
,
'es'
:
'20130207-0900-PLENARY-7-es_20130207-13_02_05_5.flac'
,
'fr'
:
'20121212-0900-PLENARY-5-fr_20121212-11_37_04_10.flac'
,
'fr'
:
'20121212-0900-PLENARY-5-fr_20121212-11_37_04_10.flac'
,
}
}
...
...
test/integration_tests/wav2vec2_pipeline_test.py
View file @
3a599315
...
@@ -18,6 +18,7 @@ from torchaudio.pipelines import (
...
@@ -18,6 +18,7 @@ from torchaudio.pipelines import (
HUBERT_XLARGE
,
HUBERT_XLARGE
,
HUBERT_ASR_LARGE
,
HUBERT_ASR_LARGE
,
HUBERT_ASR_XLARGE
,
HUBERT_ASR_XLARGE
,
VOXPOPULI_ASR_BASE_10K_ES
,
VOXPOPULI_ASR_BASE_10K_FR
,
VOXPOPULI_ASR_BASE_10K_FR
,
)
)
import
pytest
import
pytest
...
@@ -54,6 +55,7 @@ def test_pretraining_models(bundle):
...
@@ -54,6 +55,7 @@ def test_pretraining_models(bundle):
(
WAV2VEC2_ASR_LARGE_LV60K_960H
,
'en'
,
'I|HAVE|THAT|CURIOSITY|BESIDE|ME|AT|THIS|MOMENT|'
),
(
WAV2VEC2_ASR_LARGE_LV60K_960H
,
'en'
,
'I|HAVE|THAT|CURIOSITY|BESIDE|ME|AT|THIS|MOMENT|'
),
(
HUBERT_ASR_LARGE
,
'en'
,
'I|HAVE|THAT|CURIOSITY|BESIDE|ME|AT|THIS|MOMENT|'
),
(
HUBERT_ASR_LARGE
,
'en'
,
'I|HAVE|THAT|CURIOSITY|BESIDE|ME|AT|THIS|MOMENT|'
),
(
HUBERT_ASR_XLARGE
,
'en'
,
'I|HAVE|THAT|CURIOSITY|BESIDE|ME|AT|THIS|MOMENT|'
),
(
HUBERT_ASR_XLARGE
,
'en'
,
'I|HAVE|THAT|CURIOSITY|BESIDE|ME|AT|THIS|MOMENT|'
),
(
VOXPOPULI_ASR_BASE_10K_ES
,
'es'
,
"la|primera|que|es|imprescindible|pensar|a|pequeña|a|escala|para|implicar|y|complementar|así|la|actuación|global"
),
# noqa: E501
(
VOXPOPULI_ASR_BASE_10K_FR
,
'fr'
,
'la|commission|va|faire|des|propositions|sur|ce|sujet|comment|mettre|en|place|cette|capacité|fiscale|et|le|conseil|européen|y|reviendra|sour|les|sujets|au|moins|de|mars'
),
# noqa: E501
(
VOXPOPULI_ASR_BASE_10K_FR
,
'fr'
,
'la|commission|va|faire|des|propositions|sur|ce|sujet|comment|mettre|en|place|cette|capacité|fiscale|et|le|conseil|européen|y|reviendra|sour|les|sujets|au|moins|de|mars'
),
# noqa: E501
]
]
)
)
...
...
torchaudio/pipelines/__init__.py
View file @
3a599315
...
@@ -14,6 +14,7 @@ from ._wav2vec2.impl import (
...
@@ -14,6 +14,7 @@ from ._wav2vec2.impl import (
WAV2VEC2_ASR_LARGE_LV60K_100H
,
WAV2VEC2_ASR_LARGE_LV60K_100H
,
WAV2VEC2_ASR_LARGE_LV60K_960H
,
WAV2VEC2_ASR_LARGE_LV60K_960H
,
WAV2VEC2_XLSR53
,
WAV2VEC2_XLSR53
,
VOXPOPULI_ASR_BASE_10K_ES
,
VOXPOPULI_ASR_BASE_10K_FR
,
VOXPOPULI_ASR_BASE_10K_FR
,
HUBERT_BASE
,
HUBERT_BASE
,
HUBERT_LARGE
,
HUBERT_LARGE
,
...
@@ -45,6 +46,7 @@ __all__ = [
...
@@ -45,6 +46,7 @@ __all__ = [
'WAV2VEC2_ASR_LARGE_LV60K_100H'
,
'WAV2VEC2_ASR_LARGE_LV60K_100H'
,
'WAV2VEC2_ASR_LARGE_LV60K_960H'
,
'WAV2VEC2_ASR_LARGE_LV60K_960H'
,
'WAV2VEC2_XLSR53'
,
'WAV2VEC2_XLSR53'
,
'VOXPOPULI_ASR_BASE_10K_ES'
,
'VOXPOPULI_ASR_BASE_10K_FR'
,
'VOXPOPULI_ASR_BASE_10K_FR'
,
'HUBERT_BASE'
,
'HUBERT_BASE'
,
'HUBERT_LARGE'
,
'HUBERT_LARGE'
,
...
...
torchaudio/pipelines/_wav2vec2/impl.py
View file @
3a599315
...
@@ -986,6 +986,52 @@ Please refer to :func:`torchaudio.pipelines.Wav2Vec2ASRBundle` for the usage.
...
@@ -986,6 +986,52 @@ Please refer to :func:`torchaudio.pipelines.Wav2Vec2ASRBundle` for the usage.
"""
# noqa: E501
"""
# noqa: E501
# Bundle for the Spanish ("es") VoxPopuli ASR checkpoint. The positional
# arguments are the checkpoint file name and the model configuration dict;
# the labels come from ``utils._get_es_labels()``.
# NOTE(review): the meaning of each conv-layer 3-tuple and of
# ``_remove_aux_axis`` is defined by Wav2Vec2ASRBundle, not here — confirm
# there before changing any of these values.
VOXPOPULI_ASR_BASE_10K_ES = Wav2Vec2ASRBundle(
    'wav2vec2_voxpopuli_base_10k_asr_es.pt',
    {
        "extractor_mode": "group_norm",
        "extractor_conv_layer_config": [
            (512, 10, 5),
            (512, 3, 2),
            (512, 3, 2),
            (512, 3, 2),
            (512, 3, 2),
            (512, 2, 2),
            (512, 2, 2),
        ],
        "extractor_conv_bias": False,
        "encoder_embed_dim": 768,
        "encoder_projection_dropout": 0.0,
        "encoder_pos_conv_kernel": 128,
        "encoder_pos_conv_groups": 16,
        "encoder_num_layers": 12,
        "encoder_num_heads": 12,
        "encoder_attention_dropout": 0.0,
        "encoder_ff_interm_features": 3072,
        "encoder_ff_interm_dropout": 0.1,
        "encoder_dropout": 0.0,
        "encoder_layer_norm_first": False,
        "encoder_layer_drop": 0.1,
        "aux_num_out": 35
    },
    _labels=utils._get_es_labels(),
    _sample_rate=16000,
    _remove_aux_axis=(1, 2, 3, 35),
)
VOXPOPULI_ASR_BASE_10K_ES.__doc__ = """wav2vec 2.0 model with "Base" configuration.
Pre-trained on 10k hours of unlabeled audio from *VoxPopuli* dataset [:footcite:`voxpopuli`]
("10k" subset, consisting of 23 languages).
Fine-tuned for ASR on 166 hours of transcribed audio from "es" subset.
Originally published by the authors of *VoxPopuli* [:footcite:`voxpopuli`] under CC BY-NC 4.0 and
redistributed with the same license.
[`License <https://github.com/facebookresearch/voxpopuli/tree/160e4d7915bad9f99b2c35b1d3833e51fd30abf2#license>`__,
`Source <https://github.com/facebookresearch/voxpopuli/tree/160e4d7915bad9f99b2c35b1d3833e51fd30abf2#asr-and-lm>`__]
Please refer to :func:`torchaudio.pipelines.Wav2Vec2ASRBundle` for the usage.
"""  # noqa: E501
VOXPOPULI_ASR_BASE_10K_FR
=
Wav2Vec2ASRBundle
(
VOXPOPULI_ASR_BASE_10K_FR
=
Wav2Vec2ASRBundle
(
'wav2vec2_voxpopuli_base_10k_asr_fr.pt'
,
'wav2vec2_voxpopuli_base_10k_asr_fr.pt'
,
{
{
...
...
torchaudio/pipelines/_wav2vec2/utils.py
View file @
3a599315
...
@@ -31,6 +31,45 @@ def _get_en_labels():
...
@@ -31,6 +31,45 @@ def _get_en_labels():
)
)
def
_get_es_labels
():
return
(
"|"
,
"e"
,
"a"
,
"o"
,
"s"
,
"n"
,
"r"
,
"i"
,
"l"
,
"d"
,
"c"
,
"t"
,
"u"
,
"p"
,
"m"
,
"b"
,
"q"
,
"y"
,
"g"
,
"v"
,
"h"
,
"ó"
,
"f"
,
"í"
,
"á"
,
"j"
,
"z"
,
"ñ"
,
"é"
,
"x"
,
"ú"
,
"k"
,
"w"
,
"ü"
,
)
def
_get_fr_labels
():
def
_get_fr_labels
():
return
(
return
(
"|"
,
"|"
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment