Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
hehl2
Torchaudio
Commits
f2eec77b
"...text-generation-inference.git" did not exist on "41c2623735819bcb370063795127153dcee1e7a8"
Unverified
Commit
f2eec77b
authored
Nov 03, 2021
by
moto
Committed by
GitHub
Nov 03, 2021
Browse files
Add wav2vec2 ASR English pretrained model from voxpopuli (#1956)
parent
af336d66
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
89 additions
and
0 deletions
+89
-0
docs/source/pipelines.rst
docs/source/pipelines.rst
+8
-0
test/integration_tests/conftest.py
test/integration_tests/conftest.py
+1
-0
test/integration_tests/wav2vec2_pipeline_test.py
test/integration_tests/wav2vec2_pipeline_test.py
+2
-0
torchaudio/pipelines/__init__.py
torchaudio/pipelines/__init__.py
+2
-0
torchaudio/pipelines/_wav2vec2/impl.py
torchaudio/pipelines/_wav2vec2/impl.py
+44
-0
torchaudio/pipelines/_wav2vec2/utils.py
torchaudio/pipelines/_wav2vec2/utils.py
+32
-0
No files found.
docs/source/pipelines.rst
View file @
f2eec77b
...
@@ -161,6 +161,14 @@ VOXPOPULI_ASR_BASE_10K_DE
...
@@ -161,6 +161,14 @@ VOXPOPULI_ASR_BASE_10K_DE
.. autodata:: VOXPOPULI_ASR_BASE_10K_DE
.. autodata:: VOXPOPULI_ASR_BASE_10K_DE
:no-value:
:no-value:
VOXPOPULI_ASR_BASE_10K_EN
~~~~~~~~~~~~~~~~~~~~~~~~~
.. container:: py attribute
.. autodata:: VOXPOPULI_ASR_BASE_10K_EN
:no-value:
VOXPOPULI_ASR_BASE_10K_ES
VOXPOPULI_ASR_BASE_10K_ES
~~~~~~~~~~~~~~~~~~~~~~~~~
~~~~~~~~~~~~~~~~~~~~~~~~~
...
...
test/integration_tests/conftest.py
View file @
f2eec77b
...
@@ -35,6 +35,7 @@ def ctc_decoder():
...
@@ -35,6 +35,7 @@ def ctc_decoder():
_FILES
=
{
_FILES
=
{
'en'
:
'Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.flac'
,
'en'
:
'Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.flac'
,
'de'
:
'20090505-0900-PLENARY-16-de_20090505-21_56_00_8.flac'
,
'de'
:
'20090505-0900-PLENARY-16-de_20090505-21_56_00_8.flac'
,
'en2'
:
'20120613-0900-PLENARY-8-en_20120613-13_46_50_3.flac'
,
'es'
:
'20130207-0900-PLENARY-7-es_20130207-13_02_05_5.flac'
,
'es'
:
'20130207-0900-PLENARY-7-es_20130207-13_02_05_5.flac'
,
'fr'
:
'20121212-0900-PLENARY-5-fr_20121212-11_37_04_10.flac'
,
'fr'
:
'20121212-0900-PLENARY-5-fr_20121212-11_37_04_10.flac'
,
'it'
:
'20170516-0900-PLENARY-16-it_20170516-18_56_31_1.flac'
,
'it'
:
'20170516-0900-PLENARY-16-it_20170516-18_56_31_1.flac'
,
...
...
test/integration_tests/wav2vec2_pipeline_test.py
View file @
f2eec77b
...
@@ -18,6 +18,7 @@ from torchaudio.pipelines import (
...
@@ -18,6 +18,7 @@ from torchaudio.pipelines import (
HUBERT_XLARGE
,
HUBERT_XLARGE
,
HUBERT_ASR_LARGE
,
HUBERT_ASR_LARGE
,
HUBERT_ASR_XLARGE
,
HUBERT_ASR_XLARGE
,
VOXPOPULI_ASR_BASE_10K_EN
,
VOXPOPULI_ASR_BASE_10K_ES
,
VOXPOPULI_ASR_BASE_10K_ES
,
VOXPOPULI_ASR_BASE_10K_DE
,
VOXPOPULI_ASR_BASE_10K_DE
,
VOXPOPULI_ASR_BASE_10K_FR
,
VOXPOPULI_ASR_BASE_10K_FR
,
...
@@ -57,6 +58,7 @@ def test_pretraining_models(bundle):
...
@@ -57,6 +58,7 @@ def test_pretraining_models(bundle):
(
WAV2VEC2_ASR_LARGE_LV60K_960H
,
'en'
,
'I|HAVE|THAT|CURIOSITY|BESIDE|ME|AT|THIS|MOMENT|'
),
(
WAV2VEC2_ASR_LARGE_LV60K_960H
,
'en'
,
'I|HAVE|THAT|CURIOSITY|BESIDE|ME|AT|THIS|MOMENT|'
),
(
HUBERT_ASR_LARGE
,
'en'
,
'I|HAVE|THAT|CURIOSITY|BESIDE|ME|AT|THIS|MOMENT|'
),
(
HUBERT_ASR_LARGE
,
'en'
,
'I|HAVE|THAT|CURIOSITY|BESIDE|ME|AT|THIS|MOMENT|'
),
(
HUBERT_ASR_XLARGE
,
'en'
,
'I|HAVE|THAT|CURIOSITY|BESIDE|ME|AT|THIS|MOMENT|'
),
(
HUBERT_ASR_XLARGE
,
'en'
,
'I|HAVE|THAT|CURIOSITY|BESIDE|ME|AT|THIS|MOMENT|'
),
(
VOXPOPULI_ASR_BASE_10K_EN
,
'en2'
,
'i|hope|that|we|will|see|a|ddrasstic|decrease|of|funding|for|the|failed|eu|project|and|that|more|money|will|come|back|to|the|taxpayers'
),
# noqa: E501
(
VOXPOPULI_ASR_BASE_10K_ES
,
'es'
,
"la|primera|que|es|imprescindible|pensar|a|pequeña|a|escala|para|implicar|y|complementar|así|la|actuación|global"
),
# noqa: E501
(
VOXPOPULI_ASR_BASE_10K_ES
,
'es'
,
"la|primera|que|es|imprescindible|pensar|a|pequeña|a|escala|para|implicar|y|complementar|así|la|actuación|global"
),
# noqa: E501
(
VOXPOPULI_ASR_BASE_10K_DE
,
'de'
,
"dabei|spielt|auch|eine|sorgfältige|berichterstattung|eine|wichtige|rolle"
),
(
VOXPOPULI_ASR_BASE_10K_DE
,
'de'
,
"dabei|spielt|auch|eine|sorgfältige|berichterstattung|eine|wichtige|rolle"
),
(
VOXPOPULI_ASR_BASE_10K_FR
,
'fr'
,
'la|commission|va|faire|des|propositions|sur|ce|sujet|comment|mettre|en|place|cette|capacité|fiscale|et|le|conseil|européen|y|reviendra|sour|les|sujets|au|moins|de|mars'
),
# noqa: E501
(
VOXPOPULI_ASR_BASE_10K_FR
,
'fr'
,
'la|commission|va|faire|des|propositions|sur|ce|sujet|comment|mettre|en|place|cette|capacité|fiscale|et|le|conseil|européen|y|reviendra|sour|les|sujets|au|moins|de|mars'
),
# noqa: E501
...
...
torchaudio/pipelines/__init__.py
View file @
f2eec77b
...
@@ -14,6 +14,7 @@ from ._wav2vec2.impl import (
...
@@ -14,6 +14,7 @@ from ._wav2vec2.impl import (
WAV2VEC2_ASR_LARGE_LV60K_100H
,
WAV2VEC2_ASR_LARGE_LV60K_100H
,
WAV2VEC2_ASR_LARGE_LV60K_960H
,
WAV2VEC2_ASR_LARGE_LV60K_960H
,
WAV2VEC2_XLSR53
,
WAV2VEC2_XLSR53
,
VOXPOPULI_ASR_BASE_10K_EN
,
VOXPOPULI_ASR_BASE_10K_ES
,
VOXPOPULI_ASR_BASE_10K_ES
,
VOXPOPULI_ASR_BASE_10K_DE
,
VOXPOPULI_ASR_BASE_10K_DE
,
VOXPOPULI_ASR_BASE_10K_FR
,
VOXPOPULI_ASR_BASE_10K_FR
,
...
@@ -48,6 +49,7 @@ __all__ = [
...
@@ -48,6 +49,7 @@ __all__ = [
'WAV2VEC2_ASR_LARGE_LV60K_100H'
,
'WAV2VEC2_ASR_LARGE_LV60K_100H'
,
'WAV2VEC2_ASR_LARGE_LV60K_960H'
,
'WAV2VEC2_ASR_LARGE_LV60K_960H'
,
'WAV2VEC2_XLSR53'
,
'WAV2VEC2_XLSR53'
,
'VOXPOPULI_ASR_BASE_10K_EN'
,
'VOXPOPULI_ASR_BASE_10K_ES'
,
'VOXPOPULI_ASR_BASE_10K_ES'
,
'VOXPOPULI_ASR_BASE_10K_DE'
,
'VOXPOPULI_ASR_BASE_10K_DE'
,
'VOXPOPULI_ASR_BASE_10K_FR'
,
'VOXPOPULI_ASR_BASE_10K_FR'
,
...
...
torchaudio/pipelines/_wav2vec2/impl.py
View file @
f2eec77b
...
@@ -1033,6 +1033,50 @@ Please refer to :func:`torchaudio.pipelines.Wav2Vec2ASRBundle` for the usage.
...
@@ -1033,6 +1033,50 @@ Please refer to :func:`torchaudio.pipelines.Wav2Vec2ASRBundle` for the usage.
"""
# noqa: E501
"""
# noqa: E501
# Pretrained-model bundle: wav2vec 2.0 "Base", fine-tuned for English ASR on VoxPopuli.
# The dict below is the model configuration passed to Wav2Vec2ASRBundle; the key
# order and every value are kept exactly as published in this commit.
VOXPOPULI_ASR_BASE_10K_EN = Wav2Vec2ASRBundle(
    'wav2vec2_voxpopuli_base_10k_asr_en.pt',
    {
        "extractor_mode": "group_norm",
        "extractor_conv_layer_config": [
            (512, 10, 5),
            (512, 3, 2),
            (512, 3, 2),
            (512, 3, 2),
            (512, 3, 2),
            (512, 2, 2),
            (512, 2, 2),
        ],
        "extractor_conv_bias": False,
        "encoder_embed_dim": 768,
        "encoder_projection_dropout": 0.0,
        "encoder_pos_conv_kernel": 128,
        "encoder_pos_conv_groups": 16,
        "encoder_num_layers": 12,
        "encoder_num_heads": 12,
        "encoder_attention_dropout": 0.0,
        "encoder_ff_interm_features": 3072,
        "encoder_ff_interm_dropout": 0.1,
        "encoder_dropout": 0.0,
        "encoder_layer_norm_first": False,
        "encoder_layer_drop": 0.1,
        # NOTE(review): _get_vp_en_labels() yields 27 symbols; presumably the
        # 28th output is a blank token added elsewhere -- confirm.
        "aux_num_out": 28,
    },
    _labels=utils._get_vp_en_labels(),
    _sample_rate=16000,
    # NOTE(review): presumably the indices of checkpoint output entries that are
    # dropped on load -- confirm against Wav2Vec2ASRBundle.
    _remove_aux_axis=(1, 2, 3, 31),
)
VOXPOPULI_ASR_BASE_10K_EN.__doc__ = """wav2vec 2.0 model with "Base" configuration.
Pre-trained on 10k hours of unlabeled audio from *VoxPopuli* dataset [:footcite:`voxpopuli`]
("10k" subset, consisting of 23 languages).
Fine-tuned for ASR on 543 hours of transcribed audio from "en" subset.
Originally published by the authors of *VoxPopuli* [:footcite:`voxpopuli`] under CC BY-NC 4.0 and
redistributed with the same license.
[`License <https://github.com/facebookresearch/voxpopuli/tree/160e4d7915bad9f99b2c35b1d3833e51fd30abf2#license>`__,
`Source <https://github.com/facebookresearch/voxpopuli/tree/160e4d7915bad9f99b2c35b1d3833e51fd30abf2#asr-and-lm>`__]
Please refer to :func:`torchaudio.pipelines.Wav2Vec2ASRBundle` for the usage.
"""  # noqa: E501
VOXPOPULI_ASR_BASE_10K_ES
=
Wav2Vec2ASRBundle
(
VOXPOPULI_ASR_BASE_10K_ES
=
Wav2Vec2ASRBundle
(
'wav2vec2_voxpopuli_base_10k_asr_es.pt'
,
'wav2vec2_voxpopuli_base_10k_asr_es.pt'
,
{
{
...
...
torchaudio/pipelines/_wav2vec2/utils.py
View file @
f2eec77b
...
@@ -67,6 +67,38 @@ def _get_de_labels():
...
@@ -67,6 +67,38 @@ def _get_de_labels():
)
)
def
_get_vp_en_labels
():
return
(
"|"
,
"e"
,
"t"
,
"o"
,
"i"
,
"a"
,
"n"
,
"s"
,
"r"
,
"h"
,
"l"
,
"d"
,
"c"
,
"u"
,
"m"
,
"p"
,
"f"
,
"g"
,
"w"
,
"y"
,
"b"
,
"v"
,
"k"
,
"x"
,
"j"
,
"q"
,
"z"
,
)
def
_get_es_labels
():
def
_get_es_labels
():
return
(
return
(
"|"
,
"|"
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment