Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
Torchaudio
Commits
5c8541b7
"docs/faq.mdx" did not exist on "f1548ef62d03ffefc6b23c615c1361e4cc2b6bea"
Unverified
Commit
5c8541b7
authored
Nov 02, 2021
by
moto
Committed by
GitHub
Nov 02, 2021
Browse files
Add wav2vec2 ASR Italian pretrained model from voxpopuli (#1954)
parent
108e93af
Changes
6
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
101 additions
and
0 deletions
+101
-0
docs/source/pipelines.rst
docs/source/pipelines.rst
+8
-0
test/integration_tests/conftest.py
test/integration_tests/conftest.py
+1
-0
test/integration_tests/wav2vec2_pipeline_test.py
test/integration_tests/wav2vec2_pipeline_test.py
+2
-0
torchaudio/pipelines/__init__.py
torchaudio/pipelines/__init__.py
+2
-0
torchaudio/pipelines/_wav2vec2/impl.py
torchaudio/pipelines/_wav2vec2/impl.py
+47
-0
torchaudio/pipelines/_wav2vec2/utils.py
torchaudio/pipelines/_wav2vec2/utils.py
+41
-0
No files found.
docs/source/pipelines.rst
View file @
5c8541b7
...
...
@@ -177,6 +177,14 @@ VOXPOPULI_ASR_BASE_10K_FR
.. autodata:: VOXPOPULI_ASR_BASE_10K_FR
:no-value:
VOXPOPULI_ASR_BASE_10K_IT
~~~~~~~~~~~~~~~~~~~~~~~~~
.. container:: py attribute
.. autodata:: VOXPOPULI_ASR_BASE_10K_IT
:no-value:
HUBERT_ASR_LARGE
~~~~~~~~~~~~~~~~
...
...
test/integration_tests/conftest.py
View file @
5c8541b7
...
...
@@ -37,6 +37,7 @@ _FILES = {
'de'
:
'20090505-0900-PLENARY-16-de_20090505-21_56_00_8.flac'
,
'es'
:
'20130207-0900-PLENARY-7-es_20130207-13_02_05_5.flac'
,
'fr'
:
'20121212-0900-PLENARY-5-fr_20121212-11_37_04_10.flac'
,
'it'
:
'20170516-0900-PLENARY-16-it_20170516-18_56_31_1.flac'
,
}
...
...
test/integration_tests/wav2vec2_pipeline_test.py
View file @
5c8541b7
...
...
@@ -21,6 +21,7 @@ from torchaudio.pipelines import (
VOXPOPULI_ASR_BASE_10K_ES
,
VOXPOPULI_ASR_BASE_10K_DE
,
VOXPOPULI_ASR_BASE_10K_FR
,
VOXPOPULI_ASR_BASE_10K_IT
,
)
import
pytest
...
...
@@ -59,6 +60,7 @@ def test_pretraining_models(bundle):
(
VOXPOPULI_ASR_BASE_10K_ES
,
'es'
,
"la|primera|que|es|imprescindible|pensar|a|pequeña|a|escala|para|implicar|y|complementar|así|la|actuación|global"
),
# noqa: E501
(
VOXPOPULI_ASR_BASE_10K_DE
,
'de'
,
"dabei|spielt|auch|eine|sorgfältige|berichterstattung|eine|wichtige|rolle"
),
(
VOXPOPULI_ASR_BASE_10K_FR
,
'fr'
,
'la|commission|va|faire|des|propositions|sur|ce|sujet|comment|mettre|en|place|cette|capacité|fiscale|et|le|conseil|européen|y|reviendra|sour|les|sujets|au|moins|de|mars'
),
# noqa: E501
(
VOXPOPULI_ASR_BASE_10K_IT
,
'it'
,
'credo|che|illatino|non|sia|contemplato|tra|le|traduzioni|e|quindi|mi|attengo|allitaliano'
)
# noqa: E501
]
)
def
test_finetune_asr_model
(
...
...
torchaudio/pipelines/__init__.py
View file @
5c8541b7
...
...
@@ -17,6 +17,7 @@ from ._wav2vec2.impl import (
VOXPOPULI_ASR_BASE_10K_ES
,
VOXPOPULI_ASR_BASE_10K_DE
,
VOXPOPULI_ASR_BASE_10K_FR
,
VOXPOPULI_ASR_BASE_10K_IT
,
HUBERT_BASE
,
HUBERT_LARGE
,
HUBERT_XLARGE
,
...
...
@@ -50,6 +51,7 @@ __all__ = [
'VOXPOPULI_ASR_BASE_10K_ES'
,
'VOXPOPULI_ASR_BASE_10K_DE'
,
'VOXPOPULI_ASR_BASE_10K_FR'
,
'VOXPOPULI_ASR_BASE_10K_IT'
,
'HUBERT_BASE'
,
'HUBERT_LARGE'
,
'HUBERT_XLARGE'
,
...
...
torchaudio/pipelines/_wav2vec2/impl.py
View file @
5c8541b7
...
...
@@ -1123,3 +1123,50 @@ redistributed with the same license.
Please refer to :func:`torchaudio.pipelines.Wav2Vec2ASRBundle` for the usage.
"""
# noqa: E501
# Bundle for the wav2vec 2.0 "Base" model fine-tuned for Italian ASR on
# VoxPopuli. The first argument is the checkpoint file name, the dict holds the
# model construction parameters, and the keyword arguments carry the label set,
# the sample rate and the auxiliary output axes to drop.
# NOTE(review): aux_num_out (37) presumably corresponds to the 36 labels from
# utils._get_it_labels() plus one retained special token once the axes listed
# in _remove_aux_axis are removed -- confirm against Wav2Vec2ASRBundle.
VOXPOPULI_ASR_BASE_10K_IT = Wav2Vec2ASRBundle(
    'wav2vec2_voxpopuli_base_10k_asr_it.pt',
    {
        "extractor_mode": "group_norm",
        # One 3-tuple per feature-extractor convolution layer.
        # NOTE(review): presumably (out_channels, kernel_size, stride) -- confirm.
        "extractor_conv_layer_config": [
            (512, 10, 5),
            (512, 3, 2),
            (512, 3, 2),
            (512, 3, 2),
            (512, 3, 2),
            (512, 2, 2),
            (512, 2, 2),
        ],
        "extractor_conv_bias": False,
        "encoder_embed_dim": 768,
        "encoder_projection_dropout": 0.0,
        "encoder_pos_conv_kernel": 128,
        "encoder_pos_conv_groups": 16,
        "encoder_num_layers": 12,
        "encoder_num_heads": 12,
        "encoder_attention_dropout": 0.0,
        "encoder_ff_interm_features": 3072,
        "encoder_ff_interm_dropout": 0.1,
        "encoder_dropout": 0.0,
        "encoder_layer_norm_first": False,
        "encoder_layer_drop": 0.1,
        "aux_num_out": 37,
    },
    _labels=utils._get_it_labels(),
    _sample_rate=16000,
    _remove_aux_axis=(1, 2, 3),
)
VOXPOPULI_ASR_BASE_10K_IT.__doc__ = """wav2vec 2.0 model with "Base" configuration.
Pre-trained on 10k hours of unlabeled audio from *VoxPopuli* dataset [:footcite:`voxpopuli`]
("10k" subset, consisting of 23 languages).
Fine-tuned for ASR on 91 hours of transcribed audio from "it" subset.
Originally published by the authors of *VoxPopuli* [:footcite:`voxpopuli`] under CC BY-NC 4.0 and
redistributed with the same license.
[`License <https://github.com/facebookresearch/voxpopuli/tree/160e4d7915bad9f99b2c35b1d3833e51fd30abf2#license>`__,
`Source <https://github.com/facebookresearch/voxpopuli/tree/160e4d7915bad9f99b2c35b1d3833e51fd30abf2#asr-and-lm>`__]
Please refer to :func:`torchaudio.pipelines.Wav2Vec2ASRBundle` for the usage.
"""  # noqa: E501
torchaudio/pipelines/_wav2vec2/utils.py
View file @
5c8541b7
...
...
@@ -151,3 +151,44 @@ def _get_fr_labels():
"ü"
,
"æ"
,
)
def
_get_it_labels
():
return
(
"|"
,
"e"
,
"i"
,
"a"
,
"o"
,
"n"
,
"t"
,
"r"
,
"l"
,
"s"
,
"c"
,
"d"
,
"u"
,
"p"
,
"m"
,
"g"
,
"v"
,
"h"
,
"z"
,
"f"
,
"b"
,
"q"
,
"à"
,
"è"
,
"ù"
,
"é"
,
"ò"
,
"ì"
,
"k"
,
"y"
,
"x"
,
"w"
,
"j"
,
"ó"
,
"í"
,
"ï"
,
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment