torchaudio.pipelines
====================

.. currentmodule:: torchaudio.pipelines

.. py:module:: torchaudio.pipelines

The pipelines subpackage contains APIs to access the models with pretrained weights, and information/helper functions associated with the pretrained weights.

RNN-T Streaming/Non-Streaming ASR
---------------------------------

RNNTBundle
~~~~~~~~~~

.. autoclass:: RNNTBundle
  :members: sample_rate, n_fft, n_mels, hop_length, segment_length, right_context_length

  .. automethod:: get_decoder() -> torchaudio.models.RNNTBeamSearch

  .. automethod:: get_feature_extractor() -> RNNTBundle.FeatureExtractor

  .. automethod:: get_streaming_feature_extractor() -> RNNTBundle.FeatureExtractor

  .. automethod:: get_token_processor() -> RNNTBundle.TokenProcessor

RNNTBundle - FeatureExtractor
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. autoclass:: torchaudio.pipelines::RNNTBundle.FeatureExtractor
  :special-members: __call__

RNNTBundle - TokenProcessor
~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. autoclass:: torchaudio.pipelines::RNNTBundle.TokenProcessor
  :special-members: __call__

EMFORMER_RNNT_BASE_LIBRISPEECH
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. container:: py attribute

   .. autodata:: EMFORMER_RNNT_BASE_LIBRISPEECH
      :no-value:


wav2vec 2.0 / HuBERT - Representation Learning
----------------------------------------------

.. autoclass:: Wav2Vec2Bundle
   :members: sample_rate

   .. automethod:: get_model

WAV2VEC2_BASE
~~~~~~~~~~~~~

.. container:: py attribute

   .. autodata:: WAV2VEC2_BASE
      :no-value:

WAV2VEC2_LARGE
~~~~~~~~~~~~~~

.. container:: py attribute

   .. autodata:: WAV2VEC2_LARGE
      :no-value:

WAV2VEC2_LARGE_LV60K
~~~~~~~~~~~~~~~~~~~~

.. container:: py attribute

   .. autodata:: WAV2VEC2_LARGE_LV60K
      :no-value:


WAV2VEC2_XLSR53
~~~~~~~~~~~~~~~

.. container:: py attribute

   .. autodata:: WAV2VEC2_XLSR53
      :no-value:

HUBERT_BASE
~~~~~~~~~~~

.. container:: py attribute

   .. autodata:: HUBERT_BASE
      :no-value:

HUBERT_LARGE
~~~~~~~~~~~~

.. container:: py attribute

   .. autodata:: HUBERT_LARGE
      :no-value:

HUBERT_XLARGE
~~~~~~~~~~~~~

.. container:: py attribute

   .. autodata:: HUBERT_XLARGE
      :no-value:

wav2vec 2.0 / HuBERT - Fine-tuned ASR
-------------------------------------

Wav2Vec2ASRBundle
~~~~~~~~~~~~~~~~~

.. autoclass:: Wav2Vec2ASRBundle
   :members: sample_rate

   .. automethod:: get_model

   .. automethod:: get_labels

WAV2VEC2_ASR_BASE_10M
~~~~~~~~~~~~~~~~~~~~~

.. container:: py attribute

   .. autodata:: WAV2VEC2_ASR_BASE_10M
      :no-value:

WAV2VEC2_ASR_BASE_100H
~~~~~~~~~~~~~~~~~~~~~~
      
.. container:: py attribute

   .. autodata:: WAV2VEC2_ASR_BASE_100H
      :no-value:

WAV2VEC2_ASR_BASE_960H
~~~~~~~~~~~~~~~~~~~~~~

.. container:: py attribute

   .. autodata:: WAV2VEC2_ASR_BASE_960H
      :no-value:

WAV2VEC2_ASR_LARGE_10M
~~~~~~~~~~~~~~~~~~~~~~

.. container:: py attribute

   .. autodata:: WAV2VEC2_ASR_LARGE_10M
      :no-value:

WAV2VEC2_ASR_LARGE_100H
~~~~~~~~~~~~~~~~~~~~~~~

.. container:: py attribute

   .. autodata:: WAV2VEC2_ASR_LARGE_100H
      :no-value:

WAV2VEC2_ASR_LARGE_960H
~~~~~~~~~~~~~~~~~~~~~~~

.. container:: py attribute

   .. autodata:: WAV2VEC2_ASR_LARGE_960H
      :no-value:

WAV2VEC2_ASR_LARGE_LV60K_10M
~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. container:: py attribute

   .. autodata:: WAV2VEC2_ASR_LARGE_LV60K_10M
      :no-value:

WAV2VEC2_ASR_LARGE_LV60K_100H
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. container:: py attribute

   .. autodata:: WAV2VEC2_ASR_LARGE_LV60K_100H
      :no-value:

WAV2VEC2_ASR_LARGE_LV60K_960H
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. container:: py attribute

   .. autodata:: WAV2VEC2_ASR_LARGE_LV60K_960H
      :no-value:

VOXPOPULI_ASR_BASE_10K_DE
~~~~~~~~~~~~~~~~~~~~~~~~~

.. container:: py attribute

   .. autodata:: VOXPOPULI_ASR_BASE_10K_DE
      :no-value:

VOXPOPULI_ASR_BASE_10K_EN
~~~~~~~~~~~~~~~~~~~~~~~~~

.. container:: py attribute

   .. autodata:: VOXPOPULI_ASR_BASE_10K_EN
      :no-value:

VOXPOPULI_ASR_BASE_10K_ES
~~~~~~~~~~~~~~~~~~~~~~~~~

.. container:: py attribute

   .. autodata:: VOXPOPULI_ASR_BASE_10K_ES
      :no-value:

VOXPOPULI_ASR_BASE_10K_FR
~~~~~~~~~~~~~~~~~~~~~~~~~

.. container:: py attribute

   .. autodata:: VOXPOPULI_ASR_BASE_10K_FR
      :no-value:

VOXPOPULI_ASR_BASE_10K_IT
~~~~~~~~~~~~~~~~~~~~~~~~~

.. container:: py attribute

   .. autodata:: VOXPOPULI_ASR_BASE_10K_IT
      :no-value:

HUBERT_ASR_LARGE
~~~~~~~~~~~~~~~~

.. container:: py attribute

   .. autodata:: HUBERT_ASR_LARGE
      :no-value:

HUBERT_ASR_XLARGE
~~~~~~~~~~~~~~~~~

.. container:: py attribute

   .. autodata:: HUBERT_ASR_XLARGE
      :no-value:

Tacotron2 Text-To-Speech
------------------------

Tacotron2TTSBundle
~~~~~~~~~~~~~~~~~~

.. autoclass:: Tacotron2TTSBundle

   .. automethod:: get_text_processor

   .. automethod:: get_tacotron2

   .. automethod:: get_vocoder

Tacotron2TTSBundle - TextProcessor
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. autoclass:: torchaudio.pipelines::Tacotron2TTSBundle.TextProcessor
   :members: tokens
   :special-members: __call__


Tacotron2TTSBundle - Vocoder
~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. autoclass:: torchaudio.pipelines::Tacotron2TTSBundle.Vocoder
   :members: sample_rate
   :special-members: __call__


TACOTRON2_WAVERNN_PHONE_LJSPEECH
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. container:: py attribute

   .. autodata:: TACOTRON2_WAVERNN_PHONE_LJSPEECH
      :no-value:


TACOTRON2_WAVERNN_CHAR_LJSPEECH
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. container:: py attribute

   .. autodata:: TACOTRON2_WAVERNN_CHAR_LJSPEECH
      :no-value:

TACOTRON2_GRIFFINLIM_PHONE_LJSPEECH
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. container:: py attribute

   .. autodata:: TACOTRON2_GRIFFINLIM_PHONE_LJSPEECH
      :no-value:

TACOTRON2_GRIFFINLIM_CHAR_LJSPEECH
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. container:: py attribute

   .. autodata:: TACOTRON2_GRIFFINLIM_CHAR_LJSPEECH
      :no-value:

Source Separation
-----------------

SourceSeparationBundle
~~~~~~~~~~~~~~~~~~~~~~

.. autoclass:: SourceSeparationBundle
   :members: sample_rate

   .. automethod:: get_model


CONVTASNET_BASE_LIBRI2MIX
~~~~~~~~~~~~~~~~~~~~~~~~~

.. container:: py attribute

   .. autodata:: CONVTASNET_BASE_LIBRI2MIX
      :no-value:

HDEMUCS_HIGH_MUSDB_PLUS
~~~~~~~~~~~~~~~~~~~~~~~

.. container:: py attribute

   .. autodata:: HDEMUCS_HIGH_MUSDB_PLUS
      :no-value:

HDEMUCS_HIGH_MUSDB
~~~~~~~~~~~~~~~~~~

.. container:: py attribute

   .. autodata:: HDEMUCS_HIGH_MUSDB
      :no-value: