Update input values for docstring (#22631)

5a71977b · amyeroberts · GitHub · fe1f5a63 · 5a71977b
Unverified Commit 5a71977b authored Apr 12, 2023 by amyeroberts Committed by GitHub Apr 12, 2023
Hide whitespace changes
Inline Side-by-side

Showing with 6 additions and 3 deletions

src/transformers/models/audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.py ...ram_transformer/modeling_audio_spectrogram_transformer.py +6 -3

No files found.
--- a/src/transformers/models/audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.py
+++ b/src/transformers/models/audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.py
@@ -414,9 +414,12 @@ AUDIO_SPECTROGRAM_TRANSFORMER_START_DOCSTRING = r"""

 AUDIO_SPECTROGRAM_TRANSFORMER_INPUTS_DOCSTRING = r"""
    Args:
-        input_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
-            Pixel values. Pixel values can be obtained using [`AutoFeatureExtractor`]. See
-            [`ASTFeatureExtractor.__call__`] for details.
+        input_values (`torch.FloatTensor` of shape `(batch_size, max_length, num_mel_bins)`):
+            Float values of mel features extracted from the raw audio waveform. Raw audio waveform can be obtained by
+            loading a `.flac` or `.wav` audio file into an array of type `List[float]` or a `numpy.ndarray`, *e.g.* via
+            the soundfile library (`pip install soundfile`). To prepare the array into `input_values`, the
+            [`AutoFeatureExtractor`] should be used for extracting the mel features, padding and conversion into a
+            tensor of type `torch.FloatTensor`. See [`~ASTFeatureExtractor.__call__`].

        head_mask (`torch.FloatTensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*):
            Mask to nullify selected heads of the self-attention modules. Mask values selected in `[0, 1]`: