Fix PatchTSMixer Docstrings (#27943)

* docstring corrections * style make --------- Co-authored-by: vijaye12 <vijaye12@in.ibm.com>

Fix PatchTSMixer Docstrings (#27943)
* docstring corrections * style make --------- Co-authored-by: vijaye12 <vijaye12@in.ibm.com>
6ff10922 · vijaye12 · GitHub · accccdd0 · 6ff10922 · 6ff10922
Unverified Commit 6ff10922 authored Dec 11, 2023 by vijaye12 Committed by GitHub Dec 11, 2023
2 changed files
--- a/src/transformers/models/patchtsmixer/configuration_patchtsmixer.py
+++ b/src/transformers/models/patchtsmixer/configuration_patchtsmixer.py
@@ -49,8 +49,6 @@ class PatchTSMixerConfig(PretrainedConfig):
            non-overlapping patches.
        num_parallel_samples (`int`, *optional*, defaults to 100):
            The number of samples to generate in parallel for probabilistic forecast.
-
-
        d_model (`int`, *optional*, defaults to 8):
            Hidden dimension of the model. Recommended to set it as a multiple of patch_length (i.e. 2-5X of
            patch_len). Larger value indicates more complex model.
@@ -96,8 +94,6 @@ class PatchTSMixerConfig(PretrainedConfig):
            `PyTorch`. Setting it to `False` performs `PyTorch` weight initialization.
        norm_eps (`float`, *optional*, defaults to 1e-05):
            A value added to the denominator for numerical stability of normalization.
-
-
        mask_type (`str`, *optional*, defaults to `"random"`):
            Type of masking to use for Masked Pretraining mode. Allowed values are "random", "forecast". In Random
            masking, points are masked randomly. In Forecast masking, points are masked towards the end.
@@ -116,9 +112,6 @@ class PatchTSMixerConfig(PretrainedConfig):
            across channels.
        unmasked_channel_indices (`list`, *optional*):
            Channels that are not masked during pretraining.
-
-
-
        head_dropout (`float`, *optional*, defaults to 0.2):
            The dropout probability of the `PatchTSMixer` head.
        distribution_output (`string`, *optional*, defaults to `"student_t"`):

--- a/src/transformers/models/patchtsmixer/modeling_patchtsmixer.py
+++ b/src/transformers/models/patchtsmixer/modeling_patchtsmixer.py
@@ -1334,11 +1334,11 @@ class PatchTSMixerModel(PatchTSMixerPreTrainedModel):
        return_dict: Optional[bool] = None,
    ) -> PatchTSMixerModelOutput:
        r"""
-            observed_mask (`torch.FloatTensor` of shape `(batch_size, sequence_length, num_input_channels)`, *optional*):
-                Boolean mask to indicate which `past_values` were observed and which were missing. Mask values selected
-                in `[0, 1]`:
-                    - 1 for values that are **observed**,
-                    - 0 for values that are **missing** (i.e. NaNs that were replaced by zeros).
+        observed_mask (`torch.FloatTensor` of shape `(batch_size, sequence_length, num_input_channels)`, *optional*):
+            Boolean mask to indicate which `past_values` were observed and which were missing. Mask values selected
+            in `[0, 1]`:
+                - 1 for values that are **observed**,
+                - 0 for values that are **missing** (i.e. NaNs that were replaced by zeros).

        Returns:

@@ -1446,13 +1446,13 @@ class PatchTSMixerForPretraining(PatchTSMixerPreTrainedModel):
        return_dict: Optional[bool] = None,
    ) -> PatchTSMixerForPreTrainingOutput:
        r"""
-            observed_mask (`torch.FloatTensor` of shape `(batch_size, sequence_length, num_input_channels)`, *optional*):
-                Boolean mask to indicate which `past_values` were observed and which were missing. Mask values selected
-                in `[0, 1]`:
-                    - 1 for values that are **observed**,
-                    - 0 for values that are **missing** (i.e. NaNs that were replaced by zeros).
-            return_loss (`bool`,  *optional*):
-                Whether to return the loss in the `forward` call.
+        observed_mask (`torch.FloatTensor` of shape `(batch_size, sequence_length, num_input_channels)`, *optional*):
+            Boolean mask to indicate which `past_values` were observed and which were missing. Mask values selected
+            in `[0, 1]`:
+                - 1 for values that are **observed**,
+                - 0 for values that are **missing** (i.e. NaNs that were replaced by zeros).
+        return_loss (`bool`, *optional*):
+            Whether to return the loss in the `forward` call.

        Returns:

@@ -1650,24 +1650,23 @@ class PatchTSMixerForPrediction(PatchTSMixerPreTrainedModel):
        return_dict: Optional[bool] = None,
    ) -> PatchTSMixerForPredictionOutput:
        r"""
-            observed_mask (`torch.FloatTensor` of shape `(batch_size, sequence_length, num_input_channels)`, *optional*):
-                Boolean mask to indicate which `past_values` were observed and which were missing. Mask values selected
-                in `[0, 1]`:
-                    - 1 for values that are **observed**,
-                    - 0 for values that are **missing** (i.e. NaNs that were replaced by zeros).
-            future_values (`torch.FloatTensor` of shape `(batch_size, target_len, num_input_channels)` for forecasting,:
-                `(batch_size, num_targets)` for regression, or `(batch_size,)` for classification, *optional*): Target
-                values of the time series, that serve as labels for the model. The `future_values` is what the
-                Transformer needs during training to learn to output, given the `past_values`. Note that, this is NOT
-                required for a pretraining task.
-
-                For a forecasting task, the shape is be `(batch_size, target_len, num_input_channels)`. Even if we want
-                to forecast only specific channels by setting the indices in `prediction_channel_indices` parameter,
-                pass the target data with all channels, as channel Filtering for both prediction and target will be
-                manually applied before the loss computation.
-
-            return_loss (`bool`,  *optional*):
-                Whether to return the loss in the `forward` call.
+        observed_mask (`torch.FloatTensor` of shape `(batch_size, sequence_length, num_input_channels)`, *optional*):
+            Boolean mask to indicate which `past_values` were observed and which were missing. Mask values selected
+            in `[0, 1]`:
+                - 1 for values that are **observed**,
+                - 0 for values that are **missing** (i.e. NaNs that were replaced by zeros).
+        future_values (`torch.FloatTensor` of shape `(batch_size, target_len, num_input_channels)` for forecasting,
+            `(batch_size, num_targets)` for regression, or `(batch_size,)` for classification, *optional*): Target
+            values of the time series, that serve as labels for the model. The `future_values` is what the
+            Transformer needs during training to learn to output, given the `past_values`. Note that, this is NOT
+            required for a pretraining task.
+
+            For a forecasting task, the shape is `(batch_size, target_len, num_input_channels)`. Even if we want
+            to forecast only specific channels by setting the indices in `prediction_channel_indices` parameter,
+            pass the target data with all channels, as channel filtering for both prediction and target will be
+            manually applied before the loss computation.
+        return_loss (`bool`, *optional*):
+            Whether to return the loss in the `forward` call.

        Returns:

@@ -1871,22 +1870,22 @@ class PatchTSMixerForTimeSeriesClassification(PatchTSMixerPreTrainedModel):
        return_dict: Optional[bool] = None,
    ) -> PatchTSMixerForTimeSeriesClassificationOutput:
        r"""
-            future_values (`torch.FloatTensor` of shape `(batch_size, target_len, num_input_channels)` for forecasting,
-                `(batch_size, num_targets)` for regression, or `(batch_size,)` for classification, *optional*): Target
-                values of the time series, that serve as labels for the model. The `future_values` is what the
-                Transformer needs during training to learn to output, given the `past_values`. Note that, this is NOT
-                required for a pretraining task.
+        future_values (`torch.FloatTensor` of shape `(batch_size, target_len, num_input_channels)` for forecasting,
+            `(batch_size, num_targets)` for regression, or `(batch_size,)` for classification, *optional*): Target
+            values of the time series, that serve as labels for the model. The `future_values` is what the
+            Transformer needs during training to learn to output, given the `past_values`. Note that, this is NOT
+            required for a pretraining task.

-                For a forecasting task, the shape is be `(batch_size, target_len, num_input_channels)`. Even if we want
-                to forecast only specific channels by setting the indices in `prediction_channel_indices` parameter,
-                pass the target data with all channels, as channel Filtering for both prediction and target will be
-                manually applied before the loss computation.
+            For a forecasting task, the shape is `(batch_size, target_len, num_input_channels)`. Even if we want
+            to forecast only specific channels by setting the indices in `prediction_channel_indices` parameter,
+            pass the target data with all channels, as channel filtering for both prediction and target will be
+            manually applied before the loss computation.

-                For a classification task, it has a shape of `(batch_size,)`.
+            For a classification task, it has a shape of `(batch_size,)`.

-                For a regression task, it has a shape of `(batch_size, num_targets)`.
-            return_loss (`bool`, *optional*):
-                Whether to return the loss in the `forward` call.
+            For a regression task, it has a shape of `(batch_size, num_targets)`.
+        return_loss (`bool`, *optional*):
+            Whether to return the loss in the `forward` call.

        Returns:

@@ -2061,22 +2060,22 @@ class PatchTSMixerForRegression(PatchTSMixerPreTrainedModel):
        return_dict: Optional[bool] = None,
    ) -> PatchTSMixerForRegressionOutput:
        r"""
-            future_values (`torch.FloatTensor` of shape `(batch_size, target_len, num_input_channels)` for forecasting,
-                `(batch_size, num_targets)` for regression, or `(batch_size,)` for classification, *optional*): Target
-                values of the time series, that serve as labels for the model. The `future_values` is what the
-                Transformer needs during training to learn to output, given the `past_values`. Note that, this is NOT
-                required for a pretraining task.
-
-                For a forecasting task, the shape is be `(batch_size, target_len, num_input_channels)`. Even if we want
-                to forecast only specific channels by setting the indices in `prediction_channel_indices` parameter,
-                pass the target data with all channels, as channel Filtering for both prediction and target will be
-                manually applied before the loss computation.
-
-                For a classification task, it has a shape of `(batch_size,)`.
-
-                For a regression task, it has a shape of `(batch_size, num_targets)`.
-            return_loss (`bool`, *optional*):
-                Whether to return the loss in the `forward` call.
+        future_values (`torch.FloatTensor` of shape `(batch_size, target_len, num_input_channels)` for forecasting,
+            `(batch_size, num_targets)` for regression, or `(batch_size,)` for classification, *optional*): Target
+            values of the time series, that serve as labels for the model. The `future_values` is what the
+            Transformer needs during training to learn to output, given the `past_values`. Note that, this is NOT
+            required for a pretraining task.
+
+            For a forecasting task, the shape is `(batch_size, target_len, num_input_channels)`. Even if we want
+            to forecast only specific channels by setting the indices in `prediction_channel_indices` parameter,
+            pass the target data with all channels, as channel filtering for both prediction and target will be
+            manually applied before the loss computation.
+
+            For a classification task, it has a shape of `(batch_size,)`.
+
+            For a regression task, it has a shape of `(batch_size, num_targets)`.
+        return_loss (`bool`, *optional*):
+            Whether to return the loss in the `forward` call.

        Returns: