Improve GPT2 doc (#18787)

* Minor typo in GPT2 doc
* Improve GPT2 label doc
* Update dim of label in GPT2ForTokenClassification
* Add change to TF

Improve GPT2 doc (#18787)
* Minor typo in GPT2 doc
* Improve GPT2 label doc
* Update dim of label in GPT2ForTokenClassification
* Add change to TF
f210e2a4 · Ekagra Ranjan · GitHub · 74690b62 · f210e2a4 · f210e2a4
Unverified commit f210e2a4, authored Aug 31, 2022 by Ekagra Ranjan; committed by GitHub Aug 31, 2022.
Showing with 4 additions and 4 deletions

src/transformers/models/gpt2/modeling_gpt2.py src/transformers/models/gpt2/modeling_gpt2.py +3 -3

src/transformers/models/gpt2/modeling_tf_gpt2.py src/transformers/models/gpt2/modeling_tf_gpt2.py +1 -1

No files found.
--- a/src/transformers/models/gpt2/modeling_gpt2.py
+++ b/src/transformers/models/gpt2/modeling_gpt2.py
@@ -1225,10 +1225,10 @@ class GPT2DoubleHeadsModel(GPT2PreTrainedModel):
        r"""
        mc_token_ids (`torch.LongTensor` of shape `(batch_size, num_choices)`, *optional*, default to index of the last token of the input):
            Index of the classification token in each input sequence. Selected in the range `[0, input_ids.size(-1) -
-            1[`.
+            1]`.
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for language modeling. Note that the labels **are shifted** inside the model, i.e. you can set
-            `labels = input_ids` Indices are selected in `[-100, 0, ..., config.vocab_size - 1]` All labels set to
+            `labels = input_ids`. Indices are selected in `[-100, 0, ..., config.vocab_size - 1]`. All labels set to
            `-100` are ignored (masked), the loss is only computed for labels in `[0, ..., config.vocab_size - 1]`
        mc_labels (`torch.LongTensor` of shape `(batch_size)`, *optional*):
            Labels for computing the multiple choice classification loss. Indices should be in `[0, ..., num_choices]`
@@ -1519,7 +1519,7 @@ class GPT2ForTokenClassification(GPT2PreTrainedModel):
        return_dict: Optional[bool] = None,
    ) -> Union[Tuple, TokenClassifierOutput]:
        r"""
-        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
+        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).

--- a/src/transformers/models/gpt2/modeling_tf_gpt2.py
+++ b/src/transformers/models/gpt2/modeling_tf_gpt2.py
@@ -983,7 +983,7 @@ class TFGPT2DoubleHeadsModel(TFGPT2PreTrainedModel):
        r"""
        mc_token_ids (`tf.Tensor` or `Numpy array` of shape `(batch_size, num_choices)`, *optional*, default to index of the last token of the input):
            Index of the classification token in each input sequence. Selected in the range `[0, input_ids.size(-1) -
-            1[`.
+            1]`.
        Return: