Unverified Commit 924484ee authored by Michael Chung, committed by GitHub

Add Doc Test GPT-2 (#16439)



* First pass: all tests pass

* WIP

* Adding file to documentation tests

* Change the base model for the example in the doc test.

* Fix code styling by running `make fixup`

* Ran `make style`

* Reverted to the gpt2 model rather than distilgpt2, then used a token classification model instead of a sequence classification model for the example.

* Fix Styling Issue

* Hopefully the formatter now ignores the long expected_output line (via `# fmt: off` / `# fmt: on`).
Co-authored-by: ArEnSc <xx.mike.chung.xx@gmail.com>
parent 70851a6b
@@ -1231,10 +1231,8 @@ class GPT2DoubleHeadsModel(GPT2PreTrainedModel):
         >>> # Add a [CLS] to the vocabulary (we should train it also!)
         >>> num_added_tokens = tokenizer.add_special_tokens({"cls_token": "[CLS]"})
-        >>> embedding_layer = model.resize_token_embeddings(
-        ...     len(tokenizer)
-        >>> )  # Update the model embeddings with the new vocabulary size
+        >>> # Update the model embeddings with the new vocabulary size
+        >>> embedding_layer = model.resize_token_embeddings(len(tokenizer))
         >>> choices = ["Hello, my dog is cute [CLS]", "Hello, my cat is cute [CLS]"]
         >>> encoded_choices = [tokenizer.encode(s) for s in choices]
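For reference, the rewritten docstring example reads end to end roughly as below once the `resize_token_embeddings` call sits on one line. This is a sketch assembled from the surrounding GPT2DoubleHeadsModel docstring; the shape comments assume both choice strings tokenize to the same length, which they do with the stock gpt2 tokenizer.

```python
import torch
from transformers import GPT2Tokenizer, GPT2DoubleHeadsModel

tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2DoubleHeadsModel.from_pretrained("gpt2")

# Add a [CLS] token to the vocabulary (we should train it also!)
num_added_tokens = tokenizer.add_special_tokens({"cls_token": "[CLS]"})
# Update the model embeddings with the new vocabulary size
embedding_layer = model.resize_token_embeddings(len(tokenizer))

choices = ["Hello, my dog is cute [CLS]", "Hello, my cat is cute [CLS]"]
encoded_choices = [tokenizer.encode(s) for s in choices]
cls_token_location = [tokens.index(tokenizer.cls_token_id) for tokens in encoded_choices]

input_ids = torch.tensor(encoded_choices).unsqueeze(0)  # batch size 1, two choices
mc_token_ids = torch.tensor([cls_token_location])  # batch size 1

outputs = model(input_ids, mc_token_ids=mc_token_ids)
lm_logits = outputs.logits  # language-modeling scores for each choice
mc_logits = outputs.mc_logits  # multiple-choice classification scores
```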
@@ -1350,6 +1348,8 @@ class GPT2ForSequenceClassification(GPT2PreTrainedModel):
         checkpoint="microsoft/DialogRPT-updown",
         output_type=SequenceClassifierOutputWithPast,
         config_class=_CONFIG_FOR_DOC,
+        expected_output="'LABEL_0'",
+        expected_loss=5.28,
     )
     def forward(
         self,
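The two added arguments parameterize the shared sequence-classification sample that `add_code_sample_docstrings` injects into the forward docstring: the documentation test runs that sample against the microsoft/DialogRPT-updown checkpoint, checking the predicted label against `expected_output` and a rounded loss against `expected_loss`. A minimal sketch of what the generated example exercises; the exact wording comes from the shared template, and the input sentence and reference label below are illustrative, so the printed loss need not be exactly 5.28.

```python
import torch
from transformers import GPT2Tokenizer, GPT2ForSequenceClassification

tokenizer = GPT2Tokenizer.from_pretrained("microsoft/DialogRPT-updown")
model = GPT2ForSequenceClassification.from_pretrained("microsoft/DialogRPT-updown")

inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
with torch.no_grad():
    logits = model(**inputs).logits

# expected_output="'LABEL_0'" asserts on the name of the highest-scoring class.
predicted_class_id = int(logits.argmax(-1))
print(model.config.id2label[predicted_class_id])

# expected_loss=5.28 asserts on the rounded loss in the generated sample; here we
# just pick a target whose dtype matches the head (float for a single-logit
# regression head, an integer class id otherwise).
labels = torch.tensor([0.0]) if model.config.num_labels == 1 else torch.tensor([0])
loss = model(**inputs, labels=labels).loss
print(round(float(loss), 2))
```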
@@ -1477,12 +1477,16 @@ class GPT2ForTokenClassification(GPT2PreTrainedModel):
         self.post_init()

     @add_start_docstrings_to_model_forward(GPT2_INPUTS_DOCSTRING)
+    # fmt: off
     @add_code_sample_docstrings(
         processor_class=_TOKENIZER_FOR_DOC,
-        checkpoint="microsoft/DialogRPT-updown",
+        checkpoint="brad1141/gpt2-finetuned-comp2",
         output_type=TokenClassifierOutput,
         config_class=_CONFIG_FOR_DOC,
+        expected_loss=0.25,
+        expected_output=["Lead", "Lead", "Lead", "Position", "Lead", "Lead", "Lead", "Lead", "Lead", "Lead", "Lead", "Lead"],
     )
+    # fmt: on
     def forward(
         self,
         input_ids=None,
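Here the sample is pointed at a GPT-2 checkpoint fine-tuned for token classification, and the `# fmt: off` / `# fmt: on` pair keeps `make style` from re-wrapping the long `expected_output` list (the formatting issue mentioned in the commit message). A sketch of what the generated token-classification example does; the input sentence is illustrative, chosen so that one label is produced per BPE token, matching the twelve entries above.

```python
import torch
from transformers import GPT2Tokenizer, GPT2ForTokenClassification

tokenizer = GPT2Tokenizer.from_pretrained("brad1141/gpt2-finetuned-comp2")
model = GPT2ForTokenClassification.from_pretrained("brad1141/gpt2-finetuned-comp2")

inputs = tokenizer(
    "HuggingFace is a company based in Paris and New York",
    add_special_tokens=False,
    return_tensors="pt",
)
with torch.no_grad():
    logits = model(**inputs).logits

# One predicted class per input token; the doc test compares this list with expected_output.
predicted_token_class_ids = logits.argmax(-1)
predicted_tokens_classes = [model.config.id2label[t.item()] for t in predicted_token_class_ids[0]]
print(predicted_tokens_classes)
```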
@@ -15,6 +15,7 @@ src/transformers/models/data2vec/modeling_data2vec_audio.py
 src/transformers/models/deit/modeling_deit.py
 src/transformers/models/dpt/modeling_dpt.py
 src/transformers/models/glpn/modeling_glpn.py
+src/transformers/models/gpt2/modeling_gpt2.py
 src/transformers/models/hubert/modeling_hubert.py
 src/transformers/models/marian/modeling_marian.py
 src/transformers/models/mbart/modeling_mbart.py
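Listing src/transformers/models/gpt2/modeling_gpt2.py here is what opts its docstring examples into the documentation test run: the harness executes the `>>>` examples and compares their printed output against the text recorded under them, much like the standard library's doctest. A minimal, stdlib-only illustration of that mechanism (not the repository's actual runner, which drives this through pytest):

```python
import doctest


def add_one(x):
    """Toy function with a doc-test-style example.

    >>> add_one(1)
    2
    """
    return x + 1


if __name__ == "__main__":
    # Runs every example in this module's docstrings and fails if the recorded
    # output (here: "2") no longer matches what the code actually prints.
    results = doctest.testmod(verbose=True)
    assert results.failed == 0
```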