Unverified Commit f52746d0 authored by Yih-Dar, committed by GitHub

[Fix doc example] FlaxVisionEncoderDecoder (#15626)



* Fix wrong checkpoint name: vit

* Fix missing import

* Fix more missing import

* make style

* Apply suggestions from code review
Co-authored-by: NielsRogge <48327001+NielsRogge@users.noreply.github.com>
Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
parent 52d2e6f6
@@ -393,7 +393,7 @@ class FlaxVisionEncoderDecoderModel(FlaxPreTrainedModel):
 Example:
 ```python
->>> from transformers import FlaxVisionEncoderDecoderModel
+>>> from transformers import ViTFeatureExtractor, FlaxVisionEncoderDecoderModel
 >>> from PIL import Image
 >>> import requests
@@ -403,7 +403,9 @@ class FlaxVisionEncoderDecoderModel(FlaxPreTrainedModel):
 >>> feature_extractor = ViTFeatureExtractor.from_pretrained("google/vit-base-patch16-224-in21k")
 >>> # initialize a vit-gpt2 from pretrained ViT and GPT2 models. Note that the cross-attention layers will be randomly initialized
->>> model = FlaxVisionEncoderDecoderModel.from_encoder_decoder_pretrained("vit", "gpt2")
+>>> model = FlaxVisionEncoderDecoderModel.from_encoder_decoder_pretrained(
+...     "google/vit-base-patch16-224-in21k", "gpt2"
+... )
 >>> pixel_values = feature_extractor(images=image, return_tensors="np").pixel_values
 >>> encoder_outputs = model.encode(pixel_values)
@@ -469,7 +471,7 @@ class FlaxVisionEncoderDecoderModel(FlaxPreTrainedModel):
 Example:
 ```python
->>> from transformers import FlaxVisionEncoderDecoderModel
+>>> from transformers import ViTFeatureExtractor, FlaxVisionEncoderDecoderModel
 >>> import jax.numpy as jnp
 >>> from PIL import Image
 >>> import requests
@@ -480,7 +482,9 @@ class FlaxVisionEncoderDecoderModel(FlaxPreTrainedModel):
 >>> feature_extractor = ViTFeatureExtractor.from_pretrained("google/vit-base-patch16-224-in21k")
 >>> # initialize a vit-gpt2 from pretrained ViT and GPT2 models. Note that the cross-attention layers will be randomly initialized
->>> model = FlaxVisionEncoderDecoderModel.from_encoder_decoder_pretrained("vit", "gpt2")
+>>> model = FlaxVisionEncoderDecoderModel.from_encoder_decoder_pretrained(
+...     "google/vit-base-patch16-224-in21k", "gpt2"
+... )
 >>> pixel_values = feature_extractor(images=image, return_tensors="np").pixel_values
 >>> encoder_outputs = model.encode(pixel_values)
@@ -610,7 +614,9 @@ class FlaxVisionEncoderDecoderModel(FlaxPreTrainedModel):
 >>> tokenizer_output = GPT2Tokenizer.from_pretrained("gpt2")
 >>> # initialize a vit-gpt2 from pretrained ViT and GPT2 models. Note that the cross-attention layers will be randomly initialized
->>> model = FlaxVisionEncoderDecoderModel.from_encoder_decoder_pretrained("vit", "gpt2")
+>>> model = FlaxVisionEncoderDecoderModel.from_encoder_decoder_pretrained(
+...     "google/vit-base-patch16-224-in21k", "gpt2"
+... )
 >>> pixel_values = feature_extractor(images=image, return_tensors="np").pixel_values
...
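For reference, the corrected doc example assembled from the patched lines runs roughly as follows. This is a minimal sketch rather than the verbatim docstring: the COCO sample image URL and the trailing decode step (starting generation from the decoder's BOS token) are elided from the diff above and are filled in here as assumptions based on the surrounding transformers doc-example conventions.

```python
import jax.numpy as jnp
import requests
from PIL import Image

from transformers import FlaxVisionEncoderDecoderModel, ViTFeatureExtractor

# Assumed test image: the COCO sample used throughout the transformers doc examples.
url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)

feature_extractor = ViTFeatureExtractor.from_pretrained("google/vit-base-patch16-224-in21k")

# Initialize a vit-gpt2 model from pretrained ViT and GPT2 checkpoints; the full
# checkpoint name replaces the wrong "vit" shorthand fixed by this commit. The
# cross-attention layers are randomly initialized.
model = FlaxVisionEncoderDecoderModel.from_encoder_decoder_pretrained(
    "google/vit-base-patch16-224-in21k", "gpt2"
)

pixel_values = feature_extractor(images=image, return_tensors="np").pixel_values
encoder_outputs = model.encode(pixel_values)

# Assumed decode step: feed the decoder's BOS token as the first decoder input.
decoder_start_token_id = model.config.decoder.bos_token_id
decoder_input_ids = jnp.ones((pixel_values.shape[0], 1), dtype="i4") * decoder_start_token_id
outputs = model.decode(decoder_input_ids, encoder_outputs)
logits = outputs.logits
```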