"git@developer.sourcefind.cn:chenpangpang/transformers.git" did not exist on "bc78fd12748a18dbc71faeae000f036378b065d5"
Unverified commit 3bc65505 authored by Yih-Dar, committed by GitHub

Fix doctest for `Blip2ForConditionalGeneration` (#26737)



* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

---------
Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
parent e1cec434
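For context, the examples changed below run as doctests in the repository's CI. A rough, hedged sketch of exercising them locally with the standard-library runner (not part of this commit; the real setup goes through pytest and the repository's doctest configuration):

```python
# Minimal sketch (not from this commit): run the docstring examples of the
# patched module with the standard-library doctest runner.
# The docstrings use a custom "# doctest: +IGNORE_RESULT" directive, so the
# flag is registered here just so the directives parse; a plain runner will
# still compare output on those lines, unlike the repository's own runner.
import doctest

from transformers.models.blip_2 import modeling_blip_2

doctest.register_optionflag("IGNORE_RESULT")

results = doctest.testmod(modeling_blip_2, verbose=False)
print(f"attempted={results.attempted}, failed={results.failed}")
```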
@@ -1272,14 +1272,10 @@ class Blip2Model(Blip2PreTrainedModel):
 >>> import torch
 >>> from transformers import AutoTokenizer, Blip2Model
->>> device = "cuda" if torch.cuda.is_available() else "cpu"
->>> model = Blip2Model.from_pretrained("Salesforce/blip2-opt-2.7b", torch_dtype=torch.float16)
->>> model.to(device) # doctest: +IGNORE_RESULT
+>>> model = Blip2Model.from_pretrained("Salesforce/blip2-opt-2.7b")
 >>> tokenizer = AutoTokenizer.from_pretrained("Salesforce/blip2-opt-2.7b")
->>> inputs = tokenizer(["a photo of a cat", "a photo of a dog"], padding=True, return_tensors="pt").to(device)
+>>> inputs = tokenizer(["a photo of a cat"], padding=True, return_tensors="pt")
 >>> text_features = model.get_text_features(**inputs)
 ```"""
 output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
@@ -1333,16 +1329,12 @@ class Blip2Model(Blip2PreTrainedModel):
 >>> import requests
 >>> from transformers import AutoProcessor, Blip2Model
->>> device = "cuda" if torch.cuda.is_available() else "cpu"
->>> model = Blip2Model.from_pretrained("Salesforce/blip2-opt-2.7b", torch_dtype=torch.float16)
->>> model.to(device) # doctest: +IGNORE_RESULT
+>>> model = Blip2Model.from_pretrained("Salesforce/blip2-opt-2.7b")
 >>> processor = AutoProcessor.from_pretrained("Salesforce/blip2-opt-2.7b")
 >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
 >>> image = Image.open(requests.get(url, stream=True).raw)
->>> inputs = processor(images=image, return_tensors="pt").to(device, torch.float16)
+>>> inputs = processor(images=image, return_tensors="pt")
 >>> image_outputs = model.get_image_features(**inputs)
 ```"""
 output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
@@ -1381,15 +1373,12 @@ class Blip2Model(Blip2PreTrainedModel):
 >>> import requests
 >>> from transformers import Blip2Processor, Blip2Model
->>> device = "cuda" if torch.cuda.is_available() else "cpu"
 >>> processor = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b")
->>> model = Blip2Model.from_pretrained("Salesforce/blip2-opt-2.7b", torch_dtype=torch.float16)
->>> model.to(device) # doctest: +IGNORE_RESULT
+>>> model = Blip2Model.from_pretrained("Salesforce/blip2-opt-2.7b")
 >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
 >>> image = Image.open(requests.get(url, stream=True).raw)
->>> inputs = processor(images=image, return_tensors="pt").to(device, torch.float16)
+>>> inputs = processor(images=image, return_tensors="pt")
 >>> qformer_outputs = model.get_qformer_features(**inputs)
 ```"""
 output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
@@ -1654,7 +1643,7 @@ class Blip2ForConditionalGeneration(Blip2PreTrainedModel):
 Examples:
-Image captioning (without providing a text prompt):
+Prepare processor, model and image input
 ```python
 >>> from PIL import Image
@@ -1666,13 +1655,16 @@ class Blip2ForConditionalGeneration(Blip2PreTrainedModel):
 >>> processor = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b")
 >>> model = Blip2ForConditionalGeneration.from_pretrained(
-...     "Salesforce/blip2-opt-2.7b", torch_dtype=torch.float16
-... )
->>> model.to(device) # doctest: +IGNORE_RESULT
+...     "Salesforce/blip2-opt-2.7b", load_in_8bit=True, device_map={"": 0}, torch_dtype=torch.float16
+... ) # doctest: +IGNORE_RESULT
 >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
 >>> image = Image.open(requests.get(url, stream=True).raw)
+```
+Image captioning (without providing a text prompt):
+```python
 >>> inputs = processor(images=image, return_tensors="pt").to(device, torch.float16)
 >>> generated_ids = model.generate(**inputs)
@@ -1684,21 +1676,6 @@ class Blip2ForConditionalGeneration(Blip2PreTrainedModel):
 Visual question answering (prompt = question):
 ```python
->>> from PIL import Image
->>> import requests
->>> from transformers import Blip2Processor, Blip2ForConditionalGeneration
->>> import torch
->>> device = "cuda" if torch.cuda.is_available() else "cpu"
->>> processor = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b")
->>> model = Blip2ForConditionalGeneration.from_pretrained(
-...     "Salesforce/blip2-opt-2.7b", load_in_8bit=True, device_map={"": 0}, torch_dtype=torch.float16
-... ) # doctest: +IGNORE_RESULT
->>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
->>> image = Image.open(requests.get(url, stream=True).raw)
 >>> prompt = "Question: how many cats are there? Answer:"
 >>> inputs = processor(images=image, text=prompt, return_tensors="pt").to(device="cuda", dtype=torch.float16)
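The hunk is truncated here; decoding the generated ids back into an answer would look roughly like the hedged sketch below (not part of the diff, reusing `processor`, `model`, and `inputs` from the example above):

```python
# Hedged sketch (not part of the diff): turn the ids returned by
# model.generate(**inputs) in the visual question answering example
# above back into text with the processor's batch_decode.
generated_ids = model.generate(**inputs)
generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
print(generated_text)
```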
@@ -1712,20 +1689,10 @@ class Blip2ForConditionalGeneration(Blip2PreTrainedModel):
 This greatly reduces the amount of memory used by the model while maintaining the same performance.
 ```python
->>> from PIL import Image
->>> import requests
->>> from transformers import Blip2Processor, Blip2ForConditionalGeneration
->>> import torch
->>> processor = Blip2Processor.from_pretrained("Salesforce/blip2-flan-t5-xl")
 >>> model = Blip2ForConditionalGeneration.from_pretrained(
-...     "Salesforce/blip2-flan-t5-xl", load_in_8bit=True, device_map={"": 0}, torch_dtype=torch.bfloat16
+...     "Salesforce/blip2-opt-2.7b", load_in_8bit=True, device_map={"": 0}, torch_dtype=torch.bfloat16
 ... ) # doctest: +IGNORE_RESULT
->>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
->>> image = Image.open(requests.get(url, stream=True).raw)
->>> prompt = "Question: how many cats are there? Answer:"
 >>> inputs = processor(images=image, text=prompt, return_tensors="pt").to(device="cuda", dtype=torch.bfloat16)
 >>> generated_ids = model.generate(**inputs)
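The memory claim above can be sanity-checked with `get_memory_footprint()`; a hedged sketch, not part of the commit, assuming a CUDA GPU and `bitsandbytes` are available:

```python
# Hedged sketch (not part of the diff): compare the reported memory footprint
# of the fp16 checkpoint with the int8-quantized one. Requires a CUDA device
# and the bitsandbytes package, plus enough memory to hold both models.
import torch

from transformers import Blip2ForConditionalGeneration

model_fp16 = Blip2ForConditionalGeneration.from_pretrained(
    "Salesforce/blip2-opt-2.7b", torch_dtype=torch.float16
)
print(f"fp16: {model_fp16.get_memory_footprint() / 1e9:.1f} GB")

model_int8 = Blip2ForConditionalGeneration.from_pretrained(
    "Salesforce/blip2-opt-2.7b", load_in_8bit=True, device_map={"": 0}
)
print(f"int8: {model_int8.get_memory_footprint() / 1e9:.1f} GB")
```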
...
docs/source/en/generation_strategies.md
docs/source/en/model_doc/ctrl.md
docs/source/en/task_summary.md
src/transformers/models/blip_2/modeling_blip_2.py
src/transformers/models/ctrl/modeling_ctrl.py