chenpangpang/transformers, commit c832bcb8 (unverified)

Authored Nov 27, 2023 by NielsRogge; committed via GitHub on Nov 27, 2023.

Fix owlv2 code snippet (#27698)

* Fix code snippet
* Improve code snippet
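The underlying issue: the OWLv2 processor pads and resizes input images to a square before normalizing, so the model's box predictions live in the coordinate frame of that padded image, not of the original one, and rescaling with `image.size` produced wrong boxes. The diff's own comments ("boxes need to be visualized on the padded, unnormalized image") confirm this. A minimal sketch of the mismatch; the 960x960 size is an assumption about this checkpoint's image-processor defaults, not something stated in the commit:

```python
# Sketch of the mismatch this commit fixes. Assumption (not stated in the
# diff): the checkpoint's image processor pads the image to a square and
# outputs 960x960 pixel values.
import requests
from PIL import Image
from transformers import AutoProcessor

processor = AutoProcessor.from_pretrained("google/owlv2-base-patch16-ensemble")

url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)
inputs = processor(images=image, return_tensors="pt")

print(image.size)                 # (640, 480): the original (width, height)
print(inputs.pixel_values.shape)  # expected torch.Size([1, 3, 960, 960]) if the
                                  # processor pads/resizes to 960x960 (assumption)
```

Rescaling boxes with the original (480, 640) instead of the padded frame is exactly the bug the new `get_preprocessed_image` helper in this diff works around.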
Parent: 334a6d18

Showing 1 changed file with 56 additions and 20 deletions (+56, -20)
src/transformers/models/owlv2/modeling_owlv2.py
...
...
@@ -1544,19 +1544,38 @@ class Owlv2ForObjectDetection(Owlv2PreTrainedModel):
         >>> import requests
         >>> from PIL import Image
         >>> import torch
+        >>> import numpy as np
         >>> from transformers import AutoProcessor, Owlv2ForObjectDetection
+        >>> from transformers.utils.constants import OPENAI_CLIP_MEAN, OPENAI_CLIP_STD

         >>> processor = AutoProcessor.from_pretrained("google/owlv2-base-patch16-ensemble")
         >>> model = Owlv2ForObjectDetection.from_pretrained("google/owlv2-base-patch16-ensemble")

         >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
         >>> image = Image.open(requests.get(url, stream=True).raw)
         >>> query_url = "http://images.cocodataset.org/val2017/000000001675.jpg"
         >>> query_image = Image.open(requests.get(query_url, stream=True).raw)
         >>> inputs = processor(images=image, query_images=query_image, return_tensors="pt")

         >>> # forward pass
         >>> with torch.no_grad():
         ...     outputs = model.image_guided_detection(**inputs)

-        >>> # Target image sizes (height, width) to rescale box predictions [batch_size, 2]
-        >>> target_sizes = torch.Tensor([image.size[::-1]])
+        >>> # Note: boxes need to be visualized on the padded, unnormalized image
+        >>> # hence we'll set the target image sizes (height, width) based on that
+        >>> def get_preprocessed_image(pixel_values):
+        ...     pixel_values = pixel_values.squeeze().numpy()
+        ...     unnormalized_image = (pixel_values * np.array(OPENAI_CLIP_STD)[:, None, None]) + np.array(OPENAI_CLIP_MEAN)[:, None, None]
+        ...     unnormalized_image = (unnormalized_image * 255).astype(np.uint8)
+        ...     unnormalized_image = np.moveaxis(unnormalized_image, 0, -1)
+        ...     unnormalized_image = Image.fromarray(unnormalized_image)
+        ...     return unnormalized_image

+        >>> unnormalized_image = get_preprocessed_image(inputs.pixel_values)
+        >>> target_sizes = torch.Tensor([unnormalized_image.size[::-1]])

         >>> # Convert outputs (bounding boxes and class logits) to COCO API
         >>> results = processor.post_process_image_guided_detection(
         ...     outputs=outputs, threshold=0.9, nms_threshold=0.3, target_sizes=target_sizes
...
...
@@ -1566,19 +1585,19 @@ class Owlv2ForObjectDetection(Owlv2PreTrainedModel):
         >>> for box, score in zip(boxes, scores):
         ...     box = [round(i, 2) for i in box.tolist()]
         ...     print(f"Detected similar object with confidence {round(score.item(), 3)} at location {box}")
-        Detected similar object with confidence 0.938 at location [327.31, 54.94, 547.39, 268.06]
-        Detected similar object with confidence 0.959 at location [5.78, 360.65, 619.12, 366.39]
-        Detected similar object with confidence 0.902 at location [2.85, 360.01, 627.63, 380.79]
-        Detected similar object with confidence 0.985 at location [176.97, -29.45, 672.69, 182.83]
-        Detected similar object with confidence 1.0 at location [6.53, 14.35, 624.87, 470.82]
-        Detected similar object with confidence 0.998 at location [579.98, 29.14, 615.49, 489.05]
-        Detected similar object with confidence 0.985 at location [206.15, 10.53, 247.74, 466.01]
-        Detected similar object with confidence 0.947 at location [18.62, 429.72, 646.5, 457.72]
-        Detected similar object with confidence 0.996 at location [523.88, 20.69, 586.84, 483.18]
-        Detected similar object with confidence 0.998 at location [3.39, 360.59, 617.29, 499.21]
-        Detected similar object with confidence 0.969 at location [4.47, 449.05, 614.5, 474.76]
-        Detected similar object with confidence 0.966 at location [31.44, 463.65, 654.66, 471.07]
-        Detected similar object with confidence 0.924 at location [30.93, 468.07, 635.35, 475.39]
+        Detected similar object with confidence 0.938 at location [490.96, 109.89, 821.09, 536.11]
+        Detected similar object with confidence 0.959 at location [8.67, 721.29, 928.68, 732.78]
+        Detected similar object with confidence 0.902 at location [4.27, 720.02, 941.45, 761.59]
+        Detected similar object with confidence 0.985 at location [265.46, -58.9, 1009.04, 365.66]
+        Detected similar object with confidence 1.0 at location [9.79, 28.69, 937.31, 941.64]
+        Detected similar object with confidence 0.998 at location [869.97, 58.28, 923.23, 978.1]
+        Detected similar object with confidence 0.985 at location [309.23, 21.07, 371.61, 932.02]
+        Detected similar object with confidence 0.947 at location [27.93, 859.45, 969.75, 915.44]
+        Detected similar object with confidence 0.996 at location [785.82, 41.38, 880.26, 966.37]
+        Detected similar object with confidence 0.998 at location [5.08, 721.17, 925.93, 998.41]
+        Detected similar object with confidence 0.969 at location [6.7, 898.1, 921.75, 949.51]
+        Detected similar object with confidence 0.966 at location [47.16, 927.29, 981.99, 942.14]
+        Detected similar object with confidence 0.924 at location [46.4, 936.13, 953.02, 950.78]
         ```"""
        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
        output_hidden_states = (
...
...
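One observation on the new snippet before moving to the text-conditioned hunk: `get_preprocessed_image` only undoes the normalization, so the reconstructed image has exactly the spatial size of `pixel_values`. Under that assumption, the same target sizes can be read straight off the tensor. A sketch, not part of this commit:

```python
# Equivalent to torch.Tensor([unnormalized_image.size[::-1]]): unnormalizing
# doesn't change height or width, so the padded tensor's last two dims are
# already the (height, width) the boxes should be rescaled to.
target_sizes = torch.Tensor([inputs.pixel_values.shape[-2:]])
```

The docstring's longer route via PIL is still useful when you actually want the image object to draw on, as in the visualization sketch at the end of this diff.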
@@ -1650,8 +1669,10 @@ class Owlv2ForObjectDetection(Owlv2PreTrainedModel):
         ```python
         >>> import requests
         >>> from PIL import Image
+        >>> import numpy as np
         >>> import torch
         >>> from transformers import AutoProcessor, Owlv2ForObjectDetection
+        >>> from transformers.utils.constants import OPENAI_CLIP_MEAN, OPENAI_CLIP_STD

         >>> processor = AutoProcessor.from_pretrained("google/owlv2-base-patch16-ensemble")
         >>> model = Owlv2ForObjectDetection.from_pretrained("google/owlv2-base-patch16-ensemble")
...
...
@@ -1660,10 +1681,25 @@ class Owlv2ForObjectDetection(Owlv2PreTrainedModel):
         >>> image = Image.open(requests.get(url, stream=True).raw)
         >>> texts = [["a photo of a cat", "a photo of a dog"]]
         >>> inputs = processor(text=texts, images=image, return_tensors="pt")
-        >>> outputs = model(**inputs)

-        >>> # Target image sizes (height, width) to rescale box predictions [batch_size, 2]
-        >>> target_sizes = torch.Tensor([image.size[::-1]])
+        >>> # forward pass
+        >>> with torch.no_grad():
+        ...     outputs = model(**inputs)

+        >>> # Note: boxes need to be visualized on the padded, unnormalized image
+        >>> # hence we'll set the target image sizes (height, width) based on that
+        >>> def get_preprocessed_image(pixel_values):
+        ...     pixel_values = pixel_values.squeeze().numpy()
+        ...     unnormalized_image = (pixel_values * np.array(OPENAI_CLIP_STD)[:, None, None]) + np.array(OPENAI_CLIP_MEAN)[:, None, None]
+        ...     unnormalized_image = (unnormalized_image * 255).astype(np.uint8)
+        ...     unnormalized_image = np.moveaxis(unnormalized_image, 0, -1)
+        ...     unnormalized_image = Image.fromarray(unnormalized_image)
+        ...     return unnormalized_image

+        >>> unnormalized_image = get_preprocessed_image(inputs.pixel_values)
+        >>> target_sizes = torch.Tensor([unnormalized_image.size[::-1]])

         >>> # Convert outputs (bounding boxes and class logits) to final bounding boxes and scores
         >>> results = processor.post_process_object_detection(
         ...     outputs=outputs, threshold=0.2, target_sizes=target_sizes
...
...
@@ -1676,8 +1712,8 @@ class Owlv2ForObjectDetection(Owlv2PreTrainedModel):
         >>> for box, score, label in zip(boxes, scores, labels):
         ...     box = [round(i, 2) for i in box.tolist()]
         ...     print(f"Detected {text[label]} with confidence {round(score.item(), 3)} at location {box}")
-        Detected a photo of a cat with confidence 0.614 at location [341.67, 17.54, 642.32, 278.51]
-        Detected a photo of a cat with confidence 0.665 at location [6.75, 38.97, 326.62, 354.85]
+        Detected a photo of a cat with confidence 0.614 at location [512.5, 35.08, 963.48, 557.02]
+        Detected a photo of a cat with confidence 0.665 at location [10.13, 77.94, 489.93, 709.69]
         ```"""
        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
        output_hidden_states = (
...
...
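Since the whole point of the fix is that the boxes live in the coordinate frame of the padded, unnormalized image, a natural follow-up is to draw them on that image. A sketch, not part of this commit: it assumes the `unnormalized_image`, `boxes`, `scores`, `labels`, and `texts` variables from the text-conditioned snippet above, and the output filename is hypothetical.

```python
from PIL import ImageDraw

# Draw the post-processed boxes on the padded, unnormalized image, which is
# the same frame the target_sizes above were computed for.
draw = ImageDraw.Draw(unnormalized_image)
for box, score, label in zip(boxes, scores, labels):
    xmin, ymin, xmax, ymax = box.tolist()
    draw.rectangle((xmin, ymin, xmax, ymax), outline="red", width=2)
    draw.text((xmin, ymin), f"{texts[0][label]}: {score.item():.2f}", fill="red")

unnormalized_image.save("owlv2_detections.png")  # hypothetical filename
```

Note that boxes can extend past the image edge (the snippet's own expected outputs include negative coordinates such as -58.9); PIL simply clips them when drawing.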