Unverified commit 47c2af09, authored by NielsRogge and committed by GitHub
Browse files

[DETR] Improve code examples (#18262)



* Improve doc test

* Improve code example of segmentation model

* Apply suggestion

* Update src/transformers/models/detr/modeling_detr.py
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
Co-authored-by: Niels Rogge <nielsrogge@Nielss-MacBook-Pro.local>
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
parent ee67e7ad
......@@ -1237,8 +1237,15 @@ class DetrModel(DetrPreTrainedModel):
>>> feature_extractor = DetrFeatureExtractor.from_pretrained("facebook/detr-resnet-50")
>>> model = DetrModel.from_pretrained("facebook/detr-resnet-50")
>>> # prepare image for the model
>>> inputs = feature_extractor(images=image, return_tensors="pt")
>>> # forward pass
>>> outputs = model(**inputs)
>>> # the last hidden states are the final query embeddings of the Transformer decoder
>>> # these are of shape (batch_size, num_queries, hidden_size)
>>> last_hidden_states = outputs.last_hidden_state
>>> list(last_hidden_states.shape)
[1, 100, 256]
......@@ -1389,6 +1396,7 @@ class DetrForObjectDetection(DetrPreTrainedModel):
```python
>>> from transformers import DetrFeatureExtractor, DetrForObjectDetection
>>> import torch
>>> from PIL import Image
>>> import requests
......@@ -1400,17 +1408,24 @@ class DetrForObjectDetection(DetrPreTrainedModel):
>>> inputs = feature_extractor(images=image, return_tensors="pt")
>>> outputs = model(**inputs)
>>> # model predicts bounding boxes and corresponding COCO classes
>>> bboxes, logits = outputs.pred_boxes, outputs.logits
>>> # get probability per object class and remove the no-object class
>>> probas_per_class = outputs.logits.softmax(-1)[:, :, :-1]
>>> objects_to_keep = probas_per_class.max(-1).values > 0.9
>>> ids, _ = probas_per_class.max(-1).indices[objects_to_keep].sort()
>>> labels = [model.config.id2label[id.item()] for id in ids]
>>> labels
['cat', 'cat', 'couch', 'remote', 'remote']
>>> # convert outputs (bounding boxes and class logits) to COCO API
>>> target_sizes = torch.tensor([image.size[::-1]])
>>> results = feature_extractor.post_process(outputs, target_sizes=target_sizes)[0]
>>> for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
...     box = [round(i, 2) for i in box.tolist()]
...     # let's only keep detections with score > 0.9
...     if score > 0.9:
...         print(
...             f"Detected {model.config.id2label[label.item()]} with confidence "
...             f"{round(score.item(), 3)} at location {box}."
...         )
Detected remote with confidence 0.998 at location [40.16, 70.81, 175.55, 117.98].
Detected remote with confidence 0.996 at location [333.24, 72.55, 368.33, 187.66].
Detected couch with confidence 0.995 at location [-0.02, 1.15, 639.73, 473.76].
Detected cat with confidence 0.999 at location [13.24, 52.05, 314.02, 470.93].
Detected cat with confidence 0.999 at location [345.4, 23.85, 640.37, 368.72].
```"""
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
......@@ -1552,9 +1567,14 @@ class DetrForSegmentation(DetrPreTrainedModel):
Examples:
```python
>>> from transformers import DetrFeatureExtractor, DetrForSegmentation
>>> from PIL import Image
>>> import io
>>> import requests
>>> from PIL import Image
>>> import torch
>>> import numpy
>>> from transformers import DetrFeatureExtractor, DetrForSegmentation
>>> from transformers.models.detr.feature_extraction_detr import rgb_to_id
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)
......@@ -1562,20 +1582,23 @@ class DetrForSegmentation(DetrPreTrainedModel):
>>> feature_extractor = DetrFeatureExtractor.from_pretrained("facebook/detr-resnet-50-panoptic")
>>> model = DetrForSegmentation.from_pretrained("facebook/detr-resnet-50-panoptic")
>>> # prepare image for the model
>>> inputs = feature_extractor(images=image, return_tensors="pt")
>>> # forward pass
>>> outputs = model(**inputs)
>>> # model predicts COCO classes, bounding boxes, and masks
>>> logits = outputs.logits
>>> list(logits.shape)
[1, 100, 251]
>>> bboxes = outputs.pred_boxes
>>> list(bboxes.shape)
[1, 100, 4]
>>> masks = outputs.pred_masks
>>> list(masks.shape)
[1, 100, 200, 267]
>>> # use the `post_process_panoptic` method of `DetrFeatureExtractor` to convert to COCO format
>>> processed_sizes = torch.as_tensor(inputs["pixel_values"].shape[-2:]).unsqueeze(0)
>>> result = feature_extractor.post_process_panoptic(outputs, processed_sizes)[0]
>>> # the segmentation is stored in a special-format png
>>> panoptic_seg = Image.open(io.BytesIO(result["png_string"]))
>>> panoptic_seg = numpy.array(panoptic_seg, dtype=numpy.uint8)
>>> # retrieve the ids corresponding to each mask
>>> panoptic_seg_id = rgb_to_id(panoptic_seg)
>>> panoptic_seg_id.shape
(800, 1066)
```"""
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment