Commit eb807a19 authored by mashun1

add icon
# Configuration for Cog ⚙️
# Reference: https://github.com/replicate/cog/blob/main/docs/yaml.md
# Thanks to chenxwh.
build:
  # set to true if your model requires a GPU
  gpu: true
  cuda: "11.7"
  system_packages:
    - "libgl1-mesa-glx"
    - "libglib2.0-0"
  python_version: "3.8"
  python_packages:
    - "matplotlib==3.7.1"
    - "opencv-python==4.7.0.72"
    - "Pillow==9.5.0"
    - "PyYAML==6.0"
    - "requests==2.31.0"
    - "scipy==1.10.1"
    - "torch==2.0.1"
    - "torchvision==0.15.2"
    - "tqdm==4.65.0"
    - "pandas==2.0.2"
    - "seaborn==0.12.0"
    - "ultralytics==8.0.121"
    - "git+https://github.com/openai/CLIP.git"

predict: "predict.py:Predictor"
# Dockerfile (DCU/dtk base image)
FROM image.sourcefind.cn:5000/dcu/admin/base/pytorch:2.1.0-ubuntu20.04-dtk24.04.1-py3.10
# NOTE: variables sourced in a RUN step only affect that build step; they do not
# persist into later layers or the running container.
RUN source /opt/dtk/env.sh
COPY requirements.txt requirements.txt
RUN pip3 install -r requirements.txt -i https://mirrors.aliyun.com/pypi/simple/
RUN pip3 install git+https://github.com/openai/CLIP.git
# requirements.txt
# Base-----------------------------------
matplotlib>=3.2.2
opencv-python>=4.6.0
Pillow>=7.1.2
PyYAML>=5.3.1
requests>=2.23.0
scipy>=1.4.1
torch>=1.7.0
torchvision>=0.8.1
tqdm>=4.64.0
pandas>=1.1.4
seaborn>=0.11.0
gradio==3.35.2
# Ultralytics-----------------------------------
ultralytics==8.0.120
# Package __init__: re-exports the public FastSAM API
# Ultralytics YOLO 🚀, AGPL-3.0 license
from .model import FastSAM
from .predict import FastSAMPredictor
from .prompt import FastSAMPrompt
# from .val import FastSAMValidator
from .decoder import FastSAMDecoder
__all__ = 'FastSAMPredictor', 'FastSAM', 'FastSAMPrompt', 'FastSAMDecoder'
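The names exported above are typically combined along the lines of the upstream FastSAM examples; a minimal sketch, with placeholder weight and image paths:

```python
# Minimal usage sketch; the weight and image paths are placeholders.
from fastsam import FastSAM, FastSAMPrompt

model = FastSAM('./weights/FastSAM.pt')
everything_results = model('./images/dog.jpg', device='cpu', retina_masks=True,
                           imgsz=1024, conf=0.4, iou=0.9)

prompt_process = FastSAMPrompt('./images/dog.jpg', everything_results, device='cpu')
ann = prompt_process.everything_prompt()                      # all masks
# ann = prompt_process.box_prompt(bbox=[200, 200, 300, 300])  # or a single [x1, y1, x2, y2] box
prompt_process.plot(annotations=ann, output_path='./output/dog.jpg')
```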
# decoder module (imported above as .decoder)
from .model import FastSAM

import numpy as np
import clip  # not referenced below; presumably used for text prompts elsewhere in the project
from PIL import Image
from typing import Optional, List, Tuple, Union
class FastSAMDecoder:

    def __init__(
        self,
        model: FastSAM,
        device: str = 'cpu',
        conf: float = 0.4,
        iou: float = 0.9,
        imgsz: int = 1024,
        retina_masks: bool = True,
    ):
        self.model = model
        self.device = device
        self.retina_masks = retina_masks
        self.imgsz = imgsz
        self.conf = conf
        self.iou = iou
        self.image = None
        self.image_embedding = None
    def run_encoder(self, image):
        # Accept either an image path or an already-loaded array.
        if isinstance(image, str):
            image = np.array(Image.open(image))
        self.image = image
        # Run the full FastSAM model once; its results act as the "embedding"
        # consumed by the prompt-based decoders below.
        image_embedding = self.model(
            self.image,
            device=self.device,
            retina_masks=self.retina_masks,
            imgsz=self.imgsz,
            conf=self.conf,
            iou=self.iou,
        )
        return image_embedding[0].numpy()
    def run_decoder(
        self,
        image_embedding,
        point_prompt: Optional[np.ndarray] = None,
        point_label: Optional[np.ndarray] = None,
        box_prompt: Optional[np.ndarray] = None,
        text_prompt: Optional[str] = None,
    ) -> np.ndarray:
        self.image_embedding = image_embedding
        if point_prompt is not None:
            ann = self.point_prompt(points=point_prompt, pointlabel=point_label)
            return ann
        elif box_prompt is not None:
            ann = self.box_prompt(bbox=box_prompt)
            return ann
        elif text_prompt is not None:
            # NOTE: no text_prompt method is defined on this class in this file.
            ann = self.text_prompt(text=text_prompt)
            return ann
        else:
            return None
    def box_prompt(self, bbox):
        assert (bbox[2] != 0 and bbox[3] != 0)
        masks = self.image_embedding.masks.data
        target_height = self.image.shape[0]
        target_width = self.image.shape[1]
        h = masks.shape[1]
        w = masks.shape[2]
        # Rescale the box from image coordinates to mask coordinates if they differ.
        if h != target_height or w != target_width:
            bbox = [
                int(bbox[0] * w / target_width),
                int(bbox[1] * h / target_height),
                int(bbox[2] * w / target_width),
                int(bbox[3] * h / target_height), ]
        bbox[0] = round(bbox[0]) if round(bbox[0]) > 0 else 0
        bbox[1] = round(bbox[1]) if round(bbox[1]) > 0 else 0
        bbox[2] = round(bbox[2]) if round(bbox[2]) < w else w
        bbox[3] = round(bbox[3]) if round(bbox[3]) < h else h

        # Select the mask with the highest IoU against the box; the intersection is
        # approximated by each mask's area inside the box.
        bbox_area = (bbox[3] - bbox[1]) * (bbox[2] - bbox[0])
        masks_area = np.sum(masks[:, bbox[1]:bbox[3], bbox[0]:bbox[2]], axis=(1, 2))
        orig_masks_area = np.sum(masks, axis=(1, 2))
        union = bbox_area + orig_masks_area - masks_area
        IoUs = masks_area / union
        max_iou_index = np.argmax(IoUs)

        return np.array([masks[max_iou_index].cpu().numpy()])
    def point_prompt(self, points, pointlabel):  # numpy
        masks = self._format_results(self.image_embedding[0], 0)
        target_height = self.image.shape[0]
        target_width = self.image.shape[1]
        h = masks[0]['segmentation'].shape[0]
        w = masks[0]['segmentation'].shape[1]
        # Rescale the points from image coordinates to mask coordinates if they differ.
        if h != target_height or w != target_width:
            points = [[int(point[0] * w / target_width), int(point[1] * h / target_height)] for point in points]
        onemask = np.zeros((h, w))
        # Visit masks from largest to smallest so smaller masks can overwrite larger ones.
        masks = sorted(masks, key=lambda x: x['area'], reverse=True)
        for annotation in masks:
            if isinstance(annotation, dict):
                mask = annotation['segmentation']
            else:
                mask = annotation
            for i, point in enumerate(points):
                # Foreground points add the mask, background points carve it out.
                if mask[point[1], point[0]] == 1 and pointlabel[i] == 1:
                    onemask[mask] = 1
                if mask[point[1], point[0]] == 1 and pointlabel[i] == 0:
                    onemask[mask] = 0
        onemask = onemask >= 1
        return np.array([onemask])
    def _format_results(self, result, filter=0):
        annotations = []
        n = len(result.masks.data)
        for i in range(n):
            annotation = {}
            mask = result.masks.data[i] == 1.0
            if np.sum(mask) < filter:
                continue
            annotation['id'] = i
            annotation['segmentation'] = mask
            annotation['bbox'] = result.boxes.data[i]
            annotation['score'] = result.boxes.conf[i]
            annotation['area'] = annotation['segmentation'].sum()
            annotations.append(annotation)
        return annotations
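A short sketch of how the encoder/decoder pair above might be driven, again with placeholder paths and an illustrative [x1, y1, x2, y2] box:

```python
# Usage sketch for FastSAMDecoder; the paths and the box are illustrative.
from fastsam import FastSAM, FastSAMDecoder

model = FastSAM('./weights/FastSAM.pt')
decoder = FastSAMDecoder(model, device='cpu', conf=0.4, iou=0.9, imgsz=1024)

# Run the full model once to get the per-image results ("embedding").
embedding = decoder.run_encoder('./images/dog.jpg')

# Decode a single mask from a box prompt; a point prompt works the same way
# via the point_prompt/point_label arguments.
masks = decoder.run_decoder(embedding, box_prompt=[100, 100, 400, 400])
print(masks.shape)  # expected (1, H, W): the mask that best matches the box
```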