Contents

Segment Anything

Model Card

1
2
3
4
5
6
7

# Fetch the Segment Anything sources.
git clone https://github.com/facebookresearch/segment-anything.git
cd segment-anything
# Install the package in editable mode.
pip install -e .
# Download the ViT-H checkpoint into ./checkpoints.
# -p: do not fail if the directory already exists; &&: skip the download if mkdir fails.
# -f: fail on HTTP errors instead of saving an error page as the checkpoint; -L: follow redirects.
mkdir -p checkpoints && curl -fL -o checkpoints/sam_vit_h_4b8939.pth https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth

Test run

1
2
3
4
5
6
7
8
9
# Build the ViT-H SAM model from the downloaded checkpoint and move it to the GPU.
from segment_anything import SamAutomaticMaskGenerator, sam_model_registry
sam = sam_model_registry["vit_h"](checkpoint="/home/jovyan/segment-anything/checkpoints/sam_vit_h_4b8939.pth")
sam = sam.to('cuda')
mask_generator = SamAutomaticMaskGenerator(sam)

from PIL import Image
import numpy as np
# Force a 3-channel RGB array: a grayscale, CMYK or RGBA file would otherwise
# produce an array with the wrong number of channels for the mask generator.
# The context manager closes the underlying file once the pixels are copied.
with Image.open('/home/jovyan/segment-anything/1.jpg') as img:
    image = np.asarray(img.convert('RGB'))
# One dict per detected object; see the key descriptions further down.
masks = mask_generator.generate(image)

What does each mask contain?

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
annotation {
    "id"                    : int,              # Annotation id
    "segmentation"          : dict,             # Mask saved in COCO RLE format (generate() returns a boolean H x W array by default instead, as the shape shown below indicates)
    "bbox"                  : [x, y, w, h],     # The box around the mask, in XYWH format
    "area"                  : int,              # The area in pixels of the mask
    "predicted_iou"         : float,            # The model's own prediction of the mask's quality
    "stability_score"       : float,            # A measure of the mask's quality
    "crop_box"              : [x, y, w, h],     # The crop of the image used to generate the mask, in XYWH format
    "point_coords"          : [[x, y]],         # The point coordinates input to the model to generate the mask
}

# Inspect the result of mask_generator.generate(); the trailing comments show
# the values observed for the example image.
num_objects = len(masks)  # 56 objects were found in the example image
masks[0].keys()  # ['segmentation', 'area', 'bbox', 'predicted_iou', 'point_coords', 'stability_score', 'crop_box']
masks[0]['segmentation'].shape  # (512, 384): one True/False value per pixel; True means the pixel belongs to this object
masks[0]['area']  # 80520: area of the mask in pixels, i.e. how big this object is
masks[0]['bbox']  # [25, 106, 323, 325]: bounding box around the mask, in XYWH format
masks[0]['predicted_iou']  # 1.0196424722671509: the model's own prediction of the mask's quality (note it can exceed 1.0, as here)
masks[0]['point_coords']  # [[138.0, 168.0]]: the point coordinates input to the model to generate this mask
masks[0]['stability_score']  # 0.9537884593009949: a further measure of the mask's quality
masks[0]['crop_box']  # [0, 0, 384, 512]: the crop of the image used to generate the mask, in XYWH format (the full image here)