# (removed: repository web-viewer residue — filename/size banner, commit
#  byline, and gutter line numbers accidentally captured with the source)
#!/usr/bin/env python
# coding=utf-8
"""
This is a script for downloading and converting the microsoft coco dataset
from mscoco.org. This can be run as an independent executable to download
the dataset or be imported by scripts used for larger experiments.
"""
from __future__ import division, print_function, unicode_literals
import os
import errno
import zipfile
import json
from sacred import Experiment, Ingredient
import numpy as np
from PIL import Image
from keras.utils import get_file
from keras.utils.generic_utils import Progbar
from pycocotools.coco import COCO


def palette():
    """Return grayscale palette colors (cid, cid, cid), one per id from 0 through max(ids())."""
    upper = max(ids()) + 1
    return [(value, value, value) for value in range(upper)]


def cids_to_ids_map():
    """Map each COCO category id (cid) to its dense index within ids()."""
    return dict((cid, idx) for idx, cid in enumerate(ids()))


def ids():
    """Return the 81 category ids used here: 0 (background) plus the 80 COCO object ids, which span 1-90 with gaps."""
    # COCO's 1-90 numbering scheme skips these ids entirely.
    unused = {12, 26, 29, 30, 45, 66, 68, 69, 71, 83}
    return [0] + [cid for cid in range(1, 91) if cid not in unused]


def id_to_palette_map():
    """Map each dense id index to its grayscale palette color."""
    return dict(enumerate(palette()))


def cid_to_palette_map():
    """Map each COCO category id to a grayscale palette color.

    The original enumerated every palette entry (0..max id, 91 entries) and
    indexed the 81-element ids() list with that index, raising IndexError.
    zip() pairs each id with a color and stops at the shorter sequence.
    """
    return {cid: color for cid, color in zip(ids(), palette())}


def palette_to_id_map():
    """Map a grayscale palette color back to its COCO category id.

    The original enumerated every palette entry (0..max id, 91 entries) and
    indexed the 81-element ids() list with that index, raising IndexError.
    zip() pairs each color with an id and stops at the shorter sequence.
    """
    return {color: cid for cid, color in zip(ids(), palette())}


def class_weight(image_segmentation_stats_file=None,
                 weighting_algorithm='total_pixels_p_complement'):
    """Return per-class training weights for the segmentation task.

    Args:
        image_segmentation_stats_file: path to a JSON stats file produced by
            `coco_image_segmentation_stats`, or None for flat defaults.
        weighting_algorithm: key in the stats file selecting which weighting
            to return (e.g. 'total_pixels_p_complement').

    Returns:
        A mapping of class id -> weight.
    """
    if image_segmentation_stats_file is None:
        # No stats available: flat 1.5 per class, background down-weighted.
        weights = {i: 1.5 for i in ids()}
        weights[0] = 0.5
        return weights
    with open(image_segmentation_stats_file, 'r') as fjson:
        # json.load, not json.loads: we have a file object, not a string
        # (the original json.loads(fjson) raised TypeError).
        stats = json.load(fjson)
    return stats[weighting_algorithm]


def mask_to_palette_map(cid):
    """Return a {0: background_color, 255: class_color} map for a binary mask of class *cid*."""
    colors = id_to_palette_map()
    return {0: colors[0], 255: colors[cid]}


def categories():
    """Return the 81 class names: 'background' at index 0 followed by the 80 COCO object categories, ordered to match ids()."""
    return ['background',  # class zero
            'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
            'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
            'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog',
            'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
            'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
            'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat',
            'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
            'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
            'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
            'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
            'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop',
            'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
            'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
            'scissors', 'teddy bear', 'hair drier', 'toothbrush']


def id_to_category(category_id):
    """Look up the human-readable name for a COCO category id (KeyError if unknown)."""
    return dict(zip(ids(), categories()))[category_id]


def category_to_cid_map():
    """Map each category name to its COCO category id."""
    return dict(zip(categories(), ids()))


def mkdir_p(path):
    """Create *path* and any missing parents, succeeding silently if it already exists (like `mkdir -p`).

    http://stackoverflow.com/questions/600268/mkdir-p-functionality-in-python
    """
    try:
        os.makedirs(path)
    except OSError as err:  # Python >2.5
        # Only swallow the error when the directory is already present;
        # anything else (permissions, a file in the way) propagates.
        if err.errno != errno.EEXIST or not os.path.isdir(path):
            raise


# ============== Ingredient 2: dataset =======================
# NOTE(review): the banner says "Ingredient" but this creates a sacred
# Experiment named "dataset"; `Ingredient` is imported above yet unused —
# confirm which was intended.
data_coco = Experiment("dataset")


@data_coco.config
def coco_config():
    """Sacred config scope: download URLs, md5 checksums, and the on-disk
    layout for the MS COCO 2014/2015 dataset."""
    # TODO(ahundt) add md5 sums for each file
    verbose = 1
    coco_api = 'https://github.com/pdollar/coco/'
    dataset_root = os.path.join(os.path.expanduser('~'), 'datasets')
    dataset_path = os.path.join(dataset_root, 'coco')
    urls = [
        'coco2014/train2014.zip',
        'coco2014/val2014.zip',
        'coco2014/test2014.zip',
        'coco2015/test2015.zip',
        'annotations-1-0-3/instances_train-val2014.zip',
        'annotations-1-0-3/person_keypoints_trainval2014.zip',
        'annotations-1-0-4/image_info_test2014.zip',
        'annotations-1-0-4/image_info_test2015.zip',
        'annotations-1-0-3/captions_train-val2014.zip'
    ]
    base_url = 'http://msvocds.blob.core.windows.net/'
    urls = [base_url + x for x in urls]
    data_prefixes = [
        'train2014',
        'val2014',
        'test2014',
        'test2015',
    ]
    image_filenames = [prefix + '.zip' for prefix in data_prefixes]
    annotation_filenames = [
        'instances_train-val2014.zip',  # training AND validation info
        'image_info_test2014.zip',  # basic info like download links + category
        'image_info_test2015.zip',  # basic info like download links + category
        'person_keypoints_trainval2014.zip',  # elbows, head, wrist etc
        'captions_train-val2014.zip',  # descriptions of images
    ]
    md5s = [
        '0da8c0bd3d6becc4dcb32757491aca88',  # train2014.zip
        'a3d79f5ed8d289b7a7554ce06a5782b3',  # val2014.zip
        '04127eef689ceac55e3a572c2c92f264',  # test2014.zip
        '65562e58af7d695cc47356951578c041',  # test2015.zip
        '59582776b8dd745d649cd249ada5acf7',  # instances_train-val2014.zip
        '926b9df843c698817ee62e0e049e3753',  # person_keypoints_trainval2014.zip
        'f3366b66dc90d8ae0764806c95e43c86',  # image_info_test2014.zip
        '8a5ad1a903b7896df7f8b34833b61757',  # image_info_test2015.zip
        '5750999c8c964077e3c81581170be65b'   # captions_train-val2014.zip
    ]
    filenames = image_filenames + annotation_filenames
    seg_mask_path = os.path.join(dataset_path, 'seg_mask')
    annotation_json = [
        'annotations/instances_train2014.json',
        'annotations/instances_val2014.json'
    ]
    annotation_paths = [os.path.join(dataset_path, postfix)
                        for postfix in annotation_json]
    # Only the first two data prefixes (train2014, val2014) contain
    # segmentation masks. The slice must be [0:2] so these lists stay the
    # same length as annotation_paths above — the original [0:1] dropped
    # val2014 and silently truncated the zip() pairings in the converters.
    seg_mask_image_paths = [os.path.join(dataset_path, prefix)
                            for prefix in data_prefixes[0:2]]
    seg_mask_output_paths = [os.path.join(seg_mask_path, prefix)
                             for prefix in data_prefixes[0:2]]
    seg_mask_extensions = ['.npy' for prefix in data_prefixes[0:2]]
    image_dirs = [os.path.join(dataset_path, prefix) for prefix in data_prefixes]
    image_extensions = ['.jpg' for prefix in data_prefixes]
    voc_imageset_txt_paths = [os.path.join(dataset_path,
                                           'annotations', prefix + '.txt')
                              for prefix in data_prefixes]


@data_coco.capture
def coco_files(dataset_path, filenames, dataset_root, urls, md5s, annotation_paths):
    """Print the captured dataset configuration and return the full path of every archive."""
    for value in (dataset_path, dataset_root, urls,
                  filenames, md5s, annotation_paths):
        print(value)
    return [os.path.join(dataset_path, name) for name in filenames]


@data_coco.command
def print_coco_files(dataset_path, filenames, dataset_root,
                     urls, md5s, annotation_paths):
    """Sacred command: print the configured dataset archives and paths."""
    coco_files(dataset_path=dataset_path, filenames=filenames,
               dataset_root=dataset_root, urls=urls, md5s=md5s,
               annotation_paths=annotation_paths)


@data_coco.command
def coco_download(dataset_path, filenames, dataset_root,
                  urls, md5s, annotation_paths):
    """Download every COCO archive (md5-verified) and extract it into dataset_path."""
    # Called for its configuration printout; the returned archive paths are
    # unused because get_file() reports the cached location itself.
    coco_files(dataset_path, filenames, dataset_root,
               urls, md5s, annotation_paths)
    for url, filename, md5 in zip(urls, filenames, md5s):
        path = get_file(filename, url, md5_hash=md5,
                        extract=True, cache_subdir=dataset_path)
        # TODO(ahundt) check if it is already extracted, don't re-extract. see
        # https://github.com/fchollet/keras/issues/5861
        # Context manager guarantees the handle is closed even if
        # extraction raises (the original leaked it on error).
        with zipfile.ZipFile(path, 'r') as archive:
            archive.extractall(path=dataset_path)


@data_coco.command
def coco_json_to_segmentation(seg_mask_output_paths,
                              annotation_paths, seg_mask_image_paths, verbose):
    """Convert COCO JSON annotations into segmentation masks.

    Writes, for every annotated image, a single-channel .png whose pixel
    values are category ids, and a one-hot encoded .npy mask with one
    channel per category (channel 0 = background). Existing output files
    are skipped.
    """
    for (seg_mask_path, annFile, image_path) in zip(
            seg_mask_output_paths, annotation_paths, seg_mask_image_paths):
        print('Loading COCO Annotations File: ', annFile)
        print('Segmentation Mask Output Folder: ', seg_mask_path)
        print('Source Image Folder: ', image_path)
        print('\n'
              'WARNING: Each pixel can have multiple classes! That means '
              'class data overlaps. Also, single objects can be outlined '
              'multiple times because they were labeled by different people! '
              'In other words, even a single object may be segmented twice. '
              'This means the .png files are missing entire objects.\n\n'
              'Use of categorical one-hot encoded .npy files is recommended, '
              'but .npy files also have limitations, because the .npy files '
              'only have one label per pixel for each class, '
              'and currently take the union of multiple human class labels. '
              'Improving how your data is handled will improve your results '
              'so remember to consider that limitation. There is still '
              'an opportunity to improve how this training data is handled & '
              'integrated with your training scripts and utilities...')
        coco = COCO(annFile)

        print('Converting Annotations to Segmentation Masks...')
        mkdir_p(seg_mask_path)
        total_imgs = len(coco.imgToAnns)
        progbar = Progbar(total_imgs + len(coco.getImgIds()), verbose=verbose)
        # 'annotations' was previously 'instances' in an old version.
        # Iterate the dict values directly: dict views are not indexable in
        # Python 3, so the original `coco.imgToAnns.keys()[img_num]` raised
        # a TypeError.
        for img_num, img_anns in enumerate(coco.imgToAnns.values()):
            # [0] extracts the single element loadImgs returns in a list.
            img = coco.loadImgs(img_anns[0]['image_id'])[0]
            h = img['height']
            w = img['width']
            name = img['file_name']
            root_name = name[:-4]
            filename = os.path.join(seg_mask_path, root_name + ".png")
            if os.path.exists(filename):
                progbar.update(img_num, [('file_fraction_already_exists', 1)])
                continue
            progbar.update(img_num, [('file_fraction_already_exists', 0)])
            print(filename)

            # Single-channel mask; each pixel holds the last-drawn
            # category id (later annotations overwrite earlier ones).
            MASK = np.zeros((h, w), dtype=np.uint8)
            for ann in img_anns:
                mask = coco.annToMask(ann)
                MASK[mask > 0] = ann['category_id']

            Image.fromarray(MASK).save(filename)

        print('\nConverting Annotations to one hot encoded '
              'categorical .npy Segmentation Masks...')
        img_ids = coco.getImgIds()
        for idx, img_id in enumerate(img_ids):
            img = coco.loadImgs(img_id)[0]
            name = img['file_name']
            root_name = name[:-4]
            filename = os.path.join(seg_mask_path, root_name + ".npy")
            if os.path.exists(filename):
                progbar.add(1, [('file_fraction_already_exists', 1)])
                continue
            progbar.add(1, [('file_fraction_already_exists', 0)])

            # One channel per category id (plus background), original dims.
            target_shape = (img['height'], img['width'], max(ids()) + 1)
            ann_ids = coco.getAnnIds(imgIds=img['id'], iscrowd=None)
            anns = coco.loadAnns(ann_ids)
            mask_one_hot = np.zeros(target_shape, dtype=np.uint8)
            mask_one_hot[:, :, 0] = 1  # every pixel begins as background

            for ann in anns:
                mask_partial = coco.annToMask(ann)
                # Union of all annotations: mark the class channel and
                # clear background wherever this annotation covers.
                mask_one_hot[mask_partial > 0, ann['category_id']] = 1
                mask_one_hot[mask_partial > 0, 0] = 0

            np.save(filename, mask_one_hot)


@data_coco.command
def coco_to_pascal_voc_imageset_txt(voc_imageset_txt_paths, image_dirs,
                                    image_extensions):
    """Write Pascal-VOC-style imageset .txt files listing every image basename
    (without extension) found in each image directory, one name per line."""
    # os.environ["CUDA_VISIBLE_DEVICES"] = '1'
    # Get some image/annotation pairs for example
    for imgset_path, img_dir, t_ext in zip(
            voc_imageset_txt_paths, image_dirs, image_extensions):
        with open(imgset_path, 'w') as txtfile:
            # Plain loop instead of a side-effect list comprehension.
            for entry in os.listdir(img_dir):
                if entry.endswith(t_ext):
                    txtfile.write(os.path.splitext(os.path.basename(entry))[0] + '\n')


@data_coco.command
def coco_image_segmentation_stats(seg_mask_output_paths, annotation_paths,
                                  seg_mask_image_paths, verbose):
    """Tally per-category pixel statistics over the COCO segmentation data.

    For each annotation file, counts how many pixels belong to every
    category (with a temporary +1 shift so background is distinguishable
    from "no label at all"), then writes several candidate class weightings
    to a JSON stats file and a CSV of normalized counts in seg_mask_path.
    """
    for (seg_mask_path, annFile, image_path) in zip(
            seg_mask_output_paths, annotation_paths, seg_mask_image_paths):
        print('Loading COCO Annotations File: ', annFile)
        print('Segmentation Mask Output Folder: ', seg_mask_path)
        print('Source Image Folder: ', image_path)
        stats_json = os.path.join(seg_mask_path,
                                  'image_segmentation_class_stats.json')
        print('Image stats will be saved to:', stats_json)
        cat_csv = os.path.join(seg_mask_path,
                               'class_counts_over_sum_category_counts.csv')
        print('Category weights will be saved to:', cat_csv)
        coco = COCO(annFile)
        print('Annotation file info:')
        coco.info()
        print('category ids, not including 0 for background:')
        print(coco.getCatIds())
        # display COCO categories and supercategories
        cats = coco.loadCats(coco.getCatIds())
        nms = [cat['name'] for cat in cats]
        print('categories: \n\n', ' '.join(nms))

        nms = set([cat['supercategory'] for cat in cats])
        print('supercategories: \n', ' '.join(nms))
        img_ids = coco.getImgIds()
        max_ids = max(ids()) + 1  # add background category
        # 0 indicates no category (not even background) for counting bins
        max_bin_count = max_ids + 1
        bin_count = np.zeros(max_bin_count)
        total_pixels = 0

        print('Calculating image segmentation stats...')
        progbar = Progbar(len(img_ids), verbose=verbose)
        for idx, img_id in enumerate(img_ids):
            img = coco.loadImgs(img_id)[0]
            progbar.update(idx + 1)
            ann_ids = coco.getAnnIds(imgIds=img['id'], iscrowd=None)
            anns = coco.loadAnns(ann_ids)
            target_shape = (img['height'], img['width'], max_ids)
            mask_one_hot = np.zeros(target_shape, dtype=np.uint8)

            # Note to only count background pixels once, we define a temporary
            # null class of 0, and shift all class category ids up by 1
            mask_one_hot[:, :, 0] = 1  # every pixel begins as background

            for ann in anns:
                mask_partial = coco.annToMask(ann)
                above_zero = mask_partial > 0
                mask_one_hot[above_zero, ann['category_id']] = ann['category_id'] + 1
                mask_one_hot[above_zero, 0] = 0

            # minlength pads the histogram with zeros up to max_bin_count,
            # replacing the original manual list-concatenation workaround.
            bin_count = bin_count + np.bincount(mask_one_hot.flatten(),
                                                minlength=max_bin_count)
            total_pixels += (img['height'] * img['width'])

        print('Final Tally:')
        # shift categories back down by 1 (drop the "no label" bin)
        bin_count = bin_count[1:]
        category_ids = range(bin_count.size)
        sum_category_counts = np.sum(bin_count)

        # sum will be =1 as a pixel can be in multiple categories
        category_counts_over_sum_category_counts = \
            np.true_divide(bin_count.astype(np.float64), sum_category_counts)
        np.savetxt(cat_csv, category_counts_over_sum_category_counts)

        # sum will be >1 as a pixel can be in multiple categories
        category_counts_over_total_pixels = \
            np.true_divide(bin_count.astype(np.float64), total_pixels)

        # less common categories have more weight, sum = 1
        category_counts_p_complement = \
            [1 - x if x > 0.0 else 0.0
             for x in category_counts_over_sum_category_counts]

        # less common categories have more weight, sum > 1
        total_pixels_p_complement = \
            [1 - x if x > 0.0 else 0.0
             for x in category_counts_over_total_pixels]

        print(bin_count)
        stat_dict = {
            'total_pixels': total_pixels,
            'category_counts': dict(zip(category_ids, bin_count)),
            'sum_category_counts': sum_category_counts,
            'category_counts_over_sum_category_counts':
                dict(zip(category_ids,
                         category_counts_over_sum_category_counts)),
            'category_counts_over_total_pixels':
                dict(zip(category_ids, category_counts_over_total_pixels)),
            'category_counts_p_complement':
                dict(zip(category_ids, category_counts_p_complement)),
            'total_pixels_p_complement':
                dict(zip(category_ids, total_pixels_p_complement)),
            'ids': ids(),
            'categories': categories()
        }
        print(stat_dict)
        with open(stats_json, 'w') as fjson:
            json.dump(stat_dict, fjson, ensure_ascii=False)


@data_coco.command
def coco_setup(dataset_root, dataset_path, data_prefixes,
               filenames, urls, md5s, annotation_paths,
               image_dirs, seg_mask_output_paths, verbose,
               image_extensions, seg_mask_image_paths=None,
               voc_imageset_txt_paths=None):
    """Download COCO, then convert annotations to masks and VOC imagesets.

    `seg_mask_image_paths` and `voc_imageset_txt_paths` are injected from
    the sacred config (the original body referenced the latter without it
    being in scope, a NameError, and called coco_json_to_segmentation with
    only two of its four required arguments).
    """
    # download the dataset
    coco_download(dataset_path, filenames, dataset_root,
                  urls, md5s, annotation_paths)
    # convert the relevant files to a more useful format
    coco_json_to_segmentation(seg_mask_output_paths, annotation_paths,
                              seg_mask_image_paths, verbose)
    coco_to_pascal_voc_imageset_txt(voc_imageset_txt_paths, image_dirs,
                                    image_extensions)


@data_coco.automain
def main(dataset_root, dataset_path, data_prefixes,
         filenames, urls, md5s, annotation_paths,
         image_dirs, seg_mask_output_paths):
    """Entry point: download COCO and convert annotations to segmentation data.

    Parameters are injected from the sacred config; they are declared here
    so they show up in the experiment's captured arguments.
    """
    # Let sacred inject every argument from coco_config. The original call
    # passed the config values positionally in the wrong order
    # (data_prefixes where dataset_root belonged, filenames where
    # data_prefixes belonged, ...) and omitted two required parameters.
    coco_setup()