Commit 8de66223 authored by maming's avatar maming
Browse files

Initial commit

parents
Pipeline #3358 canceled with stages
import numpy as np
from keras.callbacks import Callback
from keras import backend as K
class DeadReluDetector(Callback):
    """Reports the number of dead ReLUs after each training epoch.

    A ReLU is considered to be dead if it did not fire once for the entire
    training set.

    # Arguments
        x_train: Training dataset to check whether or not neurons fire
        verbose: verbosity mode
            True means that even a single dead neuron triggers a warning message
            False means that only significant number of dead neurons (10% or more)
            triggers a warning message
    """

    def __init__(self, x_train, verbose=False):
        super(DeadReluDetector, self).__init__()
        self.x_train = x_train
        self.verbose = verbose
        # Fraction of dead neurons in a layer above which a warning is
        # printed even in non-verbose mode.
        self.dead_neurons_share_threshold = 0.1

    @staticmethod
    def is_relu_layer(layer):
        # Should work for all layers with relu
        # activation. Tested for Dense and Conv2D
        return layer.get_config().get('activation', None) == 'relu'

    def get_relu_activations(self):
        """Yield [index, activations, name, weight shape] for ReLU layers.

        Runs the training set through the model once and yields the data
        needed by on_epoch_end for every ReLU layer that has weights.
        """
        model_input = self.model.input
        is_multi_input = isinstance(model_input, list)
        if not is_multi_input:
            model_input = [model_input]
        # One backend function per weighted layer, mapping the model inputs
        # (plus the learning-phase flag) to that layer's output.
        funcs = {}
        for index, layer in enumerate(self.model.layers):
            if not layer.get_weights():
                # Layers without weights (pooling, dropout, ...) cannot have
                # dead kernels; skip them.
                continue
            funcs[index] = K.function(model_input
                                      + [K.learning_phase()], [layer.output])
        # The learning phase (1. == training mode) is passed as last input.
        if is_multi_input:
            list_inputs = []
            list_inputs.extend(self.x_train)
            list_inputs.append(1.)
        else:
            list_inputs = [self.x_train, 1.]
        layer_outputs = {}
        for index, func in funcs.items():
            layer_outputs[index] = func(list_inputs)[0]
        for layer_index, layer_activations in layer_outputs.items():
            if self.is_relu_layer(self.model.layers[layer_index]):
                layer_name = self.model.layers[layer_index].name
                # layer_weight is a list [W] (+ [b])
                layer_weight = self.model.layers[layer_index].get_weights()
                # with kernel and bias, the weights are saved as a list [W, b].
                # If only weights, it is [W]
                if type(layer_weight) is not list:
                    raise ValueError("'Layer_weight' should be a list, "
                                     "but was {}".format(type(layer_weight)))
                # there are no weights for current layer; skip it
                # this is only legitimate if layer is "Activation"
                if len(layer_weight) == 0:
                    continue
                layer_weight_shape = np.shape(layer_weight[0])
                yield [layer_index,
                       layer_activations,
                       layer_name,
                       layer_weight_shape]

    def on_epoch_end(self, epoch, logs=None):
        """Count neurons that never activated over x_train and warn.

        Bug fix: `logs` used to default to a mutable `{}` shared across
        calls; it is unused here, so None is a safe default.
        """
        for relu_activation in self.get_relu_activations():
            layer_index = relu_activation[0]
            activation_values = relu_activation[1]
            layer_name = relu_activation[2]
            layer_weight_shape = relu_activation[3]
            shape_act = activation_values.shape
            weight_len = len(layer_weight_shape)
            act_len = len(shape_act)
            # should work for both Conv and Flat
            if K.image_data_format() == 'channels_last':
                # features in last axis
                axis_filter = -1
            else:
                # features before the convolution axis, for weight_
                # len the input and output have to be subtracted
                axis_filter = -1 - (weight_len - 2)
            # Number of feature maps (neurons) along the feature axis.
            total_featuremaps = shape_act[axis_filter]
            # Sum the activations over every axis except the feature axis;
            # a neuron whose total activation is exactly 0 never fired.
            axis = []
            for i in range(act_len):
                if (i != axis_filter) and (i != (len(shape_act) + axis_filter)):
                    axis.append(i)
            axis = tuple(axis)
            dead_neurons = np.sum(np.sum(activation_values, axis=axis) == 0)
            dead_neurons_share = float(dead_neurons) / float(total_featuremaps)
            if ((self.verbose and dead_neurons > 0)
                    or dead_neurons_share >= self.dead_neurons_share_threshold):
                str_warning = ('Layer {} (#{}) has {} '
                               'dead neurons ({:.2%})!').format(layer_name,
                                                                layer_index,
                                                                dead_neurons,
                                                                dead_neurons_share)
                print(str_warning)
from __future__ import absolute_import
from __future__ import print_function
import os
import numpy as np
from keras.callbacks import Callback, ModelCheckpoint, LearningRateScheduler
try:
import requests
except ImportError:
requests = None
class SnapshotModelCheckpoint(Callback):
    """Callback that saves the snapshot weights of the model.

    Saves the model weights on certain epochs (which can be considered the
    snapshot of the model at that epoch).

    Should be used with the cosine annealing learning rate schedule to save
    the weight just before learning rate is sharply increased.

    # Arguments:
        nb_epochs: total number of epochs that the model will be trained for.
        nb_snapshots: number of times the weights of the model will be saved.
        fn_prefix: prefix for the filename of the weights.
    """

    def __init__(self, nb_epochs, nb_snapshots, fn_prefix='Model'):
        super(SnapshotModelCheckpoint, self).__init__()
        # Number of epochs between two consecutive snapshots.
        self.check = nb_epochs // nb_snapshots
        self.fn_prefix = fn_prefix

    def on_epoch_end(self, epoch, logs=None):
        # Bug fix: `logs` used to default to a mutable `{}` shared across
        # calls; it is not read here, so None is a safe default.
        if epoch != 0 and (epoch + 1) % self.check == 0:
            filepath = self.fn_prefix + '-%d.h5' % ((epoch + 1) // self.check)
            self.model.save_weights(filepath, overwrite=True)
            # print("Saved snapshot at weights/%s_%d.h5" % (self.fn_prefix, epoch))
class SnapshotCallbackBuilder:
    """Builds the callbacks for snapshot-ensemble training of a model.

    From the paper "Snapshot Ensembles: Train 1, Get M For Free"
    (https://openreview.net/pdf?id=BJYwwY9ll).

    Produces a callback list that saves the model weights at certain epochs
    and then sharply increases the learning rate (cosine annealing).
    """

    def __init__(self, nb_epochs, nb_snapshots, init_lr=0.1):
        """Initialize a snapshot callback builder.

        # Arguments:
            nb_epochs: total number of epochs that the model will be trained for.
            nb_snapshots: number of times the weights of the model will be saved.
            init_lr: initial learning rate
        """
        self.T = nb_epochs
        self.M = nb_snapshots
        self.alpha_zero = init_lr

    def get_callbacks(self, model_prefix='Model'):
        """Create the callbacks used during snapshot-ensemble training.

        Args:
            model_prefix: prefix for the filename of the weights.

        Returns: list of 3 callbacks [ModelCheckpoint, LearningRateScheduler,
            SnapshotModelCheckpoint] which can be provided to the 'fit' function
        """
        if not os.path.exists('weights/'):
            os.makedirs('weights/')
        best_checkpoint = ModelCheckpoint('weights/%s-Best.h5' % model_prefix,
                                          monitor='val_acc',
                                          save_best_only=True,
                                          save_weights_only=True)
        lr_scheduler = LearningRateScheduler(
            schedule=self._cosine_anneal_schedule)
        snapshot_checkpoint = SnapshotModelCheckpoint(
            self.T, self.M, fn_prefix='weights/%s' % model_prefix)
        return [best_checkpoint, lr_scheduler, snapshot_checkpoint]

    def _cosine_anneal_schedule(self, t):
        # Cosine annealing restarted within each of the M snapshot cycles:
        # lr goes from alpha_zero down to 0 over each cycle of T // M epochs.
        cycle_length = self.T // self.M
        phase = np.pi * (t % cycle_length) / cycle_length
        return float(self.alpha_zero / 2 * (np.cos(phase) + 1))
from keras.callbacks import TensorBoard
import numpy as np
import os
class TensorBoardGrouped(TensorBoard):
    """TensorBoard basic visualizations with grouped train/val curves.

    [TensorBoard](https://www.tensorflow.org/guide/summaries_and_tensorboard)
    is a visualization tool provided with TensorFlow.

    This callback is a subclass of `keras.callbacks.TensorBoard`.
    The only difference is that the training and validation logs are
    grouped and written to the same plot.

    It's a drop-in replacement for the keras callback.
    The arguments are the same.
    """

    def __init__(self, log_dir='./logs', *args, **kwargs):
        self.base_log_dir = log_dir
        # Train and validation summaries go to sibling sub-directories so
        # that TensorBoard overlays them on the same plots.
        self.train_log_dir = os.path.join(log_dir, 'train')
        self.val_log_dir = os.path.join(log_dir, 'val')
        super(TensorBoardGrouped, self).__init__(self.train_log_dir,
                                                 *args,
                                                 **kwargs)

    def set_model(self, model):
        super(TensorBoardGrouped, self).set_model(model)
        # Imported lazily so merely importing this module does not require
        # TensorFlow.
        import tensorflow as tf
        self.val_writer = tf.summary.FileWriter(self.val_log_dir)

    def _write_logs(self, logs, index):
        """Write scalar summaries; `val_*` metrics go to the val writer."""
        import tensorflow as tf
        for name, value in logs.items():
            if name in ['batch', 'size']:
                continue
            if name.startswith('val_'):
                writer = self.val_writer
                name = name[4:]  # remove val_
            else:
                writer = self.writer
            summary = tf.Summary()
            summary_value = summary.value.add()
            if isinstance(value, np.ndarray):
                summary_value.simple_value = value.item()
            else:
                summary_value.simple_value = value
            summary_value.tag = name
            writer.add_summary(summary, index)
        self.writer.flush()
        self.val_writer.flush()

    def on_train_end(self, _):
        self.writer.close()
        # Bug fix: the validation writer was only flushed, leaking the
        # underlying event-file handle; close it like the train writer.
        self.val_writer.close()
from __future__ import absolute_import
from .clip import Clip
# Aliases.
clip = Clip
from __future__ import absolute_import
from keras import backend as K
from keras.constraints import Constraint
class Clip(Constraint):
    """Weight constraint that clips weights to the interval [-c, c].

    # Arguments
        c: Clipping parameter.
    """

    def __init__(self, c=0.01):
        self.c = c

    def __call__(self, p):
        limit = self.c
        return K.clip(p, -limit, limit)

    def get_config(self):
        config = {'name': self.__class__.__name__,
                  'c': self.c}
        return config
#!/usr/bin/env python
# coding=utf-8
"""
This is a script for downloading and converting the microsoft coco dataset
from mscoco.org. This can be run as an independent executable to download
the dataset or be imported by scripts used for larger experiments.
"""
from __future__ import division, print_function, unicode_literals
import os
import errno
import zipfile
import json
from sacred import Experiment, Ingredient
import numpy as np
from PIL import Image
from keras.utils import get_file
from keras.utils.generic_utils import Progbar
from pycocotools.coco import COCO
def palette():
    """Grayscale palette color (cid, cid, cid) for every possible category id."""
    return [(cid,) * 3 for cid in range(max(ids()) + 1)]
def cids_to_ids_map():
    """Map each COCO category id to its dense index within ids()."""
    return dict((cid, idx) for idx, cid in enumerate(ids()))
def ids():
    """COCO category ids, with 0 for background.

    COCO numbering has gaps; the ids below are the 91 consecutive integers
    0..90 minus the ten ids that are unused in the dataset.
    """
    unused = {12, 26, 29, 30, 45, 66, 68, 69, 71, 83}
    return [cid for cid in range(91) if cid not in unused]
def id_to_palette_map():
    """Map each dense palette index to its grayscale color tuple."""
    return dict(enumerate(palette()))
def cid_to_palette_map():
    """Map each COCO category id to a palette color.

    Bug fix: palette() has max(ids()) + 1 entries (91) but ids() has only
    81, so indexing ids()[idx] while enumerating palette() raised
    IndexError; zip truncates to the valid pairs instead.
    """
    return dict(zip(ids(), palette()))
def palette_to_id_map():
    """Map each palette color back to its COCO category id.

    Bug fix: palette() is longer than ids() (91 vs 81 entries), so the
    original dict comprehension raised IndexError on ids()[idx]; zip
    truncates to the valid pairs instead.
    """
    return dict(zip(palette(), ids()))
def class_weight(image_segmentation_stats_file=None,
                 weighting_algorithm='total_pixels_p_complement'):
    """Per-class training weights.

    Without a stats file, background (class 0) gets weight 0.5 and every
    other class 1.5. With a stats file, the weights of the requested
    weighting algorithm are read from the precomputed JSON stats.
    """
    if image_segmentation_stats_file is None:
        weights = {i: 1.5 for i in ids()}
        weights[0] = 0.5
        return weights
    else:
        with open(image_segmentation_stats_file, 'r') as fjson:
            # Bug fix: json.loads expects a string; json.load parses
            # directly from the open file object.
            stats = json.load(fjson)
        return stats[weighting_algorithm]
def mask_to_palette_map(cid):
    """Binary-mask palette: 0 maps to background, 255 to class `cid`'s color."""
    colors = id_to_palette_map()
    return {0: colors[0], 255: colors[cid]}
def categories():  # 80 classes
    """Names of the 80 COCO categories, with 'background' at index 0."""
    return ['background',  # class zero
            'person', 'bicycle', 'car', 'motorcycle', 'airplane',
            'bus', 'train', 'truck', 'boat', 'traffic light',
            'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird',
            'cat', 'dog', 'horse', 'sheep', 'cow',
            'elephant', 'bear', 'zebra', 'giraffe', 'backpack',
            'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
            'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat',
            'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle',
            'wine glass', 'cup', 'fork', 'knife', 'spoon',
            'bowl', 'banana', 'apple', 'sandwich', 'orange',
            'broccoli', 'carrot', 'hot dog', 'pizza', 'donut',
            'cake', 'chair', 'couch', 'potted plant', 'bed',
            'dining table', 'toilet', 'tv', 'laptop', 'mouse',
            'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
            'toaster', 'sink', 'refrigerator', 'book', 'clock',
            'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']
def id_to_category(category_id):
    """Name of the COCO category with id `category_id`."""
    lookup = {cid: categories()[idx] for idx, cid in enumerate(ids())}
    return lookup[category_id]
def category_to_cid_map():
    """Map each category name to its COCO category id."""
    return dict(zip(categories(), ids()))
def mkdir_p(path):
    """Create `path` and its parents; succeed silently if it already exists.

    http://stackoverflow.com/questions/600268/mkdir-p-functionality-in-python
    """
    try:
        os.makedirs(path)
    except OSError as exc:  # Python >2.5
        # Re-raise unless the directory already exists.
        if not (exc.errno == errno.EEXIST and os.path.isdir(path)):
            raise
# ============== Ingredient 2: dataset =======================
# Sacred experiment grouping the COCO dataset config and commands.
# NOTE(review): the comment above says "Ingredient" but an Experiment is
# constructed -- confirm which was intended.
data_coco = Experiment("dataset")
@data_coco.config
def coco_config():
    """Sacred config: URLs, checksums and local paths for MS COCO."""
    # TODO(ahundt) add md5 sums for each file
    verbose = 1
    coco_api = 'https://github.com/pdollar/coco/'
    dataset_root = os.path.join(os.path.expanduser('~'), 'datasets')
    dataset_path = os.path.join(dataset_root, 'coco')
    # Archive paths relative to the download mirror below.
    urls = [
        'coco2014/train2014.zip',
        'coco2014/val2014.zip',
        'coco2014/test2014.zip',
        'coco2015/test2015.zip',
        'annotations-1-0-3/instances_train-val2014.zip',
        'annotations-1-0-3/person_keypoints_trainval2014.zip',
        'annotations-1-0-4/image_info_test2014.zip',
        'annotations-1-0-4/image_info_test2015.zip',
        'annotations-1-0-3/captions_train-val2014.zip'
    ]
    base_url = 'http://msvocds.blob.core.windows.net/'
    urls = [base_url + x for x in urls]
    data_prefixes = [
        'train2014',
        'val2014',
        'test2014',
        'test2015',
    ]
    image_filenames = [prefix + '.zip' for prefix in data_prefixes]
    annotation_filenames = [
        'instances_train-val2014.zip',  # training AND validation info
        'image_info_test2014.zip',  # basic info like download links + category
        'image_info_test2015.zip',  # basic info like download links + category
        'person_keypoints_trainval2014.zip',  # elbows, head, wrist etc
        'captions_train-val2014.zip',  # descriptions of images
    ]
    # md5s are ordered to match image_filenames + annotation_filenames.
    md5s = [
        '0da8c0bd3d6becc4dcb32757491aca88',  # train2014.zip
        'a3d79f5ed8d289b7a7554ce06a5782b3',  # val2014.zip
        '04127eef689ceac55e3a572c2c92f264',  # test2014.zip
        '65562e58af7d695cc47356951578c041',  # test2015.zip
        '59582776b8dd745d649cd249ada5acf7',  # instances_train-val2014.zip
        '926b9df843c698817ee62e0e049e3753',  # person_keypoints_trainval2014.zip
        'f3366b66dc90d8ae0764806c95e43c86',  # image_info_test2014.zip
        '8a5ad1a903b7896df7f8b34833b61757',  # image_info_test2015.zip
        '5750999c8c964077e3c81581170be65b'  # captions_train-val2014.zip
    ]
    filenames = image_filenames + annotation_filenames
    seg_mask_path = os.path.join(dataset_path, 'seg_mask')
    annotation_json = [
        'annotations/instances_train2014.json',
        'annotations/instances_val2014.json'
    ]
    annotation_paths = [os.path.join(dataset_path, postfix)
                        for postfix in annotation_json]
    # only first two data prefixes contain segmentation masks
    # NOTE(review): the comment says "first two" but [0:1] selects only
    # train2014 -- confirm whether [0:2] (train + val) was intended.
    seg_mask_image_paths = [os.path.join(dataset_path, prefix)
                            for prefix in data_prefixes[0:1]]
    seg_mask_output_paths = [os.path.join(seg_mask_path, prefix)
                             for prefix in data_prefixes[0:1]]
    seg_mask_extensions = ['.npy' for prefix in data_prefixes[0:1]]
    image_dirs = [os.path.join(dataset_path, prefix) for prefix in data_prefixes]
    image_extensions = ['.jpg' for prefix in data_prefixes]
    # PASCAL VOC style image-set text files, one per data prefix.
    voc_imageset_txt_paths = [os.path.join(dataset_path,
                                           'annotations', prefix + '.txt')
                              for prefix in data_prefixes]
@data_coco.capture
def coco_files(dataset_path, filenames, dataset_root, urls, md5s, annotation_paths):
    """Print the resolved dataset config and return the archive paths."""
    for value in (dataset_path, dataset_root, urls,
                  filenames, md5s, annotation_paths):
        print(value)
    return [os.path.join(dataset_path, name) for name in filenames]
@data_coco.command
def print_coco_files(dataset_path, filenames, dataset_root,
                     urls, md5s, annotation_paths):
    """Sacred command: print the dataset configuration and archive paths."""
    coco_files(dataset_path=dataset_path, filenames=filenames,
               dataset_root=dataset_root, urls=urls, md5s=md5s,
               annotation_paths=annotation_paths)
@data_coco.command
def coco_download(dataset_path, filenames, dataset_root,
                  urls, md5s, annotation_paths):
    """Download the COCO archives and extract them into `dataset_path`.

    coco_files() is still called for its printed output; its return value
    (the archive paths) is not needed here, so the unused binding was
    dropped.
    """
    coco_files(dataset_path, filenames, dataset_root,
               urls, md5s, annotation_paths)
    for url, filename, md5 in zip(urls, filenames, md5s):
        path = get_file(filename, url, md5_hash=md5,
                        extract=True, cache_subdir=dataset_path)
        # TODO(ahundt) check if it is already extracted, don't re-extract. see
        # https://github.com/fchollet/keras/issues/5861
        # Context manager guarantees the zip handle is closed even if
        # extraction raises.
        with zipfile.ZipFile(path, 'r') as zip_file:
            zip_file.extractall(path=dataset_path)
@data_coco.command
def coco_json_to_segmentation(seg_mask_output_paths,
                              annotation_paths, seg_mask_image_paths, verbose):
    """Convert COCO json annotations into per-image segmentation masks.

    Writes one .png class-id mask and one one-hot encoded .npy mask per
    annotated image, skipping files that already exist.
    """
    for (seg_mask_path, annFile, image_path) in zip(
            seg_mask_output_paths, annotation_paths, seg_mask_image_paths):
        print('Loading COCO Annotations File: ', annFile)
        print('Segmentation Mask Output Folder: ', seg_mask_path)
        print('Source Image Folder: ', image_path)
        print('\n'
              'WARNING: Each pixel can have multiple classes! That means'
              'class data overlaps. Also, single objects can be outlined'
              'multiple times because they were labeled by different people!'
              'In other words, even a single object may be segmented twice.'
              'This means the .png files are missing entire objects.\n\n'
              'Use of categorical one-hot encoded .npy files is recommended,'
              'but .npy files also have limitations, because the .npy files'
              'only have one label per pixel for each class,'
              'and currently take the union of multiple human class labels.'
              'Improving how your data is handled will improve your results'
              'so remember to consider that limitation. There is still'
              'an opportunity to improve how this training data is handled &'
              'integrated with your training scripts and utilities...')
        coco = COCO(annFile)
        print('Converting Annotations to Segmentation Masks...')
        mkdir_p(seg_mask_path)
        # Bug fix: dict.keys() is not indexable on Python 3; materialize the
        # annotated image ids once instead of indexing keys() repeatedly.
        annotated_img_ids = list(coco.imgToAnns.keys())
        total_imgs = len(annotated_img_ids)
        progbar = Progbar(total_imgs + len(coco.getImgIds()), verbose=verbose)
        # 'annotations' was previously 'instances' in an old version
        for img_num in range(total_imgs):
            # Both [0]'s are used to extract the element from a list
            img = coco.loadImgs(
                coco.imgToAnns[annotated_img_ids[img_num]][0]['image_id'])[0]
            h = img['height']
            w = img['width']
            name = img['file_name']
            root_name = name[:-4]
            filename = os.path.join(seg_mask_path, root_name + ".png")
            file_exists = os.path.exists(filename)
            if file_exists:
                progbar.update(img_num, [('file_fraction_already_exists', 1)])
                continue
            else:
                progbar.update(img_num, [('file_fraction_already_exists', 0)])
                print(filename)
            # Paint each annotation's category id into a single-channel
            # mask; overlapping annotations overwrite earlier ones.
            # (A dead `np.where(MASK > 0)` no-op was removed here.)
            MASK = np.zeros((h, w), dtype=np.uint8)
            for ann in coco.imgToAnns[annotated_img_ids[img_num]]:
                mask = coco.annToMask(ann)
                idxs = np.where(mask > 0)
                MASK[idxs] = ann['category_id']
            im = Image.fromarray(MASK)
            im.save(filename)
        print('\nConverting Annotations to one hot encoded'
              'categorical .npy Segmentation Masks...')
        img_ids = coco.getImgIds()
        use_original_dims = True  # not target_shape
        for idx, img_id in enumerate(img_ids):
            img = coco.loadImgs(img_id)[0]
            name = img['file_name']
            root_name = name[:-4]
            filename = os.path.join(seg_mask_path, root_name + ".npy")
            file_exists = os.path.exists(filename)
            if file_exists:
                progbar.add(1, [('file_fraction_already_exists', 1)])
                continue
            else:
                progbar.add(1, [('file_fraction_already_exists', 0)])
            if use_original_dims:
                target_shape = (img['height'], img['width'], max(ids()) + 1)
            ann_ids = coco.getAnnIds(imgIds=img['id'], iscrowd=None)
            anns = coco.loadAnns(ann_ids)
            mask_one_hot = np.zeros(target_shape, dtype=np.uint8)
            mask_one_hot[:, :, 0] = 1  # every pixel begins as background
            for ann in anns:
                mask_partial = coco.annToMask(ann)
                # Set the annotation's category channel and clear the
                # background channel wherever the annotation covers.
                mask_one_hot[mask_partial > 0, ann['category_id']] = 1
                mask_one_hot[mask_partial > 0, 0] = 0
            np.save(filename, mask_one_hot)
@data_coco.command
def coco_to_pascal_voc_imageset_txt(voc_imageset_txt_paths, image_dirs,
                                    image_extensions):
    """Write PASCAL VOC style image-set .txt files listing image basenames.

    For each (txt path, image dir, extension) triple, writes one line per
    image file: the basename without its extension.
    """
    # os.environ["CUDA_VISIBLE_DEVICES"] = '1'
    # Get some image/annotation pairs for example
    for imgset_path, img_dir, t_ext in zip(
            voc_imageset_txt_paths, image_dirs, image_extensions):
        with open(imgset_path, 'w') as txtfile:
            # Plain loop instead of a throwaway list comprehension for the
            # side-effecting writes; also avoids shadowing builtin `file`.
            for entry in os.listdir(img_dir):
                if entry.endswith(t_ext):
                    txtfile.write(
                        os.path.splitext(os.path.basename(entry))[0] + '\n')
@data_coco.command
def coco_image_segmentation_stats(seg_mask_output_paths, annotation_paths,
                                  seg_mask_image_paths, verbose):
    """Compute per-category pixel statistics over the COCO segmentations.

    For every annotation file, counts how many pixels belong to each
    category, derives several normalized weighting schemes, and writes the
    results to a JSON stats file plus a CSV of category weights.
    """
    for (seg_mask_path, annFile, image_path) in zip(
            seg_mask_output_paths, annotation_paths, seg_mask_image_paths):
        print('Loading COCO Annotations File: ', annFile)
        print('Segmentation Mask Output Folder: ', seg_mask_path)
        print('Source Image Folder: ', image_path)
        stats_json = os.path.join(seg_mask_path,
                                  'image_segmentation_class_stats.json')
        print('Image stats will be saved to:', stats_json)
        cat_csv = os.path.join(seg_mask_path,
                               'class_counts_over_sum_category_counts.csv')
        print('Category weights will be saved to:', cat_csv)
        coco = COCO(annFile)
        print('Annotation file info:')
        coco.info()
        print('category ids, not including 0 for background:')
        print(coco.getCatIds())
        # display COCO categories and supercategories
        cats = coco.loadCats(coco.getCatIds())
        nms = [cat['name'] for cat in cats]
        print('categories: \n\n', ' '.join(nms))
        nms = set([cat['supercategory'] for cat in cats])
        print('supercategories: \n', ' '.join(nms))
        img_ids = coco.getImgIds()
        # NOTE(review): use_original_dims is never read in this function.
        use_original_dims = True  # not target_shape
        max_ids = max(ids()) + 1  # add background category
        # 0 indicates no category (not even background) for counting bins
        max_bin_count = max_ids + 1
        bin_count = np.zeros(max_bin_count)
        total_pixels = 0
        print('Calculating image segmentation stats...')
        progbar = Progbar(len(img_ids), verbose=verbose)
        # NOTE(review): `i` always equals idx + 1 and could be replaced by
        # enumerate's index in a future cleanup.
        i = 0
        for idx, img_id in enumerate(img_ids):
            img = coco.loadImgs(img_id)[0]
            i += 1
            progbar.update(i)
            ann_ids = coco.getAnnIds(imgIds=img['id'], iscrowd=None)
            anns = coco.loadAnns(ann_ids)
            target_shape = (img['height'], img['width'], max_ids)
            # print('\ntarget_shape:', target_shape)
            mask_one_hot = np.zeros(target_shape, dtype=np.uint8)
            # Note to only count background pixels once, we define a temporary
            # null class of 0, and shift all class category ids up by 1
            mask_one_hot[:, :, 0] = 1  # every pixel begins as background
            for ann in anns:
                mask_partial = coco.annToMask(ann)
                above_zero = mask_partial > 0
                mask_one_hot[above_zero, ann['category_id']] = ann['category_id'] + 1
                mask_one_hot[above_zero, 0] = 0
            # Count how many entries carry each (shifted) category value.
            bincount_result = np.bincount(mask_one_hot.flatten())
            # Pad the bincount to a fixed length so it can be accumulated
            # across images whose content covers different categories.
            length = int(np.shape(bincount_result)[0])
            zeros_to_add = max_bin_count - length
            z = np.zeros(zeros_to_add)
            # this is a workaround because for some strange reason the
            # output type of bincount couldn't interact with other numpy arrays
            bincount_result_long = bincount_result.tolist() + z.tolist()
            bin_count = bin_count + np.array(bincount_result_long)
            total_pixels += (img['height'] * img['width'])
        print('Final Tally:')
        # shift categories back down by 1
        bin_count = bin_count[1:]
        category_ids = range(bin_count.size)
        sum_category_counts = np.sum(bin_count)
        # sum will be =1 as a pixel can be in multiple categories
        category_counts_over_sum_category_counts = \
            np.true_divide(bin_count.astype(np.float64), sum_category_counts)
        np.savetxt(cat_csv, category_counts_over_sum_category_counts)
        # sum will be >1 as a pixel can be in multiple categories
        category_counts_over_total_pixels = \
            np.true_divide(bin_count.astype(np.float64), total_pixels)
        # less common categories have more weight, sum = 1
        category_counts_p_complement = \
            [1 - x if x > 0.0 else 0.0
             for x in category_counts_over_sum_category_counts]
        # less common categories have more weight, sum > 1
        total_pixels_p_complement = \
            [1 - x if x > 0.0 else 0.0
             for x in category_counts_over_total_pixels]
        print(bin_count)
        stat_dict = {
            'total_pixels': total_pixels,
            'category_counts': dict(zip(category_ids, bin_count)),
            'sum_category_counts': sum_category_counts,
            'category_counts_over_sum_category_counts':
                dict(zip(category_ids,
                         category_counts_over_sum_category_counts)),
            'category_counts_over_total_pixels':
                dict(zip(category_ids, category_counts_over_total_pixels)),
            'category_counts_p_complement':
                dict(zip(category_ids, category_counts_p_complement)),
            'total_pixels_p_complement':
                dict(zip(category_ids, total_pixels_p_complement)),
            'ids': ids(),
            'categories': categories()
        }
        print(stat_dict)
        # NOTE(review): several values above are numpy scalars/arrays;
        # json.dump may raise TypeError on np.float64 -- confirm with a run.
        with open(stats_json, 'w') as fjson:
            json.dump(stat_dict, fjson, ensure_ascii=False)
@data_coco.command
def coco_setup(dataset_root, dataset_path, data_prefixes,
               filenames, urls, md5s, annotation_paths,
               image_dirs, seg_mask_output_paths, verbose,
               image_extensions, voc_imageset_txt_paths=None):
    """Download COCO and convert it to segmentation masks + VOC txt files.

    Bug fix: `voc_imageset_txt_paths` was used below but never defined,
    raising NameError. It is now a parameter (default None for backward
    compatibility) that sacred injects from the config by name. The
    remaining arguments of the captured commands called here are likewise
    filled in from the config.
    """
    # download the dataset
    coco_download(dataset_path, filenames, dataset_root,
                  urls, md5s, annotation_paths)
    # convert the relevant files to a more useful format
    coco_json_to_segmentation(seg_mask_output_paths, annotation_paths)
    coco_to_pascal_voc_imageset_txt(voc_imageset_txt_paths, image_dirs,
                                    image_extensions)
@data_coco.automain
def main(dataset_root, dataset_path, data_prefixes,
         filenames, urls, md5s, annotation_paths,
         image_dirs, seg_mask_output_paths):
    """Entry point: run the full COCO download + conversion pipeline.

    Bug fix: arguments are now passed by keyword -- the original positional
    call scrambled their order relative to coco_setup's signature (e.g.
    data_prefixes landed in dataset_root). Arguments not listed here
    (verbose, image_extensions, ...) are injected by sacred from the config.
    """
    coco_config()
    coco_setup(dataset_root=dataset_root,
               dataset_path=dataset_path,
               data_prefixes=data_prefixes,
               filenames=filenames,
               urls=urls,
               md5s=md5s,
               annotation_paths=annotation_paths,
               image_dirs=image_dirs,
               seg_mask_output_paths=seg_mask_output_paths)
from __future__ import print_function
import numpy
from keras.utils.data_utils import get_file
from zipfile import ZipFile
from collections import Counter
from keras.preprocessing.sequence import pad_sequences
def load_data(path='conll2000.zip', min_freq=2):
    """Load the CoNLL-2000 chunking corpus.

    Downloads the archive if needed and returns
    (train, test, (vocab, pos_tags, chunk_tags)), where train and test are
    (x, y_pos, y_chunk) padded index arrays.

    # Arguments
        path: filename under which the dataset archive is cached.
        min_freq: words appearing fewer than `min_freq` times in the
            training data are mapped to '<unk>'.
    """
    path = get_file(path,
                    origin='https://raw.githubusercontent.com/nltk'
                           '/nltk_data/gh-pages/packages/corpora/conll2000.zip')
    print(path)
    # Context manager guarantees the archive is closed even if parsing fails.
    with ZipFile(path, 'r') as archive:
        train = _parse_data(archive.open('conll2000/train.txt'))
        test = _parse_data(archive.open('conll2000/test.txt'))
    word_counts = Counter(row[0].lower() for sample in train for row in sample)
    vocab = ['<pad>', '<unk>']
    # Only words frequent enough in the training data get their own index.
    vocab += [w for w, f in iter(word_counts.items()) if f >= min_freq]
    # in alphabetic order
    pos_tags = sorted(list(set(row[1] for sample in train + test for row in sample)))
    # in alphabetic order
    chunk_tags = sorted(list(set(row[2] for sample in train + test for row in sample)))
    train = _process_data(train, vocab, pos_tags, chunk_tags)
    test = _process_data(test, vocab, pos_tags, chunk_tags)
    return train, test, (vocab, pos_tags, chunk_tags)
def _parse_data(fh):
    """Parse a CoNLL-format file handle into a list of sentences.

    Each sentence is a list of token rows; each row is the
    whitespace-split columns of one line. Closes the handle when done.
    """
    raw = fh.read()
    sentences = [[line.split() for line in block.split('\n')]
                 for block in raw.decode().strip().split('\n\n')]
    fh.close()
    return sentences
def _process_data(data, vocab, pos_tags, chunk_tags, maxlen=None, onehot=False):
    """Convert parsed samples into padded index (or one-hot) arrays.

    # Arguments
        data: list of sentences as produced by `_parse_data`.
        vocab: word list; index 1 is reserved for '<unk>'.
        pos_tags: POS label vocabulary (indexed by position).
        chunk_tags: chunk label vocabulary (indexed by position).
        maxlen: pad/truncate length; defaults to the longest sentence.
        onehot: if True, labels are returned one-hot encoded.
    """
    if maxlen is None:
        maxlen = max(len(s) for s in data)
    word2idx = dict((w, i) for i, w in enumerate(vocab))
    # set to <unk> (index 1) if not in vocab
    x = [[word2idx.get(w[0].lower(), 1) for w in s] for s in data]
    y_pos = [[pos_tags.index(w[1]) for w in s] for s in data]
    y_chunk = [[chunk_tags.index(w[2]) for w in s] for s in data]
    x = pad_sequences(x, maxlen)  # left padding
    # left padded with -1. Indeed, any integer works as it will be masked
    y_pos = pad_sequences(y_pos, maxlen, value=-1)
    y_chunk = pad_sequences(y_chunk, maxlen, value=-1)
    if onehot:
        # Bug fix: both lines indexed an undefined name `y` (NameError);
        # index with the padded label arrays themselves.
        y_pos = numpy.eye(len(pos_tags), dtype='float32')[y_pos]
        y_chunk = numpy.eye(len(chunk_tags), dtype='float32')[y_chunk]
    else:
        y_pos = numpy.expand_dims(y_pos, 2)
        y_chunk = numpy.expand_dims(y_chunk, 2)
    return x, y_pos, y_chunk
#!/usr/bin/env python
# coding=utf-8
"""
This is a script for downloading and converting the pascal voc 2012 dataset
and the berkeley extended version.
# original PASCAL VOC 2012
# 2 GB
# http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar
# berkeley augmented Pascal VOC
# 1.3 GB
# http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/semantic_contours/benchmark.tgz
This can be run as an independent executable to download
the dataset or be imported by scripts used for larger experiments.
If you aren't sure run this to do a full download + conversion setup of the dataset:
./data_pascal_voc.py pascal_voc_setup
""" # pylint: disable=E501
from __future__ import division, print_function, unicode_literals
import os
import shutil
import errno
from sacred import Ingredient, Experiment
from keras.utils import get_file
import skimage.io as io
# ============== Ingredient 2: dataset =======================
# Sacred experiment grouping the PASCAL VOC dataset config and commands.
# NOTE(review): the comment above says "Ingredient" but an Experiment is
# constructed -- confirm which was intended.
data_pascal_voc = Experiment("dataset")
def mkdir_p(path):
    """Recursively create `path`, ignoring an already-existing directory.

    http://stackoverflow.com/questions/600268/mkdir-p-functionality-in-python
    """
    try:
        os.makedirs(path)
    except OSError as exc:  # Python >2.5
        # Only swallow the error when the directory already exists.
        if not (exc.errno == errno.EEXIST and os.path.isdir(path)):
            raise
def pascal_segmentation_lut():
    """Return look-up table with number and correspondng class names
    for PASCAL VOC segmentation dataset.

    Two special classes: 0 is background and 255 is the ambigious region.
    All other classes are numerated from 1 to 20 by their position.

    Returns
    -------
    classes_lut : dict
        look-up table with number and correspondng class names
    """
    class_names = ['background', 'aeroplane', 'bicycle', 'bird', 'boat',
                   'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable',
                   'dog', 'horse', 'motorbike', 'person', 'potted-plant',
                   'sheep', 'sofa', 'train', 'tv/monitor', 'ambigious']
    # Regular classes keyed by position; the special ambigious class keyed
    # by 255.
    classes_lut = {idx: name for idx, name in enumerate(class_names[:-1])}
    classes_lut[255] = class_names[-1]
    return classes_lut
def get_pascal_segmentation_images_lists_txts(pascal_root):
    """Full paths to the PASCAL VOC segmentation train/val/trainval lists.

    Parameters
    ----------
    pascal_root : string
        Full path to the root of PASCAL VOC dataset.

    Returns
    -------
    full_filenames_txts : [string, string, string]
        Paths of train.txt, val.txt and trainval.txt, in that order.
    """
    segmentation_folder = os.path.join(pascal_root, 'ImageSets/Segmentation')
    return [os.path.join(segmentation_folder, basename)
            for basename in ('train.txt', 'val.txt', 'trainval.txt')]
def readlines_with_strip(filename):
    """Read lines from the specified file with whitespace stripped.

    Each line has whitespace removed from both ends, which also removes
    the newline left behind by readlines().

    Parameters
    ----------
    filename : string
        Full path of the file to read.

    Returns
    -------
    clean_lines : list of strings
        Strings that were read from the file and cleaned up.
    """
    # Get raw lines from the file; the context manager closes it reliably.
    with open(filename, 'r') as f:
        lines = f.readlines()
    # Bug fix: return a real list instead of a lazy `map` object so the
    # result is indexable and re-iterable on Python 3, as documented.
    return [line.strip() for line in lines]
def readlines_with_strip_array_version(filenames_array):
    """The function that is similar to readlines_with_strip() but for filenames array.

    Applies readlines_with_strip() to each filename in the array.

    Parameters
    ----------
    filenames_array : array of strings
        Array of strings. Each specifies a path to a file.

    Returns
    -------
    clean_lines : array of (array of strings)
        Strings that were read from the file and cleaned up.
        NOTE(review): on Python 3 this is a lazy `map` object, not a list;
        it can only be iterated once -- confirm callers only iterate it.
    """
    return map(readlines_with_strip, filenames_array)
def add_full_path_and_extention_to_filenames(filenames_array, full_path, extention):
    """Prepend a path and append a file extension to every bare filename.

    Turns a filename like 'cat' into 'full/path/to/somewhere/cat.jpg'.

    Parameters
    ----------
    filenames_array : array of strings
        Filenames without path or extension.
    full_path : string
        Path prepended to each filename.
    extention : string
        Extension (without the dot) appended to each filename.

    Returns
    -------
    full_filenames : map of strings
        Updated filenames (lazy, like the original implementation).
    """
    def _expand(name):
        # Build "<full_path>/<name>.<extention>".
        return os.path.join(full_path, name) + '.' + extention

    return map(_expand, filenames_array)
def add_full_path_and_extention_to_filenames_array_version(filenames_array_array,
                                                           full_path,
                                                           extention):
    """Array version of the add_full_path_and_extention_to_filenames() function.

    Applies add_full_path_and_extention_to_filenames() to each element of the
    array.

    Parameters
    ----------
    filenames_array_array : iterable of (iterable of strings)
        Arrays of bare filenames, one per dataset subset.
    full_path : string
        Path prefix to join on the left of each filename.
    extention : string
        Extension (without the dot) appended on the right of each filename.

    Returns
    -------
    full_filenames : list of (list of strings)
        Updated array of arrays with filenames.
    """
    # List comprehension instead of `map` so the result can be traversed
    # repeatedly under Python 3.
    return [
        add_full_path_and_extention_to_filenames(names, full_path, extention)
        for names in filenames_array_array
    ]
def get_pascal_segmentation_image_annotation_filenames_pairs(pascal_root):
    """Return (image, annotation) filenames pairs from PASCAL VOC segmentation
    dataset.

    Returns a three dimensional array where the first dimension represents the
    type of the dataset: train, val or trainval in the respective order. The
    second dimension represents a pair of images that belongs to a particular
    dataset. And the third one is responsible for the first or second element
    in the pair.

    Parameters
    ----------
    pascal_root : string
        Path to the PASCAL VOC dataset root that is usually named 'VOC2012'
        after being extracted from tar file.

    Returns
    -------
    image_annotation_filename_pairs : list of list of (string, string)
        Filename pairs for the train/val/trainval subsets.
    """
    pascal_relative_images_folder = 'JPEGImages'
    pascal_relative_class_annotations_folder = 'SegmentationClass'

    images_extention = 'jpg'
    annotations_extention = 'png'

    pascal_images_folder = os.path.join(
        pascal_root, pascal_relative_images_folder)
    pascal_class_annotations_folder = os.path.join(
        pascal_root, pascal_relative_class_annotations_folder)

    pascal_images_lists_txts = get_pascal_segmentation_images_lists_txts(
        pascal_root)

    # Materialize as lists of lists: the read helpers may return one-shot
    # `map` iterators under Python 3, and the name lists are consumed twice
    # below (once for images, once for annotations).
    pascal_image_names = [
        list(names)
        for names in readlines_with_strip_array_version(pascal_images_lists_txts)
    ]

    images_full_names = add_full_path_and_extention_to_filenames_array_version(
        pascal_image_names,
        pascal_images_folder,
        images_extention,
    )
    annotations_full_names = add_full_path_and_extention_to_filenames_array_version(
        pascal_image_names,
        pascal_class_annotations_folder,
        annotations_extention,
    )

    # Pair each image filename with its respective annotation filename, per
    # subset: [[(pair_1), (pair_2), ..], ..] with 3 elements for the
    # train/val/trainval datasets. Materialized so callers can iterate the
    # result more than once.
    return [
        list(zip(subset_images, subset_annotations))
        for subset_images, subset_annotations
        in zip(images_full_names, annotations_full_names)
    ]
@data_pascal_voc.command
def convert_pascal_berkeley_augmented_mat_annotations_to_png(
        pascal_berkeley_augmented_root):
    """ Creates a new folder in the root folder of the dataset with annotations stored
    in .png. The function accepts a full path to the root of Berkeley augmented Pascal
    VOC segmentation dataset and converts annotations that are stored in .mat files to
    .png files. It creates a new folder dataset/cls_png where all the converted files
    will be located. If this directory already exists the function does nothing. The
    Berkley augmented dataset can be downloaded from here:
    http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/semantic_contours/benchmark.tgz

    Parameters
    ----------
    pascal_berkeley_augmented_root : string
        Full path to the root of augmented Berkley PASCAL VOC dataset.
    """  # pylint: disable=E501
    # Imported lazily so scipy is only required when this command runs.
    import scipy.io

    def read_class_annotation_array_from_berkeley_mat(mat_filename, key='GTcls'):
        """Load the segmentation array stored under `key` in a Berkeley .mat file."""
        # Mat to png conversion for
        # http://www.cs.berkeley.edu/~bharath2/codes/SBD/download.html
        # 'GTcls' key is for class segmentation
        # 'GTinst' key is for instance segmentation
        # Credit:
        # https://github.com/martinkersner/train-DeepLab/blob/master/utils.py
        mat = scipy.io.loadmat(mat_filename, mat_dtype=True,
                               squeeze_me=True, struct_as_record=False)
        return mat[key].Segmentation

    mat_file_extension_string = '.mat'
    png_file_extension_string = '.png'
    relative_path_to_annotation_mat_files = 'dataset/cls'
    relative_path_to_annotation_png_files = 'dataset/cls_png'
    mat_file_extension_string_length = len(mat_file_extension_string)
    annotation_mat_files_fullpath = os.path.join(pascal_berkeley_augmented_root,
                                                 relative_path_to_annotation_mat_files)
    annotation_png_save_fullpath = os.path.join(pascal_berkeley_augmented_root,
                                                relative_path_to_annotation_png_files)
    # Create the folder where all the converted png files will be placed.
    # If the folder already exists, assume the conversion already ran and
    # do nothing.
    if not os.path.exists(annotation_png_save_fullpath):
        os.makedirs(annotation_png_save_fullpath)
    else:
        return
    mat_files_names = os.listdir(annotation_mat_files_fullpath)
    for current_mat_file_name in mat_files_names:
        # Strip the trailing '.mat' to get the bare image name.
        current_file_name_without_extention = current_mat_file_name[
            :-mat_file_extension_string_length]
        current_mat_file_full_path = os.path.join(annotation_mat_files_fullpath,
                                                  current_mat_file_name)
        current_png_file_full_path_to_be_saved = os.path.join(
            annotation_png_save_fullpath,
            current_file_name_without_extention,
        )
        current_png_file_full_path_to_be_saved += png_file_extension_string
        annotation_array = read_class_annotation_array_from_berkeley_mat(
            current_mat_file_full_path)
        # TODO: hide 'low-contrast' image warning during saving.
        # NOTE(review): `io` here is presumably skimage.io imported at the top
        # of this module — confirm against the module-level imports.
        io.imsave(current_png_file_full_path_to_be_saved, annotation_array)
def get_pascal_berkeley_augmented_segmentation_images_lists_txts(pascal_berkeley_root):
    """Return full paths to files in PASCAL Berkley augmented VOC with train and
    val image name lists.

    The returned txt files contain the names of images (and the respective
    annotations) used for segmentation in the Berkeley augmented dataset.

    Parameters
    ----------
    pascal_berkeley_root : string
        Full path to the root of PASCAL VOC Berkley augmented dataset.

    Returns
    -------
    full_filenames_txts : [string, string]
        Array that contains paths for train/val txts with images names.
    """
    # The image-name list files live directly in the 'dataset' subfolder.
    lists_folder = os.path.join(pascal_berkeley_root, 'dataset')

    # TODO: add function that will join both train.txt and val.txt into
    # trainval.txt
    return [os.path.join(lists_folder, txt_name)
            for txt_name in ('train.txt', 'val.txt')]
def get_pascal_berkeley_augmented_segmentation_image_annotation_filenames_pairs(
        pascal_berkeley_root):
    """Return (image, annotation) filenames pairs from PASCAL Berkeley VOC
    segmentation dataset.

    Returns a three dimensional array where the first dimension represents the
    type of the dataset: train, val in the respective order. The second
    dimension represents a pair of images that belongs to a particular
    dataset. And the third one is responsible for the first or second element
    in the pair.

    Parameters
    ----------
    pascal_berkeley_root : string
        Path to the PASCAL Berkeley VOC dataset root that is usually named
        'benchmark_RELEASE' after being extracted from tar file.

    Returns
    -------
    image_annotation_filename_pairs : list of list of (string, string)
        Filename pairs for the train/val subsets.
    """
    pascal_relative_images_folder = 'dataset/img'
    pascal_relative_class_annotations_folder = 'dataset/cls_png'

    images_extention = 'jpg'
    annotations_extention = 'png'

    pascal_images_folder = os.path.join(
        pascal_berkeley_root, pascal_relative_images_folder)
    pascal_class_annotations_folder = os.path.join(
        pascal_berkeley_root, pascal_relative_class_annotations_folder)

    pascal_images_lists_txts = (
        get_pascal_berkeley_augmented_segmentation_images_lists_txts(
            pascal_berkeley_root))

    # Materialize as lists of lists: the read helpers may return one-shot
    # `map` iterators under Python 3, and the name lists are consumed twice
    # below (once for images, once for annotations).
    pascal_image_names = [
        list(names)
        for names in readlines_with_strip_array_version(pascal_images_lists_txts)
    ]

    images_full_names = add_full_path_and_extention_to_filenames_array_version(
        pascal_image_names,
        pascal_images_folder,
        images_extention,
    )
    annotations_full_names = add_full_path_and_extention_to_filenames_array_version(
        pascal_image_names,
        pascal_class_annotations_folder,
        annotations_extention,
    )

    # Pair each image filename with its respective annotation filename, per
    # subset: [[(pair_1), (pair_2), ..], ..] with 2 elements for the
    # train/val datasets. Materialized so callers can iterate the result
    # more than once.
    return [
        list(zip(subset_images, subset_annotations))
        for subset_images, subset_annotations
        in zip(images_full_names, annotations_full_names)
    ]
def get_pascal_berkeley_augmented_selected_image_annotation_filenames_pairs(
        pascal_berkeley_root,
        selected_names,
):
    """Return (image, annotation) filenames pairs from PASCAL Berkeley VOC
    segmentation dataset for selected names.

    Accepts selected file names from the PASCAL Berkeley VOC segmentation
    dataset and returns image/annotation pairs with full path and extension
    for those names.

    Parameters
    ----------
    pascal_berkeley_root : string
        Path to the PASCAL Berkeley VOC dataset root that is usually named
        'benchmark_RELEASE' after being extracted from tar file.
    selected_names : iterable of strings
        Selected filenames from PASCAL VOC Berkeley that can be read from the
        txt files that come with the dataset.

    Returns
    -------
    image_annotation_pairs : list of (string, string)
        Filename pairs with full names.
    """
    pascal_relative_images_folder = 'dataset/img'
    pascal_relative_class_annotations_folder = 'dataset/cls_png'

    images_extention = 'jpg'
    annotations_extention = 'png'

    pascal_images_folder = os.path.join(
        pascal_berkeley_root, pascal_relative_images_folder)
    pascal_class_annotations_folder = os.path.join(
        pascal_berkeley_root, pascal_relative_class_annotations_folder)

    # `selected_names` is consumed twice below; materialize it in case the
    # caller passed a one-shot iterator.
    selected_names = list(selected_names)

    images_full_names = add_full_path_and_extention_to_filenames(
        selected_names,
        pascal_images_folder,
        images_extention,
    )
    annotations_full_names = add_full_path_and_extention_to_filenames(
        selected_names,
        pascal_class_annotations_folder,
        annotations_extention,
    )

    # list() because zip() is a one-shot iterator under Python 3 and callers
    # traverse these pairs more than once.
    return list(zip(images_full_names, annotations_full_names))
def get_pascal_selected_image_annotation_filenames_pairs(pascal_root, selected_names):
    """Return (image, annotation) filenames pairs from PASCAL VOC segmentation
    dataset for selected names.

    Accepts selected file names from the PASCAL VOC segmentation dataset and
    returns image/annotation pairs with full path and extension for those
    names.

    Parameters
    ----------
    pascal_root : string
        Path to the PASCAL VOC dataset root that is usually named 'VOC2012'
        after being extracted from tar file.
    selected_names : iterable of strings
        Selected filenames from PASCAL VOC that can be read from the txt
        files that come with the dataset.

    Returns
    -------
    image_annotation_pairs : list of (string, string)
        Filename pairs with full names.
    """
    pascal_relative_images_folder = 'JPEGImages'
    pascal_relative_class_annotations_folder = 'SegmentationClass'

    images_extention = 'jpg'
    annotations_extention = 'png'

    pascal_images_folder = os.path.join(
        pascal_root, pascal_relative_images_folder)
    pascal_class_annotations_folder = os.path.join(
        pascal_root, pascal_relative_class_annotations_folder)

    # `selected_names` is consumed twice below; materialize it in case the
    # caller passed a one-shot iterator.
    selected_names = list(selected_names)

    images_full_names = add_full_path_and_extention_to_filenames(selected_names,
                                                                 pascal_images_folder,
                                                                 images_extention)
    annotations_full_names = add_full_path_and_extention_to_filenames(
        selected_names,
        pascal_class_annotations_folder,
        annotations_extention,
    )

    # list() because zip() is a one-shot iterator under Python 3 and callers
    # (e.g. pascal_voc_berkeley_combined) traverse these pairs twice.
    return list(zip(images_full_names, annotations_full_names))
def get_augmented_pascal_image_annotation_filename_pairs(pascal_root,
                                                         pascal_berkeley_root,
                                                         mode=2):
    """Returns image/annotation filenames pairs train/val splits from combined
    Pascal VOC.

    Returns two arrays with train and validation split respectively, each
    holding image full filename / annotation full filename pairs derived from
    PASCAL and PASCAL Berkeley Augmented dataset. The Berkley augmented
    dataset can be downloaded from here:
    http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/semantic_contours/benchmark.tgz
    Consider running convert_pascal_berkeley_augmented_mat_annotations_to_png()
    after extraction.

    The PASCAL VOC dataset can be downloaded from here:
    http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar
    Consider specifying root full names for both of them as arguments for this
    function after extracting them.

    The function has three types of train/val splits (credit matconvnet-fcn):

    Let BT, BV, PT, PV, and PX be the Berkeley training and validation
    sets and PASCAL segmentation challenge training, validation, and
    test sets. Let T, V, X be the final training, validation, and test
    sets.

    Mode 1::
        V = PV (same validation set as PASCAL)

    Mode 2:: (default)
        V = PV \\ BT (PASCAL val set that is not a Berkeley training
        image)

    Mode 3::
        V = PV \\ (BV + BT)

    In all cases:
        S = PT + PV + BT + BV
        X = PX (the test set is unchanged)
        T = (S \\ V) \\ X (the rest is training material)

    Parameters
    ----------
    pascal_root : string
        Path to the PASCAL VOC dataset root that is usually named 'VOC2012'
        after being extracted from tar file.
    pascal_berkeley_root : string
        Path to the PASCAL Berkeley VOC dataset root that is usually named
        'benchmark_RELEASE' after being extracted from tar file.
    mode : int
        The type of train/val data split; one of 1, 2 or 3 (see above).

    Returns
    -------
    image_annotation_pairs : ([(str, str), ...], [(str, str), ...])
        Train and validation filename pairs with full names.

    Raises
    ------
    ValueError
        If `mode` is not 1, 2 or 3.
    """  # pylint: disable=E501
    pascal_txts = get_pascal_segmentation_images_lists_txts(
        pascal_root=pascal_root)
    berkeley_txts = get_pascal_berkeley_augmented_segmentation_images_lists_txts(
        pascal_berkeley_root=pascal_berkeley_root)

    pascal_name_lists = readlines_with_strip_array_version(pascal_txts)
    berkeley_name_lists = readlines_with_strip_array_version(berkeley_txts)

    # Materialize the name lists as sets for the set algebra below.
    pascal_train_name_set, pascal_val_name_set, _ = map(set, pascal_name_lists)
    berkeley_train_name_set, berkeley_val_name_set = map(set, berkeley_name_lists)

    all_berkeley = berkeley_train_name_set | berkeley_val_name_set
    all_pascal = pascal_train_name_set | pascal_val_name_set
    everything = all_berkeley | all_pascal

    # Extract the validation subset based on the selected mode.
    if mode == 1:
        # 1449 validation images, 10582 training images
        validation = pascal_val_name_set
    elif mode == 2:
        # 904 validation images, 11127 training images
        validation = pascal_val_name_set - berkeley_train_name_set
    elif mode == 3:
        # 346 validation images, 11685 training images
        validation = pascal_val_name_set - all_berkeley
    else:
        # Previously an unknown mode fell through to a NameError on
        # `validation`; fail loudly with a clear message instead.
        raise ValueError('mode must be 1, 2 or 3, got {}'.format(mode))

    # The rest of the dataset is for training.
    train = everything - validation
    # The part that can be extracted from the Berkeley dataset ...
    train_from_berkeley = train & all_berkeley
    # ... and the rest will be loaded from the original PASCAL dataset.
    train_from_pascal = train - train_from_berkeley

    train_from_berkeley_image_annotation_pairs = (
        get_pascal_berkeley_augmented_selected_image_annotation_filenames_pairs(
            pascal_berkeley_root,
            list(train_from_berkeley)))
    train_from_pascal_image_annotation_pairs = (
        get_pascal_selected_image_annotation_filenames_pairs(
            pascal_root,
            list(train_from_pascal)))

    overall_train_image_annotation_filename_pairs = (
        list(train_from_berkeley_image_annotation_pairs) +
        list(train_from_pascal_image_annotation_pairs))

    overall_val_image_annotation_filename_pairs = (
        get_pascal_selected_image_annotation_filenames_pairs(
            pascal_root,
            list(validation)))

    return (overall_train_image_annotation_filename_pairs,
            overall_val_image_annotation_filename_pairs)
def pascal_filename_pairs_to_imageset_txt(voc_imageset_txt_path, filename_pairs,
                                          image_extension='.jpg'):
    """Write a PASCAL-style ImageSet txt listing base image names, one per line.

    Parameters
    ----------
    voc_imageset_txt_path : string
        Path of the txt file to (over)write.
    filename_pairs : iterable of (image_path, annotation_path)
        Pairs as produced by the pair-building helpers in this module.
    image_extension : string
        Only pairs whose image path ends with this extension are written.
    """
    with open(voc_imageset_txt_path, 'w') as txtfile:
        # Plain loop instead of a side-effecting list comprehension.
        for image_path, _ in filename_pairs:
            if image_path.endswith(image_extension):
                base_name = os.path.splitext(os.path.basename(image_path))[0]
                txtfile.write(base_name + '\n')
def pascal_combine_annotation_files(filename_pairs, output_annotations_path):
    """Copy every annotation file from `filename_pairs` into one folder.

    Parameters
    ----------
    filename_pairs : iterable of (image_path, annotation_path)
        Pairs as produced by the pair-building helpers in this module; only
        the annotation path of each pair is used.
    output_annotations_path : string
        Folder that receives a copy of every annotation file.
    """
    # mkdir_p is defined elsewhere in this project; presumably it creates the
    # folder (and parents) if missing — confirm against its definition.
    mkdir_p(output_annotations_path)
    for img_path, gt_path in filename_pairs:
        # copy2 preserves file metadata (timestamps) along with the contents.
        shutil.copy2(gt_path, output_annotations_path)
@data_pascal_voc.config
def voc_config():
    """Configuration for the PASCAL VOC commands: every local variable
    assigned below becomes a config entry injected into the commands of
    this module (sacred-style config function)."""
    # TODO(ahundt) add md5 sums for each file
    verbose = True
    # All archives are downloaded/extracted under ~/.keras/datasets/VOC2012.
    dataset_root = os.path.join(os.path.expanduser("~"), '.keras', 'datasets')
    dataset_path = dataset_root + '/VOC2012'
    # sys.path.append("tf-image-segmentation/")
    # os.environ["CUDA_VISIBLE_DEVICES"] = '1'
    # based on https://github.com/martinkersner/train-DeepLab
    # original PASCAL VOC 2012
    # wget
    # http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar
    # # 2 GB
    pascal_root = dataset_path + '/VOCdevkit/VOC2012'
    # berkeley augmented Pascal VOC
    # wget
    # http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/semantic_contours/benchmark.tgz
    # # 1.3 GB
    # Pascal Context
    # http://www.cs.stanford.edu/~roozbeh/pascal-context/
    # http://www.cs.stanford.edu/~roozbeh/pascal-context/trainval.tar.gz
    pascal_berkeley_root = dataset_path + '/benchmark_RELEASE'
    # Download URLs; entries correspond 1:1 with `filenames` and `md5s` below.
    urls = [
        'http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar',
        'http://www.eecs.berkeley.edu/Research/Projects/'
        'CS/vision/grouping/semantic_contours/benchmark.tgz',
        'http://www.cs.stanford.edu/~roozbeh/pascal-context/trainval.tar.gz',
        'http://www.cs.stanford.edu/~roozbeh/pascal-context/33_context_labels.tar.gz',
        'http://www.cs.stanford.edu/~roozbeh/pascal-context/59_context_labels.tar.gz',
        'http://www.cs.stanford.edu/~roozbeh/pascal-context/33_labels.txt',
        'http://www.cs.stanford.edu/~roozbeh/pascal-context/59_labels.txt'
    ]
    filenames = ['VOCtrainval_11-May-2012.tar',
                 'benchmark.tgz',
                 'trainval.tar.gz',
                 '33_context_labels.tar.gz',
                 '59_context_labels.tar.gz',
                 '33_labels.txt',
                 '59_labels.txt'
                 ]
    md5s = ['6cd6e144f989b92b3379bac3b3de84fd',
            '82b4d87ceb2ed10f6038a1cba92111cb',
            'df034edb2c12aa7d33b42b20bb1796e3',
            '180101cfc01c71867b6686207f071eb9',
            'f85d450010762a0e1080304286ce30ed',
            '8840f5439b471aecf991ac6448b826e6',
            '993901f2d930cc038c406845f08fa082']
    # Outputs of the combined PASCAL + Berkeley dataset build.
    combined_imageset_train_txt = dataset_path + '/combined_imageset_train.txt'
    combined_imageset_val_txt = dataset_path + '/combined_imageset_val.txt'
    combined_annotations_path = dataset_path + '/combined_annotations'
    # see get_augmented_pascal_image_annotation_filename_pairs()
    voc_data_subset_mode = 2
@data_pascal_voc.capture
def pascal_voc_files(dataset_path, filenames, dataset_root, urls, md5s):
    """Return the expected full paths of the downloaded dataset archives.

    Parameters
    ----------
    dataset_path : string
        Folder the archives are downloaded into.
    filenames : list of strings
        Archive filenames (see voc_config()).
    dataset_root, urls, md5s
        Unused here; accepted so the captured config signature matches the
        other commands.

    Returns
    -------
    list of strings
        One full path per archive filename.
    """
    # Debug output of the captured configuration.
    print(dataset_path)
    print(dataset_root)
    print(urls)
    print(filenames)
    print(md5s)
    # Previously this concatenated `dataset_path + filename` without a path
    # separator, producing paths like '.../VOC2012VOCtrainval...'. Join with
    # the proper separator instead.
    return [os.path.join(dataset_path, filename) for filename in filenames]
@data_pascal_voc.command
def pascal_voc_download(dataset_path, filenames, dataset_root, urls, md5s):
    """Download and extract all PASCAL VOC related archives into `dataset_path`."""
    # Compute (and print) the expected archive paths; the value itself is
    # currently unused.
    zip_paths = pascal_voc_files(
        dataset_path, filenames, dataset_root, urls, md5s)
    for url, filename, md5 in zip(urls, filenames, md5s):
        # get_file (presumably keras.utils.get_file — confirm against the
        # module imports) verifies the md5 and extracts the archive in place.
        path = get_file(filename, url, md5_hash=md5,
                        extract=True, cache_subdir=dataset_path)
@data_pascal_voc.command
def pascal_voc_berkeley_combined(dataset_path,
                                 pascal_root,
                                 pascal_berkeley_root,
                                 voc_data_subset_mode,
                                 combined_imageset_train_txt,
                                 combined_imageset_val_txt,
                                 combined_annotations_path):
    """Build the combined PASCAL + Berkeley train/val imageset txts and the
    merged annotation folder on disk."""
    # Returns a list of (image, annotation)
    # filename pairs (filename.jpg, filename.png)
    overall_train_image_annotation_filename_pairs, \
        overall_val_image_annotation_filename_pairs = \
        get_augmented_pascal_image_annotation_filename_pairs(
            pascal_root=pascal_root,
            pascal_berkeley_root=pascal_berkeley_root,
            mode=voc_data_subset_mode)
    # combine the annotation files into one folder
    # NOTE(review): the pairs are iterated again below, so they must be real
    # lists, not one-shot iterators — confirm the builder returns lists.
    pascal_combine_annotation_files(
        list(overall_train_image_annotation_filename_pairs) +
        list(overall_val_image_annotation_filename_pairs),
        combined_annotations_path)
    # generate the train imageset txt
    pascal_filename_pairs_to_imageset_txt(
        combined_imageset_train_txt,
        overall_train_image_annotation_filename_pairs
    )
    # generate the val imageset txt
    pascal_filename_pairs_to_imageset_txt(
        combined_imageset_val_txt,
        overall_val_image_annotation_filename_pairs
    )
@data_pascal_voc.command
def pascal_voc_setup(filenames, dataset_path, pascal_root,
                     pascal_berkeley_root, dataset_root,
                     voc_data_subset_mode,
                     urls, md5s,
                     combined_imageset_train_txt,
                     combined_imageset_val_txt,
                     combined_annotations_path):
    """Run the full pipeline: download, convert annotations, and build the
    combined PASCAL + Berkeley train/val split."""
    # download the dataset
    pascal_voc_download(dataset_path, filenames,
                        dataset_root, urls, md5s)
    # convert the Berkeley .mat annotations to .png (no-op if already done)
    convert_pascal_berkeley_augmented_mat_annotations_to_png(
        pascal_berkeley_root)
    # combine both datasets into train/val imagesets + merged annotations
    pascal_voc_berkeley_combined(dataset_path,
                                 pascal_root,
                                 pascal_berkeley_root,
                                 voc_data_subset_mode,
                                 combined_imageset_train_txt,
                                 combined_imageset_val_txt,
                                 combined_annotations_path)
@data_pascal_voc.automain
def main(filenames, dataset_path, pascal_root,
         pascal_berkeley_root, dataset_root,
         voc_data_subset_mode,
         urls, md5s,
         combined_imageset_train_txt,
         combined_imageset_val_txt,
         combined_annotations_path):
    """Entry point: run the full PASCAL VOC dataset setup pipeline."""
    # NOTE(review): calling the config function directly is unusual for
    # sacred-style experiments (the framework normally injects config) —
    # confirm this call is intended.
    voc_config()
    pascal_voc_setup(filenames, dataset_path, pascal_root,
                     pascal_berkeley_root, dataset_root,
                     voc_data_subset_mode,
                     urls, md5s,
                     combined_imageset_train_txt,
                     combined_imageset_val_txt,
                     combined_annotations_path)
from __future__ import absolute_import
from .convaware import ConvolutionAware
from __future__ import absolute_import
import numpy as np
from keras import backend as K
from keras.initializers import Initializer, Orthogonal
class ConvolutionAware(Initializer):
    """
    Initializer that generates orthogonal convolution filters in the fourier
    space. If this initializer is passed a shape that is not 3D or 4D,
    orthogonal initialization will be used.

    # Arguments
        eps_std: Standard deviation for the random normal noise used to break
            symmetry in the inverse fourier transform.
        seed: A Python integer. Used to seed the random generator.

    # References
        Armen Aghajanyan, https://arxiv.org/abs/1702.06295
    """

    def __init__(self, eps_std=0.05, seed=None):
        self.eps_std = eps_std
        self.seed = seed
        self.orthogonal = Orthogonal()

    def __call__(self, shape):
        rank = len(shape)

        if self.seed is not None:
            np.random.seed(self.seed)

        fan_in, fan_out = _compute_fans(shape, K.image_data_format())
        # He-style variance. The float literal matters: under Python 2
        # `2 / fan_in` is integer division and silently truncates to 0,
        # producing all-zero filters after scaling.
        variance = 2.0 / fan_in

        if rank == 3:
            row, stack_size, filters_size = shape

            transpose_dimensions = (2, 1, 0)
            kernel_shape = (row,)
            # np.fft.irfft needs the output length as a scalar; wrap it so
            # the call signature matches irfft2/irfftn below.
            correct_ifft = lambda shape, s=[None]: np.fft.irfft(shape, s[0])
            correct_fft = np.fft.rfft

        elif rank == 4:
            row, column, stack_size, filters_size = shape

            transpose_dimensions = (2, 3, 0, 1)
            kernel_shape = (row, column)
            correct_ifft = np.fft.irfft2
            correct_fft = np.fft.rfft2

        elif rank == 5:
            x, y, z, stack_size, filters_size = shape

            transpose_dimensions = (3, 4, 0, 1, 2)
            kernel_shape = (x, y, z)
            correct_fft = np.fft.rfftn
            correct_ifft = np.fft.irfftn

        else:
            # Not a convolution kernel shape: fall back to orthogonal init.
            return K.variable(self.orthogonal(shape), dtype=K.floatx())

        kernel_fourier_shape = correct_fft(np.zeros(kernel_shape)).shape

        init = []
        for i in range(filters_size):
            # Random orthogonal basis in fourier space, one per filter.
            basis = self._create_basis(
                stack_size, np.prod(kernel_fourier_shape))
            basis = basis.reshape((stack_size,) + kernel_fourier_shape)

            # Back to the spatial domain, plus noise to break symmetry.
            filters = [correct_ifft(x, kernel_shape) +
                       np.random.normal(0, self.eps_std, kernel_shape) for
                       x in basis]

            init.append(filters)

        # Format of array is now: filters, stack, row, column
        init = np.array(init)
        init = self._scale_filters(init, variance)
        return init.transpose(transpose_dimensions)

    def _create_basis(self, filters, size):
        """Return `filters` orthonormal rows of dimension `size`."""
        if size == 1:
            # A 1-dimensional basis is just scaled noise.
            return np.random.normal(0.0, self.eps_std, (filters, size))

        nbb = filters // size + 1
        li = []
        for i in range(nbb):
            a = np.random.normal(0.0, 1.0, (size, size))
            a = self._symmetrize(a)
            # The columns of U from an SVD form an orthonormal set.
            u, _, v = np.linalg.svd(a)
            li.extend(u.T.tolist())
        p = np.array(li[:filters], dtype=K.floatx())
        return p

    def _symmetrize(self, a):
        """Return a symmetric matrix built from `a`: a + a.T - diag(a)."""
        return a + a.T - np.diag(a.diagonal())

    def _scale_filters(self, filters, variance):
        """Rescale `filters` so their empirical variance equals `variance`."""
        c_var = np.var(filters)
        p = np.sqrt(variance / c_var)
        return filters * p

    def get_config(self):
        return {
            'eps_std': self.eps_std,
            'seed': self.seed
        }
def _compute_fans(shape, data_format='channels_last'):
    """Computes the number of input and output units for a weight shape.

    # Arguments
        shape: Integer shape tuple.
        data_format: Image data format to use for convolution kernels.
            Note that all kernels in Keras are standardized on the
            `channels_last` ordering (even when inputs are set
            to `channels_first`).

    # Returns
        A tuple of scalars, `(fan_in, fan_out)`.

    # Raises
        ValueError: in case of invalid `data_format` argument.
    """
    rank = len(shape)
    if rank == 2:
        # Dense kernel: (input_dim, output_dim).
        return shape[0], shape[1]
    if rank in (3, 4, 5):
        # Convolution kernels (1D, 2D or 3D).
        # TH kernel shape: (depth, input_depth, ...)
        # TF kernel shape: (..., input_depth, depth)
        if data_format == 'channels_first':
            receptive_field_size = np.prod(shape[2:])
            return (shape[1] * receptive_field_size,
                    shape[0] * receptive_field_size)
        if data_format == 'channels_last':
            receptive_field_size = np.prod(shape[:-2])
            return (shape[-2] * receptive_field_size,
                    shape[-1] * receptive_field_size)
        raise ValueError('Invalid data_format: ' + data_format)
    # No specific assumptions for other ranks.
    fan = np.sqrt(np.prod(shape))
    return fan, fan
from __future__ import absolute_import
from .advanced_activations.pelu import PELU
from .advanced_activations.srelu import SReLU
from .advanced_activations.swish import Swish
from .advanced_activations.sinerelu import SineReLU
from .convolutional.cosineconvolution2d import CosineConv2D
from .convolutional.cosineconvolution2d import CosineConvolution2D
from .convolutional.subpixelupscaling import SubPixelUpscaling
from .core import CosineDense
from .crf import CRF
from .capsule import Capsule
from .normalization.instancenormalization import InstanceNormalization
from .normalization.groupnormalization import GroupNormalization
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment