Commit f8919197, authored by Hang Zhang, committed by GitHub
parent d4e19553
// adapted from: http://www.robots.ox.ac.uk/~vedaldi/assets/hidebib.js
function hideallbibs()
{
    var el = document.getElementsByTagName("div");
    for (var i = 0; i < el.length; ++i) {
        if (el[i].className == "paper") {
            var bib = el[i].getElementsByTagName("pre");
            if (bib.length > 0) {
                bib[0].style.display = 'none';
            }
        }
    }
}

function togglebib(paperid)
{
    var paper = document.getElementById(paperid);
    var bib = paper.getElementsByTagName('pre');
    if (bib.length > 0) {
        if (bib[0].style.display == 'none') {
            bib[0].style.display = 'block';
        } else {
            bib[0].style.display = 'none';
        }
    }
}

function toggleblock(blockId)
{
    var block = document.getElementById(blockId);
    if (block.style.display == 'none') {
        block.style.display = 'block';
    } else {
        block.style.display = 'none';
    }
}

function hideblock(blockId)
{
    var block = document.getElementById(blockId);
    block.style.display = 'none';
}
@@ -3,5 +3,5 @@
{%- block extrahead %}
<script type="text/javascript" src="../_static/js/hidebib.js"></script>
{% endblock %}
@@ -23,26 +23,34 @@ Test Pre-trained Model
    model = encoding.models.get_model('FCN_ResNet50_PContext', pretrained=True)

Prepare the datasets by running the scripts in the ``scripts/`` folder, for example preparing the ``PASCAL Context`` dataset::

    python scripts/prepare_pcontext.py

The test script is in the ``experiments/segmentation/`` folder. For evaluating the model (using MS),
for example ``Encnet_ResNet50_PContext``::

    python test.py --dataset PContext --model-zoo Encnet_ResNet50_PContext --eval
    # pixAcc: 0.792, mIoU: 0.510: 100%|████████████████████████| 1276/1276 [46:31<00:00, 2.19s/it]

The command for training the model can be found by clicking ``cmd`` in the table.

.. role:: raw-html(raw)
    :format: html

+----------------------------------+-----------+-----------+----------------------------------------------------------------------------------------------+------------+
| Model                            | pixAcc    | mIoU      | Command                                                                                      | Logs       |
+==================================+===========+===========+==============================================================================================+============+
| Encnet_ResNet50_PContext         | 79.2%     | 51.0%     | :raw-html:`<a href="javascript:toggleblock('cmd_enc50_pcont')" class="toggleblock">cmd</a>`  | ENC50PC_   |
+----------------------------------+-----------+-----------+----------------------------------------------------------------------------------------------+------------+
| EncNet_ResNet101_PContext        | 80.7%     | 54.1%     | :raw-html:`<a href="javascript:toggleblock('cmd_enc101_pcont')" class="toggleblock">cmd</a>` | ENC101PC_  |
+----------------------------------+-----------+-----------+----------------------------------------------------------------------------------------------+------------+
| EncNet_ResNet50_ADE              | 80.1%     | 41.5%     | :raw-html:`<a href="javascript:toggleblock('cmd_enc50_ade')" class="toggleblock">cmd</a>`    | ENC50ADE_  |
+----------------------------------+-----------+-----------+----------------------------------------------------------------------------------------------+------------+
| EncNet_ResNet101_ADE             | 81.3%     | 44.4%     | :raw-html:`<a href="javascript:toggleblock('cmd_enc101_ade')" class="toggleblock">cmd</a>`   | ENC101ADE_ |
+----------------------------------+-----------+-----------+----------------------------------------------------------------------------------------------+------------+
| EncNet_ResNet101_VOC             | N/A       | 85.9%     | :raw-html:`<a href="javascript:toggleblock('cmd_enc101_voc')" class="toggleblock">cmd</a>`   | ENC101VOC_ |
+----------------------------------+-----------+-----------+----------------------------------------------------------------------------------------------+------------+

.. _ENC50PC: https://github.com/zhanghang1989/image-data/blob/master/encoding/segmentation/logs/encnet_resnet50_pcontext.log?raw=true
.. _ENC101PC: https://github.com/zhanghang1989/image-data/blob/master/encoding/segmentation/logs/encnet_resnet101_pcontext.log?raw=true
@@ -71,6 +79,19 @@ Test Pre-trained Model
CUDA_VISIBLE_DEVICES=0,1,2,3 python train.py --dataset ADE20K --model EncNet --aux --se-loss
</code>
<code xml:space="preserve" id="cmd_enc101_ade" style="display: none; text-align: left; white-space: pre-wrap">
CUDA_VISIBLE_DEVICES=0,1,2,3 python train.py --dataset ADE20K --model EncNet --aux --se-loss --backbone resnet101
</code>
<code xml:space="preserve" id="cmd_enc101_voc" style="display: none; text-align: left; white-space: pre-wrap">
# First, fine-tune the COCO-pretrained model on the augmented set
# (you can also train on COCO from scratch yourself)
CUDA_VISIBLE_DEVICES=0,1,2,3 python train.py --dataset Pascal_aug --model-zoo EncNet_Resnet101_COCO --aux --se-loss --lr 0.001 --syncbn --ngpus 4 --checkname res101
# Then fine-tune on the original set
CUDA_VISIBLE_DEVICES=0,1,2,3 python train.py --dataset Pascal_voc --model encnet --aux --se-loss --backbone resnet101 --lr 0.0001 --syncbn --ngpus 4 --checkname res101 --resume runs/Pascal_aug/encnet/res101/checkpoint.params
</code>
Quick Demo
~~~~~~~~~~
@@ -116,13 +137,14 @@ Train Your Own Model
    CUDA_VISIBLE_DEVICES=0,1,2,3 python train.py --dataset pcontext --model encnet --aux --se-loss

- For detailed training options, please run ``python train.py -h``. Commands for reproducing the pre-trained models can be found in the table above.

.. hint::
    The validation metrics during training are computed using only a center crop and are just for
    monitoring training correctness. For evaluating a pretrained model on the validation set using MS,
    please use the command::

        CUDA_VISIBLE_DEVICES=0,1,2,3 python test.py --dataset pcontext --model encnet --aux --se-loss --resume mycheckpoint --eval
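
For intuition, MS evaluation averages the class logits over multiple scales and horizontal flips before taking the final argmax. Below is a minimal sketch (assuming ``model`` maps a normalized ``1 x 3 x H x W`` tensor to raw per-pixel logits; the actual ``test.py`` additionally handles tuple outputs and crop-based inference for large inputs)::

    import torch
    import torch.nn.functional as F

    def multi_scale_predict(model, image, scales=(0.5, 0.75, 1.0, 1.25, 1.5, 1.75)):
        """Average logits over scales and horizontal flips, then take the argmax."""
        _, _, h, w = image.shape
        fused = 0
        for s in scales:
            x = F.interpolate(image, scale_factor=s, mode='bilinear', align_corners=True)
            logits = model(x)
            # add the prediction for the horizontally flipped input, flipped back
            logits = logits + torch.flip(model(torch.flip(x, dims=[3])), dims=[3])
            fused = fused + F.interpolate(logits, size=(h, w), mode='bilinear', align_corners=True)
        return fused.argmax(dim=1)  # 1 x H x W label map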
Citation
--------
...
@@ -17,16 +17,21 @@ Test Pre-trained Model
- Install PyTorch Encoding (if not yet). Please follow the installation guide `Installing PyTorch Encoding <../notes/compile.html>`_.

- Download the `MINC-2500 <http://opensurfaces.cs.cornell.edu/publications/minc/>`_ dataset using the provided script::

    cd PyTorch-Encoding/
    python scripts/prepare_minc.py

- Download the pre-trained model (pre-trained on the train-1 split using a single training size of 224, with an error rate of :math:`19.70\%` using a single crop on the test-1 set)::

    cd experiments/recognition
    python model/download_models.py

- Test the pre-trained model on MINC-2500::

    python main.py --dataset minc --model deepten --nclass 23 --resume deepten_minc.pth --eval
    # Terminal Output:
    # Loss: 1.005 | Err: 18.96% (1090/5750): 100%|████████████████████| 23/23 [00:18<00:00, 1.26it/s]
Train Your Own Model
@@ -34,7 +39,7 @@ Train Your Own Model
- Example training command for training the above model::

    CUDA_VISIBLE_DEVICES=0,1,2,3 python main.py --dataset minc --model deepten --nclass 23 --batch-size 512 --lr 0.004 --epochs 80 --lr-step 60 --lr-scheduler step

- Detailed training options::
@@ -56,8 +61,6 @@ Train Your Own Model
    --checkname set the checkpoint name
    --eval evaluating

Extending the Software
----------------------
...
@@ -20,10 +20,10 @@ encoding.functions
.. autofunction:: aggregate

:hidden:`scaled_l2`
~~~~~~~~~~~~~~~~~~~

.. autofunction:: scaled_l2

:hidden:`sum_square`
...
@@ -10,4 +10,4 @@
"""An optimized PyTorch package with CUDA backend."""
from .version import __version__
from . import nn, functions, dilated, parallel, utils, models, datasets, optimizer
from .base import *
from .coco import COCOSegmentation
from .ade20k import ADE20KSegmentation
from .pascal_voc import VOCSegmentation
from .pascal_aug import VOCAugSegmentation
from .pcontext import ContextSegmentation
from .cityscapes import CitySegmentation

datasets = {
    'coco': COCOSegmentation,
    'ade20k': ADE20KSegmentation,
    'pascal_voc': VOCSegmentation,
    'pascal_aug': VOCAugSegmentation,
    'pcontext': ContextSegmentation,
    'citys': CitySegmentation,
}

def get_segmentation_dataset(name, **kwargs):
...
@@ -58,8 +58,8 @@ class ADE20KSegmentation(BaseDataset):
        return img, mask

    def _mask_transform(self, mask):
        target = np.array(mask).astype('int64') - 1
        return torch.from_numpy(target)

    def __len__(self):
        return len(self.images)
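# Note (illustration, not part of the source): ADE20K masks store 0 for
# "unlabeled" and 1..150 for the classes, so subtracting 1 yields class ids
# 0..149 with -1 as the ignore index. Casting to int64 means torch.from_numpy
# already returns a LongTensor, which is why the earlier .long() call could be
# dropped:
#
#     raw = np.array([[0, 1], [150, 3]])
#     target = raw.astype('int64') - 1      # [[-1, 0], [149, 2]]
#     torch.from_numpy(target).dtype        # torch.int64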
@@ -90,17 +90,22 @@ def _get_ade20k_pairs(folder, split='train'):
        img_folder = os.path.join(folder, 'images/training')
        mask_folder = os.path.join(folder, 'annotations/training')
        img_paths, mask_paths = get_path_pairs(img_folder, mask_folder)
        print('len(img_paths):', len(img_paths))
        assert len(img_paths) == 20210
    elif split == 'val':
        img_folder = os.path.join(folder, 'images/validation')
        mask_folder = os.path.join(folder, 'annotations/validation')
        img_paths, mask_paths = get_path_pairs(img_folder, mask_folder)
        assert len(img_paths) == 2000
    else:
        assert split == 'trainval'
        train_img_folder = os.path.join(folder, 'images/training')
        train_mask_folder = os.path.join(folder, 'annotations/training')
        val_img_folder = os.path.join(folder, 'images/validation')
        val_mask_folder = os.path.join(folder, 'annotations/validation')
        train_img_paths, train_mask_paths = get_path_pairs(train_img_folder, train_mask_folder)
        val_img_paths, val_mask_paths = get_path_pairs(val_img_folder, val_mask_folder)
        img_paths = train_img_paths + val_img_paths
        mask_paths = train_mask_paths + val_mask_paths
        assert len(img_paths) == 22210
    return img_paths, mask_paths
@@ -37,6 +37,9 @@ class BaseDataset(data.Dataset):
    def pred_offset(self):
        raise NotImplementedError

    def make_pred(self, x):
        return x + self.pred_offset
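    # (make_pred shifts 0-based training labels back into the dataset's own
    # label space via pred_offset; e.g. VOCSegmentation below returns 0, and a
    # dataset whose raw labels start at 1 would return 1.)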
    def _val_sync_transform(self, img, mask):
        outsize = self.crop_size
        short_size = outsize
@@ -75,10 +78,6 @@ class BaseDataset(data.Dataset):
            ow = int(1.0 * w * oh / h)
        img = img.resize((ow, oh), Image.BILINEAR)
        mask = mask.resize((ow, oh), Image.NEAREST)
        # pad crop
        if short_size < crop_size:
            padh = crop_size - oh if oh < crop_size else 0
...
###########################################################################
# Created by: Hang Zhang
# Email: zhang.hang@rutgers.edu
# Copyright (c) 2018
###########################################################################
import os
import sys
import random  # used by the random augmentations in _sync_transform below
import numpy as np
from tqdm import tqdm, trange
from PIL import Image, ImageOps, ImageFilter

import torch
import torch.utils.data as data
import torchvision.transforms as transform

from .base import BaseDataset

class CitySegmentation(BaseDataset):
    NUM_CLASS = 19
    def __init__(self, root=os.path.expanduser('~/.encoding/data'), split='train',
                 mode=None, transform=None, target_transform=None, **kwargs):
        super(CitySegmentation, self).__init__(
            root, split, mode, transform, target_transform, **kwargs)
        #self.root = os.path.join(root, self.BASE_DIR)
        self.images, self.mask_paths = get_city_pairs(self.root, self.split)
        assert (len(self.images) == len(self.mask_paths))
        if len(self.images) == 0:
            raise RuntimeError("Found 0 images in subfolders of: "
                               + self.root + "\n")
        self._indices = np.array(range(-1, 19))
        self._classes = np.array([0, 7, 8, 11, 12, 13, 17, 19, 20, 21, 22,
                                  23, 24, 25, 26, 27, 28, 31, 32, 33])
        self._key = np.array([-1, -1, -1, -1, -1, -1,
                              -1, -1, 0, 1, -1, -1,
                              2, 3, 4, -1, -1, -1,
                              5, -1, 6, 7, 8, 9,
                              10, 11, 12, 13, 14, 15,
                              -1, -1, 16, 17, 18])
        self._mapping = np.array(range(-1, len(self._key) - 1)).astype('int32')

    def _class_to_index(self, mask):
        # assert the values
        values = np.unique(mask)
        for i in range(len(values)):
            assert(values[i] in self._mapping)
        index = np.digitize(mask.ravel(), self._mapping, right=True)
        return self._key[index].reshape(mask.shape)
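    # Illustration (not part of the class): self._mapping is [-1, 0, ..., 33],
    # so np.digitize(values, self._mapping, right=True) returns v + 1 for each
    # raw label id v, and indexing self._key with that position maps raw
    # Cityscapes ids to train ids in -1..18. For example:
    #
    #     raw = np.array([7, 26, 0])                          # road, car, unlabeled
    #     idx = np.digitize(raw, self._mapping, right=True)   # [8, 27, 1]
    #     self._key[idx]                                      # [0, 13, -1]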
    def _preprocess(self, mask_file):
        if os.path.exists(mask_file):
            masks = torch.load(mask_file)
            return masks
        masks = []
        print("Preprocessing masks, this will take a while. " +
              "But don't worry, it only runs once for each split.")
        tbar = tqdm(self.mask_paths)
        for fname in tbar:
            tbar.set_description("Preprocessing masks {}".format(fname))
            mask = Image.fromarray(self._class_to_index(
                np.array(Image.open(fname))).astype('int8'))
            masks.append(mask)
        torch.save(masks, mask_file)
        return masks

    def __getitem__(self, index):
        img = Image.open(self.images[index]).convert('RGB')
        if self.mode == 'test':
            if self.transform is not None:
                img = self.transform(img)
            return img, os.path.basename(self.images[index])
        #mask = self.masks[index]
        mask = Image.open(self.mask_paths[index])
        # synchronized transform
        if self.mode == 'train':
            img, mask = self._sync_transform(img, mask)
        elif self.mode == 'val':
            img, mask = self._val_sync_transform(img, mask)
        else:
            assert self.mode == 'testval'
            mask = self._mask_transform(mask)
        # general resize, normalize and toTensor
        if self.transform is not None:
            img = self.transform(img)
        if self.target_transform is not None:
            mask = self.target_transform(mask)
        return img, mask

    def _sync_transform(self, img, mask):
        # random mirror
        if random.random() < 0.5:
            img = img.transpose(Image.FLIP_LEFT_RIGHT)
            mask = mask.transpose(Image.FLIP_LEFT_RIGHT)
        crop_size = self.crop_size
        # random scale (short edge between 0.5x and 2.5x of base_size)
        short_size = random.randint(int(self.base_size * 0.5), int(self.base_size * 2.5))
        w, h = img.size
        if h > w:
            ow = short_size
            oh = int(1.0 * h * ow / w)
        else:
            oh = short_size
            ow = int(1.0 * w * oh / h)
        img = img.resize((ow, oh), Image.BILINEAR)
        mask = mask.resize((ow, oh), Image.NEAREST)
        # random rotate -10~10, mask using NN rotate
        deg = random.uniform(-10, 10)
        img = img.rotate(deg, resample=Image.BILINEAR)
        mask = mask.rotate(deg, resample=Image.NEAREST)
        # pad crop
        if short_size < crop_size:
            padh = crop_size - oh if oh < crop_size else 0
            padw = crop_size - ow if ow < crop_size else 0
            img = ImageOps.expand(img, border=(0, 0, padw, padh), fill=0)
            mask = ImageOps.expand(mask, border=(0, 0, padw, padh), fill=0)
        # random crop crop_size
        w, h = img.size
        x1 = random.randint(0, w - crop_size)
        y1 = random.randint(0, h - crop_size)
        img = img.crop((x1, y1, x1 + crop_size, y1 + crop_size))
        mask = mask.crop((x1, y1, x1 + crop_size, y1 + crop_size))
        # gaussian blur as in PSP
        if random.random() < 0.5:
            img = img.filter(ImageFilter.GaussianBlur(
                radius=random.random()))
        # final transform
        return img, self._mask_transform(mask)

    def _mask_transform(self, mask):
        #target = np.array(mask).astype('int32') - 1
        target = self._class_to_index(np.array(mask).astype('int32'))
        return torch.from_numpy(target).long()

    def __len__(self):
        return len(self.images)

    def make_pred(self, mask):
        values = np.unique(mask)
        for i in range(len(values)):
            assert(values[i] in self._indices)
        index = np.digitize(mask.ravel(), self._indices, right=True)
        return self._classes[index].reshape(mask.shape)


def get_city_pairs(folder, split='train'):
    def get_path_pairs(img_folder, mask_folder):
        img_paths = []
        mask_paths = []
        for root, directories, files in os.walk(img_folder):
            for filename in files:
                if filename.endswith(".png"):
                    imgpath = os.path.join(root, filename)
                    foldername = os.path.basename(os.path.dirname(imgpath))
                    maskname = filename.replace('leftImg8bit', 'gtFine_labelIds')
                    maskpath = os.path.join(mask_folder, foldername, maskname)
                    if os.path.isfile(imgpath) and os.path.isfile(maskpath):
                        img_paths.append(imgpath)
                        mask_paths.append(maskpath)
                    else:
                        print('cannot find the mask or image:', imgpath, maskpath)
        print('Found {} images in the folder {}'.format(len(img_paths), img_folder))
        return img_paths, mask_paths

    if split == 'train' or split == 'val' or split == 'test':
        img_folder = os.path.join(folder, 'leftImg8bit/' + split)
        mask_folder = os.path.join(folder, 'gtFine/' + split)
        img_paths, mask_paths = get_path_pairs(img_folder, mask_folder)
        return img_paths, mask_paths
    else:
        assert split == 'trainval'
        print('trainval set')
        train_img_folder = os.path.join(folder, 'leftImg8bit/train')
        train_mask_folder = os.path.join(folder, 'gtFine/train')
        val_img_folder = os.path.join(folder, 'leftImg8bit/val')
        val_mask_folder = os.path.join(folder, 'gtFine/val')
        train_img_paths, train_mask_paths = get_path_pairs(train_img_folder, train_mask_folder)
        val_img_paths, val_mask_paths = get_path_pairs(val_img_folder, val_mask_folder)
        img_paths = train_img_paths + val_img_paths
        mask_paths = train_mask_paths + val_mask_paths
        return img_paths, mask_paths
@@ -6,51 +6,26 @@ import torch
from .base import BaseDataset

class COCOSegmentation(BaseDataset):
    NUM_CLASS = 21
    CAT_LIST = [0, 5, 2, 16, 9, 44, 6, 3, 17, 62, 21, 67, 18, 19, 4,
                1, 64, 20, 63, 7, 72]
    def __init__(self, root=os.path.expanduser('~/.encoding/data'), split='train',
                 mode=None, transform=None, target_transform=None, **kwargs):
        super(COCOSegmentation, self).__init__(
            root, split, mode, transform, target_transform, **kwargs)
        from pycocotools.coco import COCO
        from pycocotools import mask
        if split == 'train':
            print('train set')
            ann_file = os.path.join(root, 'annotations/instances_train2017.json')
            ids_file = os.path.join(root, 'annotations/train_ids.pth')
            self.root = os.path.join(root, 'train2017')
        else:
            print('val set')
            ann_file = os.path.join(root, 'annotations/instances_val2017.json')
            ids_file = os.path.join(root, 'annotations/val_ids.pth')
            self.root = os.path.join(root, 'val2017')
        self.coco = COCO(ann_file)
        self.coco_mask = mask
        if os.path.exists(ids_file):
@@ -68,8 +43,8 @@ class COCOSegmentation(BaseDataset):
        path = img_metadata['file_name']
        img = Image.open(os.path.join(self.root, path)).convert('RGB')
        cocotarget = coco.loadAnns(coco.getAnnIds(imgIds=img_id))
        mask = Image.fromarray(self._gen_seg_mask(
            cocotarget, img_metadata['height'], img_metadata['width']))
        # synchronized transform
        if self.mode == 'train':
            img, mask = self._sync_transform(img, mask)
@@ -95,8 +70,8 @@ class COCOSegmentation(BaseDataset):
            rle = coco_mask.frPyObjects(instance['segmentation'], h, w)
            m = coco_mask.decode(rle)
            cat = instance['category_id']
            if cat in self.CAT_LIST:
                c = self.CAT_LIST.index(cat)
            else:
                continue
            if len(m.shape) < 3:
@@ -124,3 +99,28 @@ class COCOSegmentation(BaseDataset):
        print('Found number of qualified images: ', len(new_ids))
        torch.save(new_ids, ids_file)
        return new_ids
"""
NUM_CHANNEL = 91
[] background
[5] airplane
[2] bicycle
[16] bird
[9] boat
[44] bottle
[6] bus
[3] car
[17] cat
[62] chair
[21] cow
[67] dining table
[18] dog
[19] horse
[4] motorcycle
[1] person
[64] potted plant
[20] sheep
[63] couch
[7] train
[72] tv
"""
@@ -15,8 +15,8 @@ class VOCAugSegmentation(BaseDataset):
    ]
    NUM_CLASS = 21
    TRAIN_BASE_DIR = 'VOCaug/dataset/'
    def __init__(self, root=os.path.expanduser('~/.encoding/data'), split='train',
                 mode=None, transform=None, target_transform=None, **kwargs):
        super(VOCAugSegmentation, self).__init__(root, split, mode, transform,
                                                 target_transform, **kwargs)
        # train/val/test splits are pre-cut
...
@@ -16,8 +16,8 @@ class VOCSegmentation(BaseDataset):
    ]
    NUM_CLASS = 21
    BASE_DIR = 'VOCdevkit/VOC2012'
    def __init__(self, root=os.path.expanduser('~/.encoding/data'), split='train',
                 mode=None, transform=None, target_transform=None, **kwargs):
        super(VOCSegmentation, self).__init__(root, split, mode, transform,
                                              target_transform, **kwargs)
        _voc_root = os.path.join(self.root, self.BASE_DIR)
@@ -65,10 +65,8 @@ class VOCSegmentation(BaseDataset):
            mask = self._mask_transform(mask)
        # general resize, normalize and toTensor
        if self.transform is not None:
            img = self.transform(img)
        if self.target_transform is not None:
            target = self.target_transform(target)
        return img, target
@@ -79,3 +77,7 @@ class VOCSegmentation(BaseDataset):
    def __len__(self):
        return len(self.images)

    @property
    def pred_offset(self):
        return 0
@@ -26,7 +26,6 @@ class ContextSegmentation(BaseDataset):
        root = os.path.join(root, self.BASE_DIR)
        annFile = os.path.join(root, 'trainval_merged.json')
        imgDir = os.path.join(root, 'JPEGImages')
        # training mode
        self.detail = Detail(annFile, imgDir, split)
        self.transform = transform
@@ -40,6 +39,8 @@ class ContextSegmentation(BaseDataset):
            68, 326, 72, 458, 34, 207, 80, 355, 85, 347, 220, 349, 360,
            98, 187, 104, 105, 366, 189, 368, 113, 115]))
        self._key = np.array(range(len(self._mapping))).astype('uint8')
        mask_file = os.path.join(root, self.split + '.pth')
        print('mask_file:', mask_file)
        if os.path.exists(mask_file):
            self.masks = torch.load(mask_file)
        else:
@@ -48,7 +49,6 @@ class ContextSegmentation(BaseDataset):
    def _class_to_index(self, mask):
        # assert the values
        values = np.unique(mask)
        for i in range(len(values)):
            assert(values[i] in self._mapping)
        index = np.digitize(mask.ravel(), self._mapping, right=True)
@@ -89,10 +89,8 @@ class ContextSegmentation(BaseDataset):
            mask = self._mask_transform(mask)
        # general resize, normalize and toTensor
        if self.transform is not None:
            img = self.transform(img)
        if self.target_transform is not None:
            mask = self.target_transform(mask)
        return img, mask
...
@@ -10,9 +10,6 @@ __all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
model_urls = {
    'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
    'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
}
@@ -135,12 +132,24 @@ class ResNet(nn.Module):
    - Yu, Fisher, and Vladlen Koltun. "Multi-scale context aggregation by dilated convolutions."
    """
    # pylint: disable=unused-variable
    def __init__(self, block, layers, num_classes=1000, dilated=True,
                 deep_base=True, norm_layer=nn.BatchNorm2d):
        self.inplanes = 128 if deep_base else 64
        super(ResNet, self).__init__()
        if deep_base:
            self.conv1 = nn.Sequential(
                nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1, bias=False),
                norm_layer(64),
                nn.ReLU(inplace=True),
                nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1, bias=False),
                norm_layer(64),
                nn.ReLU(inplace=True),
                nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1, bias=False),
            )
        else:
            self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                                   bias=False)
        self.bn1 = norm_layer(self.inplanes)
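        # (With deep_base, three stacked 3x3 convolutions replace the single
        # 7x7 stem convolution, a common choice in segmentation backbones;
        # the stem then outputs 128 channels, hence inplanes = 128 above.)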
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0], norm_layer=norm_layer)
@@ -155,7 +164,7 @@ class ResNet(nn.Module):
                                       norm_layer=norm_layer)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2,
                                       norm_layer=norm_layer)
        self.avgpool = nn.AvgPool2d(7, stride=1)
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        for m in self.modules():
@@ -270,5 +279,7 @@ def resnet152(pretrained=False, root='~/.encoding/models', **kwargs):
    """
    model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs)
    if pretrained:
        from ..models.model_store import get_model_file
        model.load_state_dict(torch.load(
            get_model_file('resnet152', root=root)), strict=False)
    return model
"""Encoding Autograd Fuctions""" """Encoding Autograd Fuctions"""
from .encoding import * from .encoding import *
from .syncbn import * from .syncbn import *
from .customize import *
##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
## Created by: Hang Zhang
## Email: zhanghang0704@gmail.com
## Copyright (c) 2018
##
## This source code is licensed under the MIT-style license found in the
## LICENSE file in the root directory of this source tree
##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
"""Customized functions"""
import torch
from torch.autograd import Variable, Function
from .. import lib

__all__ = ['NonMaxSuppression']

def NonMaxSuppression(boxes, scores, threshold):
    r"""Non-Maximum Suppression

    The algorithm begins by storing the highest-scoring bounding
    box, and eliminating any box whose intersection-over-union (IoU)
    with it is too great. The procedure repeats on the surviving
    boxes, and so on until there are no boxes left.
    The stored boxes are returned.

    NB: The function returns a tuple (mask, indices), where
    indices index into the input boxes and are sorted
    according to score, from highest to lowest.
    indices[i][mask[i]] gives the indices of the surviving
    boxes from the ith batch, sorted by score.

    Args:
        - boxes :math:`(N, n_boxes, 4)`
        - scores :math:`(N, n_boxes)`
        - threshold (float): IoU above which to eliminate boxes

    Outputs:
        - mask: :math:`(N, n_boxes)`
        - indices: :math:`(N, n_boxes)`

    Examples::

        >>> boxes = torch.Tensor([[[10., 20., 20., 15.],
        >>>                        [24., 22., 50., 54.],
        >>>                        [10., 21., 20., 14.5]]])
        >>> scores = torch.abs(torch.randn([1, 3]))
        >>> mask, indices = NonMaxSuppression(boxes, scores, 0.7)
        >>> # indices are SORTED according to score.
        >>> surviving_box_indices = indices[mask]
    """
    if boxes.is_cuda:
        return lib.gpu.non_max_suppression(boxes, scores, threshold)
    else:
        return lib.cpu.non_max_suppression(boxes, scores, threshold)
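
# ---------------------------------------------------------------------------
# Reference sketch (illustration only, not used by the library): a plain NumPy
# version of the same greedy procedure for a single image, assuming boxes are
# given as (x1, y1, x2, y2). The compiled cpu/gpu operators above are the
# actual implementations.
# ---------------------------------------------------------------------------
import numpy as np

def nms_reference(boxes, scores, threshold):
    """Greedy NMS over boxes (n x 4) with scores (n,); returns kept indices."""
    order = scores.argsort()[::-1]              # indices sorted by score, descending
    keep = []
    while order.size > 0:
        i = order[0]                            # keep the highest-scoring box
        keep.append(i)
        rest = order[1:]
        # intersection of box i with each remaining box
        xx1 = np.maximum(boxes[i, 0], boxes[rest, 0])
        yy1 = np.maximum(boxes[i, 1], boxes[rest, 1])
        xx2 = np.minimum(boxes[i, 2], boxes[rest, 2])
        yy2 = np.minimum(boxes[i, 3], boxes[rest, 3])
        inter = np.maximum(0.0, xx2 - xx1) * np.maximum(0.0, yy2 - yy1)
        area_i = (boxes[i, 2] - boxes[i, 0]) * (boxes[i, 3] - boxes[i, 1])
        area_rest = (boxes[rest, 2] - boxes[rest, 0]) * (boxes[rest, 3] - boxes[rest, 1])
        iou = inter / (area_i + area_rest - inter)
        # eliminate boxes whose IoU with box i exceeds the threshold
        order = rest[iou <= threshold]
    return np.array(keep, dtype=np.int64)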
@@ -13,7 +13,7 @@ from torch.autograd import Function, Variable
import torch.nn.functional as F
from .. import lib

__all__ = ['aggregate', 'scaled_l2', 'pairwise_cosine']

class _aggregate(Function):
    @staticmethod
@@ -23,7 +23,7 @@ class _aggregate(Function):
        if A.is_cuda:
            E = lib.gpu.aggregate_forward(A, X, C)
        else:
            E = lib.cpu.aggregate_forward(A, X, C)
        return E

    @staticmethod
@@ -32,7 +32,7 @@ class _aggregate(Function):
        if A.is_cuda:
            gradA, gradX, gradC = lib.gpu.aggregate_backward(gradE, A, X, C)
        else:
            gradA, gradX, gradC = lib.cpu.aggregate_backward(gradE, A, X, C)
        return gradA, gradX, gradC

def aggregate(A, X, C):
@@ -60,13 +60,13 @@ def aggregate(A, X, C):
    """
    return _aggregate.apply(A, X, C)
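# Reference sketch (illustration only, not used by the library): aggregate()
# computes assignment-weighted residuals, e_k = sum_i a_ik * (x_i - c_k).
# Assuming A is B x N x K, X is B x N x D and C is K x D, an equivalent
# pure-PyTorch version is:
#
#     def aggregate_reference(A, X, C):
#         residual = X.unsqueeze(2) - C.unsqueeze(0).unsqueeze(0)  # B x N x K x D
#         return (A.unsqueeze(3) * residual).sum(1)                # B x K x D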
class _scaled_l2(Function):
    @staticmethod
    def forward(ctx, X, C, S):
        if X.is_cuda:
            SL = lib.gpu.scaled_l2_forward(X, C, S)
        else:
            SL = lib.cpu.scaled_l2_forward(X, C, S)
        ctx.save_for_backward(X, C, S, SL)
        return SL
@@ -76,12 +76,11 @@ class _scaled_l2(Function):
        if X.is_cuda:
            gradX, gradC, gradS = lib.gpu.scaled_l2_backward(gradSL, X, C, S, SL)
        else:
            gradX, gradC, gradS = lib.cpu.scaled_l2_backward(gradSL, X, C, S, SL)
        return gradX, gradC, gradS

def scaled_l2(X, C, S):
    r""" scaled_l2 distance

    .. math::
        sl_{ik} = s_k \|x_i-c_k\|^2

@@ -93,7 +92,7 @@ def scaled_l2(X, C, S):
          :math:`K` is number of codewords, :math:`D` is feature dimensions.)
    - Output: :math:`E\in\mathcal{R}^{B\times N\times K}`
    """
    return _scaled_l2.apply(X, C, S)
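# Reference sketch (illustration only): with X of shape B x N x D, C of shape
# K x D and S of shape K, the same distances can be computed by broadcasting:
#
#     def scaled_l2_reference(X, C, S):
#         diff = X.unsqueeze(2) - C.unsqueeze(0).unsqueeze(0)  # B x N x K x D
#         return (diff * diff).sum(-1) * S.view(1, 1, -1)      # B x N x K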

# Experimental
def pairwise_cosine(X, C, normalize=False):
...
@@ -26,7 +26,7 @@ class _sum_square(Function):
        if input.is_cuda:
            xsum, xsqusum = lib.gpu.sumsquare_forward(input)
        else:
            xsum, xsqusum = lib.cpu.sumsquare_forward(input)
        return xsum, xsqusum

    @staticmethod
@@ -46,7 +46,7 @@ class _batchnormtrain(Function):
        if input.is_cuda:
            output = lib.gpu.batchnorm_forward(input, mean, std, gamma, beta)
        else:
            output = lib.cpu.batchnorm_forward(input, mean, std, gamma, beta)
        return output

    @staticmethod
...
@@ -6,15 +6,20 @@ cwd = os.path.dirname(os.path.realpath(__file__))
cpu_path = os.path.join(cwd, 'cpu')
gpu_path = os.path.join(cwd, 'gpu')

cpu = load('enclib_cpu', [
    os.path.join(cpu_path, 'operator.cpp'),
    os.path.join(cpu_path, 'encoding_cpu.cpp'),
    os.path.join(cpu_path, 'syncbn_cpu.cpp'),
    os.path.join(cpu_path, 'roi_align_cpu.cpp'),
    os.path.join(cpu_path, 'nms_cpu.cpp'),
], build_directory=cpu_path, verbose=False)

if torch.cuda.is_available():
    gpu = load('enclib_gpu', [
        os.path.join(gpu_path, 'operator.cpp'),
        os.path.join(gpu_path, 'encoding_kernel.cu'),
        os.path.join(gpu_path, 'encodingv2_kernel.cu'),
        os.path.join(gpu_path, 'syncbn_kernel.cu'),
        os.path.join(gpu_path, 'roi_align_kernel.cu'),
        os.path.join(gpu_path, 'nms_kernel.cu'),
    ], build_directory=gpu_path, verbose=False)
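# (Assuming `load` here is torch.utils.cpp_extension.load, imported above this
# hunk: it JIT-compiles the listed C++/CUDA sources into a Python extension on
# first import and caches the build in build_directory, so later imports reuse
# the compiled library.)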