deeplab_demo.ipynb

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# DeepLab Demo\n",
    "\n",
    "This demo will demostrate the steps to run deeplab semantic segmentation model on sample input images.\n",
    "\n",
    "## Prerequisites\n",
    "\n",
    "Running this demo requires the following libraries:\n",
    "\n",
    "* Jupyter notebook (Python 2)\n",
    "* Tensorflow (>= v1.5.0)\n",
    "* Matplotlib\n",
    "* Pillow\n",
    "* numpy\n",
    "* ipywidgets (follow the setup [here](https://ipywidgets.readthedocs.io/en/stable/user_install.html))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Imports"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import collections\n",
    "import os\n",
    "import StringIO\n",
    "import sys\n",
    "import tarfile\n",
    "import tempfile\n",
    "import urllib\n",
    "\n",
    "from IPython import display\n",
    "from ipywidgets import interact\n",
    "from ipywidgets import interactive\n",
    "from matplotlib import gridspec\n",
    "from matplotlib import pyplot as plt\n",
    "import numpy as np\n",
    "from PIL import Image\n",
    "\n",
    "import tensorflow as tf\n",
    "\n",
    "if tf.__version__ < '1.5.0':\n",
    "    raise ImportError('Please upgrade your tensorflow installation to v1.5.0 or newer!')\n",
    "\n",
    "# Needed to show segmentation colormap labels\n",
    "sys.path.append('utils')\n",
    "import get_dataset_colormap"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Select and download models"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "_MODEL_URLS = {\n",
    "    'xception_coco_voctrainaug': 'http://download.tensorflow.org/models/deeplabv3_pascal_train_aug_2018_01_04.tar.gz',\n",
    "    'xception_coco_voctrainval': 'http://download.tensorflow.org/models/deeplabv3_pascal_trainval_2018_01_04.tar.gz',\n",
    "}\n",
    "\n",
    "Config = collections.namedtuple('Config', 'model_url, model_dir')\n",
    "\n",
    "def get_config(model_name, model_dir):\n",
    "    return Config(_MODEL_URLS[model_name], model_dir)\n",
    "\n",
    "config_widget = interactive(get_config, model_name=_MODEL_URLS.keys(), model_dir='')\n",
    "display.display(config_widget)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Check configuration and download the model\n",
    "\n",
    "_TARBALL_NAME = 'deeplab_model.tar.gz'\n",
    "\n",
    "config = config_widget.result\n",
    "\n",
    "model_dir = config.model_dir or tempfile.mkdtemp()\n",
    "tf.gfile.MakeDirs(model_dir)\n",
    "\n",
    "download_path = os.path.join(model_dir, _TARBALL_NAME)\n",
    "print 'downloading model to %s, this might take a while...' % download_path\n",
    "urllib.urlretrieve(config.model_url, download_path)\n",
    "print 'download completed!'"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load model in TensorFlow"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "_FROZEN_GRAPH_NAME = 'frozen_inference_graph'\n",
    "\n",
    "\n",
    "class DeepLabModel(object):\n",
    "    \"\"\"Class to load deeplab model and run inference.\"\"\"\n",
    "    \n",
    "    INPUT_TENSOR_NAME = 'ImageTensor:0'\n",
    "    OUTPUT_TENSOR_NAME = 'SemanticPredictions:0'\n",
    "    INPUT_SIZE = 513\n",
    "\n",
    "    def __init__(self, tarball_path):\n",
    "        \"\"\"Creates and loads pretrained deeplab model.\"\"\"\n",
    "        self.graph = tf.Graph()\n",
    "        \n",
    "        graph_def = None\n",
    "        # Extract frozen graph from tar archive.\n",
    "        tar_file = tarfile.open(tarball_path)\n",
    "        for tar_info in tar_file.getmembers():\n",
    "            if _FROZEN_GRAPH_NAME in os.path.basename(tar_info.name):\n",
    "                file_handle = tar_file.extractfile(tar_info)\n",
    "                graph_def = tf.GraphDef.FromString(file_handle.read())\n",
    "                break\n",
    "\n",
    "        tar_file.close()\n",
    "        \n",
    "        if graph_def is None:\n",
    "            raise RuntimeError('Cannot find inference graph in tar archive.')\n",
    "\n",
    "        with self.graph.as_default():      \n",
    "            tf.import_graph_def(graph_def, name='')\n",
    "        \n",
    "        self.sess = tf.Session(graph=self.graph)\n",
    "            \n",
    "    def run(self, image):\n",
    "        \"\"\"Runs inference on a single image.\n",
    "        \n",
    "        Args:\n",
    "            image: A PIL.Image object, raw input image.\n",
    "            \n",
    "        Returns:\n",
    "            resized_image: RGB image resized from original input image.\n",
    "            seg_map: Segmentation map of `resized_image`.\n",
    "        \"\"\"\n",
    "        width, height = image.size\n",
    "        resize_ratio = 1.0 * self.INPUT_SIZE / max(width, height)\n",
    "        target_size = (int(resize_ratio * width), int(resize_ratio * height))\n",
    "        resized_image = image.convert('RGB').resize(target_size, Image.ANTIALIAS)\n",
    "        batch_seg_map = self.sess.run(\n",
    "            self.OUTPUT_TENSOR_NAME,\n",
    "            feed_dict={self.INPUT_TENSOR_NAME: [np.asarray(resized_image)]})\n",
    "        seg_map = batch_seg_map[0]\n",
    "        return resized_image, seg_map\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "model = DeepLabModel(download_path)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Helper methods"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "LABEL_NAMES = np.asarray([\n",
    "    'background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle',\n",
    "    'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog',\n",
    "    'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa',\n",
    "    'train', 'tv'\n",
    "])\n",
    "\n",
    "FULL_LABEL_MAP = np.arange(len(LABEL_NAMES)).reshape(len(LABEL_NAMES), 1)\n",
    "FULL_COLOR_MAP = get_dataset_colormap.label_to_color_image(FULL_LABEL_MAP)\n",
    "\n",
    "\n",
    "def vis_segmentation(image, seg_map):\n",
    "    plt.figure(figsize=(15, 5))\n",
    "    grid_spec = gridspec.GridSpec(1, 4, width_ratios=[6, 6, 6, 1])\n",
    "\n",
    "    plt.subplot(grid_spec[0])\n",
    "    plt.imshow(image)\n",
    "    plt.axis('off')\n",
    "    plt.title('input image')\n",
    "    \n",
    "    plt.subplot(grid_spec[1])\n",
    "    seg_image = get_dataset_colormap.label_to_color_image(\n",
    "        seg_map, get_dataset_colormap.get_pascal_name()).astype(np.uint8)\n",
    "    plt.imshow(seg_image)\n",
    "    plt.axis('off')\n",
    "    plt.title('segmentation map')\n",
    "\n",
    "    plt.subplot(grid_spec[2])\n",
    "    plt.imshow(image)\n",
    "    plt.imshow(seg_image, alpha=0.7)\n",
    "    plt.axis('off')\n",
    "    plt.title('segmentation overlay')\n",
    "    \n",
    "    unique_labels = np.unique(seg_map)\n",
    "    ax = plt.subplot(grid_spec[3])\n",
    "    plt.imshow(FULL_COLOR_MAP[unique_labels].astype(np.uint8), interpolation='nearest')\n",
    "    ax.yaxis.tick_right()\n",
    "    plt.yticks(range(len(unique_labels)), LABEL_NAMES[unique_labels])\n",
    "    plt.xticks([], [])\n",
    "    ax.tick_params(width=0)\n",
    "    plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Run on sample images"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Note that we are using single scale inference in the demo for fast\n",
    "# computation, so the results may slightly differ from the visualizations\n",
    "# in README, which uses multi-scale and left-right flipped inputs.\n",
    "\n",
    "IMAGE_DIR = 'g3doc/img'\n",
    "\n",
    "def run_demo_image(image_name):\n",
    "    try:\n",
    "        image_path = os.path.join(IMAGE_DIR, image_name)\n",
    "        orignal_im = Image.open(image_path)\n",
    "    except IOError:\n",
    "        print 'Failed to read image from %s.' % image_path \n",
    "        return \n",
    "    print 'running deeplab on image %s...' % image_name\n",
    "    resized_im, seg_map = model.run(orignal_im)\n",
    "    \n",
    "    vis_segmentation(resized_im, seg_map)\n",
    "\n",
    "_ = interact(run_demo_image, image_name=['image1.jpg', 'image2.jpg', 'image3.jpg'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Run on internet images"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true,
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "def get_an_internet_image(url):\n",
    "    if not url:\n",
    "        return\n",
    "\n",
    "    try:\n",
    "        # Prefix with 'file://' for local file.\n",
    "        if os.path.exists(url):\n",
    "            url = 'file://' + url\n",
    "        f = urllib.urlopen(url)\n",
    "        jpeg_str = f.read()\n",
    "    except IOError:\n",
    "        print 'invalid url: ' + url\n",
    "        return\n",
    "\n",
    "    orignal_im = Image.open(StringIO.StringIO(jpeg_str))\n",
    "    print 'running deeplab on image %s...' % url\n",
    "    resized_im, seg_map = model.run(orignal_im)\n",
    "    \n",
    "    vis_segmentation(resized_im, seg_map)\n",
    "\n",
    "_ = interact(get_an_internet_image, url='')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}