"sgl-kernel/vscode:/vscode.git/clone" did not exist on "d4c038daede43544d107f81cb5b6337c7a13803a"
Commit 9ba5b316 authored by Mark Daoust's avatar Mark Daoust
Browse files

Convert to colab format

parent 2c929976
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "wide.ipynb",
"version": "0.3.2",
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
}
},
"cells": [ "cells": [
{ {
"metadata": {
"id": "Zr7KpBhMcYvE",
"colab_type": "text"
},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"# TensorFlow Linear Model Tutorial\n", "# TensorFlow Linear Model Tutorial\n",
"\n", "\n"
]
},
{
"metadata": {
"id": "77aETSYDcdoK",
"colab_type": "text"
},
"cell_type": "markdown",
"source": [
"In this tutorial, we will use the `tf.estimator` API in TensorFlow to solve a\n", "In this tutorial, we will use the `tf.estimator` API in TensorFlow to solve a\n",
"binary classification problem: Given census data about a person such as age,\n", "binary classification problem: Given census data about a person such as age,\n",
"education, marital status, and occupation (the features), we will try to predict\n", "education, marital status, and occupation (the features), we will try to predict\n",
...@@ -25,24 +51,16 @@ ...@@ -25,24 +51,16 @@
] ]
}, },
{ {
"metadata": {
"id": "NQgONe5ecYvE",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 17
},
"outputId": "7ab0889a-32f9-4ace-f848-6c808893b88c"
},
"cell_type": "code", "cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [
{
"ename": "ValueError",
"evalue": "tf.enable_eager_execution must be called at program startup.",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-42-04d0fb7a9ec6>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mtensorflow\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mtf\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mtensorflow\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfeature_column\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mfc\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mtf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0menable_eager_execution\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/venv3/lib/python3.5/site-packages/tensorflow/python/framework/ops.py\u001b[0m in \u001b[0;36menable_eager_execution\u001b[0;34m(config, device_policy, execution_mode)\u001b[0m\n\u001b[1;32m 5238\u001b[0m \"\"\"\n\u001b[1;32m 5239\u001b[0m return enable_eager_execution_internal(\n\u001b[0;32m-> 5240\u001b[0;31m config, device_policy, execution_mode, None)\n\u001b[0m\u001b[1;32m 5241\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5242\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/venv3/lib/python3.5/site-packages/tensorflow/python/framework/ops.py\u001b[0m in \u001b[0;36menable_eager_execution_internal\u001b[0;34m(config, device_policy, execution_mode, server_def)\u001b[0m\n\u001b[1;32m 5306\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5307\u001b[0m raise ValueError(\n\u001b[0;32m-> 5308\u001b[0;31m \"tf.enable_eager_execution must be called at program startup.\")\n\u001b[0m\u001b[1;32m 5309\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5310\u001b[0m \u001b[0;31m# Monkey patch to get rid of an unnecessary conditional since the context is\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mValueError\u001b[0m: tf.enable_eager_execution must be called at program startup."
]
}
],
"source": [ "source": [
"import tensorflow as tf\n", "import tensorflow as tf\n",
"import tensorflow.feature_column as fc \n", "import tensorflow.feature_column as fc \n",
...@@ -51,29 +69,32 @@ ...@@ -51,29 +69,32 @@
"import os\n", "import os\n",
"import sys\n", "import sys\n",
"from IPython.display import clear_output" "from IPython.display import clear_output"
] ],
"execution_count": 1,
"outputs": []
},
{
"metadata": {
"id": "-MPr95UccYvL",
"colab_type": "text"
},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"Download the [tutorial code from github](https://github.com/tensorflow/models/tree/master/official/wide_deep/),\n", "Download the [tutorial code from github](https://github.com/tensorflow/models/tree/master/official/wide_deep/),\n",
" add the root directory to your python path, and jump to the `wide_deep` directory:" " add the root directory to your python path, and jump to the `wide_deep` directory:"
] ]
}, },
{ {
"metadata": {
"id": "yVvFyhnkcYvL",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 136
},
"outputId": "e57030d7-7f5c-455e-ea0f-55038e909d97"
},
"cell_type": "code", "cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"fatal: destination path 'models' already exists and is not an empty directory.\r\n"
]
}
],
"source": [ "source": [
"if \"wide_deep\" not in os.getcwd():\n", "if \"wide_deep\" not in os.getcwd():\n",
" ! git clone --depth 1 https://github.com/tensorflow/models\n", " ! git clone --depth 1 https://github.com/tensorflow/models\n",
...@@ -81,55 +102,96 @@ ...@@ -81,55 +102,96 @@
" sys.path.append(models_path) \n", " sys.path.append(models_path) \n",
" os.environ['PYTHONPATH'] += os.pathsep+models_path\n", " os.environ['PYTHONPATH'] += os.pathsep+models_path\n",
" os.chdir(\"models/official/wide_deep\")" " os.chdir(\"models/official/wide_deep\")"
],
"execution_count": 2,
"outputs": [
{
"output_type": "stream",
"text": [
"Cloning into 'models'...\n",
"remote: Counting objects: 2826, done.\u001b[K\n",
"remote: Compressing objects: 100% (2375/2375), done.\u001b[K\n",
"remote: Total 2826 (delta 543), reused 1731 (delta 382), pack-reused 0\u001b[K\n",
"Receiving objects: 100% (2826/2826), 371.22 MiB | 39.17 MiB/s, done.\n",
"Resolving deltas: 100% (543/543), done.\n",
"Checking out files: 100% (2934/2934), done.\n"
],
"name": "stdout"
}
]
},
{
"metadata": {
"id": "15Ethw-wcYvP",
"colab_type": "text"
},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"Execute the data download script:" "Execute the data download script:"
] ]
}, },
{ {
"metadata": {
"id": "6QilS4-0cYvQ",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 17
},
"outputId": "3faf2df7-677e-4a91-c09b-3d81ca30c9c1"
},
"cell_type": "code", "cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [ "source": [
"import census_dataset\n", "import census_dataset\n",
"import census_main\n", "import census_main\n",
"\n", "\n",
"census_dataset.download(\"/tmp/census_data/\")" "census_dataset.download(\"/tmp/census_data/\")"
] ],
"execution_count": 3,
"outputs": []
},
{
"metadata": {
"id": "cD5e3ibAcYvS",
"colab_type": "text"
},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"Execute the tutorial code with the following command to train the model described in this tutorial, from the command line:" "Execute the tutorial code with the following command to train the model described in this tutorial, from the command line:"
] ]
}, },
{ {
"metadata": {
"id": "vbJ8jPAhcYvT",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"outputId": "cc0182c0-90d7-4f9c-b421-0dd67166c6d2"
},
"cell_type": "code", "cell_type": "code",
"source": [
"output = !python -m census_main --model_type=wide --train_epochs=2\n",
"print([line for line in output if 'accuracy:' in line])"
],
"execution_count": 4, "execution_count": 4,
"metadata": {},
"outputs": [ "outputs": [
{ {
"name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"['I0711 14:47:25.747490 139708077598464 tf_logging.py:115] accuracy: 0.833794']\n" "['I0711 22:27:15.442501 140285526747008 tf_logging.py:115] accuracy: 0.8360666']\n"
]
}
], ],
"source": [ "name": "stdout"
"output = !python -m census_main --model_type=wide --train_epochs=2\n", }
"print([line for line in output if 'accuracy:' in line])"
] ]
}, },
{ {
"metadata": {
"id": "AmZ4CpaOcYvV",
"colab_type": "text"
},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"Read on to find out how this code builds its linear model.\n", "Read on to find out how this code builds its linear model.\n",
"\n", "\n",
...@@ -151,38 +213,70 @@ ...@@ -151,38 +213,70 @@
] ]
}, },
{ {
"metadata": {
"id": "N6Tgye8bcYvX",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"outputId": "75152d8d-6afa-4e4e-cc0e-3eac7127f8fd"
},
"cell_type": "code", "cell_type": "code",
"source": [
"!ls /tmp/census_data/"
],
"execution_count": 5, "execution_count": 5,
"metadata": {},
"outputs": [ "outputs": [
{ {
"name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"adult.data adult.test\r\n" "adult.data adult.test\r\n"
]
}
], ],
"source": [ "name": "stdout"
"!ls /tmp/census_data/" }
] ]
}, },
{ {
"metadata": {
"id": "6y3mj9zKcYva",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 17
},
"outputId": "3b44b7dd-5a2d-4943-eb19-20f26d5c7098"
},
"cell_type": "code", "cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [ "source": [
"train_file = \"/tmp/census_data/adult.data\"\n", "train_file = \"/tmp/census_data/adult.data\"\n",
"test_file = \"/tmp/census_data/adult.test\"" "test_file = \"/tmp/census_data/adult.test\""
] ],
"execution_count": 6,
"outputs": []
},
{
"metadata": {
"id": "vkn1FNmpcYvb",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 204
},
"outputId": "4e27b186-b76c-4f19-ea9d-abe19110e93b"
},
"cell_type": "code", "cell_type": "code",
"source": [
"import pandas\n",
"train_df = pandas.read_csv(train_file, header = None, names = census_dataset._CSV_COLUMNS)\n",
"test_df = pandas.read_csv(test_file, header = None, names = census_dataset._CSV_COLUMNS)\n",
"\n",
"train_df.head()"
],
"execution_count": 7, "execution_count": 7,
"metadata": {},
"outputs": [ "outputs": [
{ {
"output_type": "execute_result",
"data": { "data": {
"text/html": [ "text/html": [
"<div>\n", "<div>\n",
...@@ -338,22 +432,19 @@ ...@@ -338,22 +432,19 @@
"4 0 0 40 Cuba <=50K " "4 0 0 40 Cuba <=50K "
] ]
}, },
"execution_count": 7, "metadata": {
"metadata": {}, "tags": []
"output_type": "execute_result" },
"execution_count": 7
} }
],
"source": [
"import pandas\n",
"train_df = pandas.read_csv(train_file, header = None, names = census_dataset._CSV_COLUMNS)\n",
"test_df = pandas.read_csv(test_file, header = None, names = census_dataset._CSV_COLUMNS)\n",
"\n",
"train_df.head()"
] ]
}, },
{ {
"metadata": {
"id": "QZZtXes4cYvf",
"colab_type": "text"
},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"The columns can be grouped into two types—categorical\n", "The columns can be grouped into two types—categorical\n",
"and continuous columns:\n", "and continuous columns:\n",
...@@ -392,10 +483,16 @@ ...@@ -392,10 +483,16 @@
] ]
}, },
{ {
"metadata": {
"id": "N7zNJflKcYvg",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 17
},
"outputId": "4aebe747-0fca-4209-cf28-3164080ab89f"
},
"cell_type": "code", "cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [ "source": [
"def easy_input_function(df, label_key, num_epochs, shuffle, batch_size):\n", "def easy_input_function(df, label_key, num_epochs, shuffle, batch_size):\n",
" df = df.copy()\n", " df = df.copy()\n",
...@@ -408,34 +505,31 @@ ...@@ -408,34 +505,31 @@
" ds = ds.batch(batch_size).repeat(num_epochs)\n", " ds = ds.batch(batch_size).repeat(num_epochs)\n",
"\n", "\n",
" return ds" " return ds"
] ],
"execution_count": 8,
"outputs": []
},
{
"metadata": {
"id": "WeEgNR9AcYvh",
"colab_type": "text"
},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"Since we have eager execution enabled it is easy to inspect the resulting dataset:" "Since we have eager execution enabled it is easy to inspect the resulting dataset:"
] ]
}, },
{ {
"metadata": {
"id": "ygaKuikecYvi",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 136
},
"outputId": "071665a2-d23f-4c15-da43-ce0d106d473f"
},
"cell_type": "code", "cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Some feature keys: ['capital_gain', 'occupation', 'gender', 'capital_loss', 'workclass']\n",
"\n",
"A batch of Ages : tf.Tensor([61 18 37 47 47 32 18 23 28 37], shape=(10,), dtype=int32)\n",
"\n",
"A batch of Labels: tf.Tensor(\n",
"[b'>50K' b'<=50K' b'>50K' b'>50K' b'>50K' b'>50K' b'<=50K' b'<=50K'\n",
" b'<=50K' b'<=50K'], shape=(10,), dtype=string)\n"
]
}
],
"source": [ "source": [
"ds = easy_input_function(train_df, label_key='income_bracket', num_epochs=5, shuffle=True, batch_size=10)\n", "ds = easy_input_function(train_df, label_key='income_bracket', num_epochs=5, shuffle=True, batch_size=10)\n",
"\n", "\n",
...@@ -447,11 +541,30 @@ ...@@ -447,11 +541,30 @@
"print('A batch of Ages :', feature_batch['age'])\n", "print('A batch of Ages :', feature_batch['age'])\n",
"print()\n", "print()\n",
"print('A batch of Labels:', label_batch )" "print('A batch of Labels:', label_batch )"
],
"execution_count": 9,
"outputs": [
{
"output_type": "stream",
"text": [
"Some feature keys: ['age', 'workclass', 'fnlwgt', 'education', 'education_num']\n",
"\n",
"A batch of Ages : tf.Tensor([52 57 31 33 34 22 32 66 35 44], shape=(10,), dtype=int32)\n",
"\n",
"A batch of Labels: tf.Tensor(\n",
"[b'<=50K' b'<=50K' b'<=50K' b'<=50K' b'<=50K' b'<=50K' b'<=50K' b'<=50K'\n",
" b'<=50K' b'>50K'], shape=(10,), dtype=string)\n"
],
"name": "stdout"
}
]
},
{
"metadata": {
"id": "O_KZxQUucYvm",
"colab_type": "text"
},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"But this approach has severly-limited scalability. For larger data it should be streamed off disk.\n", "But this approach has severly-limited scalability. For larger data it should be streamed off disk.\n",
"the `census_dataset.input_fn` provides an example of how to do this using `tf.decode_csv` and `tf.data.TextLineDataset`: \n", "the `census_dataset.input_fn` provides an example of how to do this using `tf.decode_csv` and `tf.data.TextLineDataset`: \n",
...@@ -460,12 +573,23 @@ ...@@ -460,12 +573,23 @@
] ]
}, },
{ {
"metadata": {
"id": "vUTeXaEUcYvn",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 493
},
"outputId": "2da7413a-5e54-4e86-f3c5-07387156ab79"
},
"cell_type": "code", "cell_type": "code",
"source": [
"import inspect\n",
"print(inspect.getsource(census_dataset.input_fn))"
],
"execution_count": 10, "execution_count": 10,
"metadata": {},
"outputs": [ "outputs": [
{ {
"name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"def input_fn(data_file, num_epochs, shuffle, batch_size):\n", "def input_fn(data_file, num_epochs, shuffle, batch_size):\n",
...@@ -496,63 +620,65 @@ ...@@ -496,63 +620,65 @@
" dataset = dataset.batch(batch_size)\n", " dataset = dataset.batch(batch_size)\n",
" return dataset\n", " return dataset\n",
"\n" "\n"
]
}
], ],
"source": [ "name": "stdout"
"import inspect\n", }
"print(inspect.getsource(census_dataset.input_fn))"
] ]
}, },
{ {
"metadata": {
"id": "yyGcv_e-cYvq",
"colab_type": "text"
},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"This input_fn gives equivalent output:" "This input_fn gives equivalent output:"
] ]
}, },
{ {
"metadata": {
"id": "DlsqRZS5cYvr",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 68
},
"outputId": "31dee63f-80f7-4c7e-f749-a5531d33ab95"
},
"cell_type": "code", "cell_type": "code",
"source": [
"ds = census_dataset.input_fn(train_file, num_epochs=5, shuffle=True, batch_size=10)"
],
"execution_count": 11, "execution_count": 11,
"metadata": {},
"outputs": [ "outputs": [
{ {
"name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"INFO:tensorflow:Parsing /tmp/census_data/adult.data\n" "INFO:tensorflow:Parsing /tmp/census_data/adult.data\n"
] ],
"name": "stdout"
}, },
{ {
"name": "stderr",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"WARNING: Logging before flag parsing goes to stderr.\n", "WARNING: Logging before flag parsing goes to stderr.\n",
"I0711 14:47:26.362334 140466218788608 tf_logging.py:115] Parsing /tmp/census_data/adult.data\n" "I0711 22:27:19.570451 140174775953280 tf_logging.py:115] Parsing /tmp/census_data/adult.data\n"
]
}
], ],
"source": [ "name": "stderr"
"ds = census_dataset.input_fn(train_file, num_epochs=5, shuffle=True, batch_size=10)" }
] ]
}, },
{ {
"metadata": {
"id": "Mv3as_CEcYvu",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 102
},
"outputId": "3834b00d-9655-488f-d6d2-8d7405848d78"
},
"cell_type": "code", "cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Feature keys: ['capital_gain', 'occupation', 'gender', 'capital_loss', 'workclass']\n",
"\n",
"Age batch : tf.Tensor([46 38 42 37 29 48 46 40 73 49], shape=(10,), dtype=int32)\n",
"\n",
"Label batch : tf.Tensor([False False False False False False False False True False], shape=(10,), dtype=bool)\n"
]
}
],
"source": [ "source": [
"for feature_batch, label_batch in ds:\n", "for feature_batch, label_batch in ds:\n",
" break\n", " break\n",
...@@ -562,29 +688,57 @@ ...@@ -562,29 +688,57 @@
"print('Age batch :', feature_batch['age'])\n", "print('Age batch :', feature_batch['age'])\n",
"print()\n", "print()\n",
"print('Label batch :', label_batch )" "print('Label batch :', label_batch )"
],
"execution_count": 12,
"outputs": [
{
"output_type": "stream",
"text": [
"Feature keys: ['age', 'workclass', 'fnlwgt', 'education', 'education_num']\n",
"\n",
"Age batch : tf.Tensor([31 88 36 46 20 51 30 40 31 49], shape=(10,), dtype=int32)\n",
"\n",
"Label batch : tf.Tensor([False False True True False True True False False True], shape=(10,), dtype=bool)\n"
],
"name": "stdout"
}
]
},
{
"metadata": {
"id": "810fnfY5cYvz",
"colab_type": "text"
},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"Because `Estimators` expect an `input_fn` that takes no arguments, we typically wrap configurable input function into an obejct with the expected signature. For this notebook configure the `train_inpf` to iterate over the data twice:" "Because `Estimators` expect an `input_fn` that takes no arguments, we typically wrap configurable input function into an obejct with the expected signature. For this notebook configure the `train_inpf` to iterate over the data twice:"
] ]
}, },
{ {
"metadata": {
"id": "wnQdpEcVcYv0",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 17
},
"outputId": "b9050d80-e603-4363-dbe9-11c2b368e29d"
},
"cell_type": "code", "cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [ "source": [
"import functools\n", "import functools\n",
"train_inpf = functools.partial(census_dataset.input_fn, train_file, num_epochs=2, shuffle=True, batch_size=64)\n", "train_inpf = functools.partial(census_dataset.input_fn, train_file, num_epochs=2, shuffle=True, batch_size=64)\n",
"test_inpf = functools.partial(census_dataset.input_fn, test_file, num_epochs=1, shuffle=False, batch_size=64)" "test_inpf = functools.partial(census_dataset.input_fn, test_file, num_epochs=1, shuffle=False, batch_size=64)"
] ],
"execution_count": 13,
"outputs": []
},
{
"metadata": {
"id": "pboNpNWhcYv4",
"colab_type": "text"
},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"## Selecting and Engineering Features for the Model\n", "## Selecting and Engineering Features for the Model\n",
"\n", "\n",
...@@ -609,73 +763,92 @@ ...@@ -609,73 +763,92 @@
] ]
}, },
{ {
"metadata": {
"id": "ZX0r2T5OcYv6",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 17
},
"outputId": "283bf438-2a96-4bf3-fa89-94da99f93927"
},
"cell_type": "code", "cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [ "source": [
"age = fc.numeric_column('age')" "age = fc.numeric_column('age')"
] ],
"execution_count": 14,
"outputs": []
},
{
"metadata": {
"id": "tnLUiaHxcYv-",
"colab_type": "text"
},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"The model will use the `feature_column` definitions to build the model input. You can inspect the resulting output using the `input_layer` function:" "The model will use the `feature_column` definitions to build the model input. You can inspect the resulting output using the `input_layer` function:"
] ]
}, },
{ {
"cell_type": "code",
"execution_count": 15,
"metadata": { "metadata": {
"scrolled": true "id": "kREtIPfwcYv_",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 187
},
"outputId": "197a798b-9809-45e1-a8d4-ed5d237eea9d"
},
"cell_type": "code",
"source": [
"fc.input_layer(feature_batch, [age]).numpy()"
],
"execution_count": 15,
"outputs": [ "outputs": [
{ {
"output_type": "execute_result",
"data": { "data": {
"text/plain": [ "text/plain": [
"<tf.Tensor: id=237, shape=(10, 1), dtype=float32, numpy=\n", "array([[31.],\n",
"array([[46.],\n", " [88.],\n",
" [38.],\n", " [36.],\n",
" [42.],\n",
" [37.],\n",
" [29.],\n",
" [48.],\n",
" [46.],\n", " [46.],\n",
" [20.],\n",
" [51.],\n",
" [30.],\n",
" [40.],\n", " [40.],\n",
" [73.],\n", " [31.],\n",
" [49.]], dtype=float32)>" " [49.]], dtype=float32)"
] ]
}, },
"execution_count": 15, "metadata": {
"metadata": {}, "tags": []
"output_type": "execute_result" },
"execution_count": 15
} }
],
"source": [
"fc.input_layer(feature_batch, [age]).numpy()"
] ]
}, },
{ {
"metadata": {
"id": "OPuLduCucYwD",
"colab_type": "text"
},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"The following code will train and evaluate a model on only the `age` feature." "The following code will train and evaluate a model on only the `age` feature."
] ]
}, },
{ {
"metadata": {
"id": "9R5eSJ1pcYwE",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 54
},
"outputId": "ea791197-8300-4f31-cee1-f7d1b8209838"
},
"cell_type": "code", "cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'precision': 0.29166666, 'auc_precision_recall': 0.31132147, 'average_loss': 0.5239897, 'label/mean': 0.23622628, 'auc': 0.6781367, 'loss': 33.4552, 'prediction/mean': 0.22513431, 'accuracy': 0.7631595, 'recall': 0.0018200728, 'global_step': 1018, 'accuracy_baseline': 0.76377374}\n"
]
}
],
"source": [ "source": [
"classifier = tf.estimator.LinearClassifier(feature_columns=[age], n_classes=2)\n", "classifier = tf.estimator.LinearClassifier(feature_columns=[age], n_classes=2)\n",
"classifier.train(train_inpf)\n", "classifier.train(train_inpf)\n",
...@@ -683,98 +856,127 @@ ...@@ -683,98 +856,127 @@
"\n", "\n",
"clear_output()\n", "clear_output()\n",
"print(result)" "print(result)"
],
"execution_count": 16,
"outputs": [
{
"output_type": "stream",
"text": [
"{'accuracy': 0.76334375, 'accuracy_baseline': 0.76377374, 'auc': 0.67818105, 'auc_precision_recall': 0.31133735, 'average_loss': 0.52437353, 'label/mean': 0.23622628, 'loss': 33.479706, 'precision': 0.31578946, 'prediction/mean': 0.22410269, 'recall': 0.0015600624, 'global_step': 1018}\n"
],
"name": "stdout"
}
]
},
{
"metadata": {
"id": "YDZGcdTdcYwI",
"colab_type": "text"
},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"Similarly, we can define a `NumericColumn` for each continuous feature column\n", "Similarly, we can define a `NumericColumn` for each continuous feature column\n",
"that we want to use in the model:" "that we want to use in the model:"
] ]
}, },
{ {
"metadata": {
"id": "uqPbUqlxcYwJ",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 17
},
"outputId": "68f4ccfd-d71b-4327-b8e8-25c40e986bed"
},
"cell_type": "code", "cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [ "source": [
"education_num = tf.feature_column.numeric_column('education_num')\n", "education_num = tf.feature_column.numeric_column('education_num')\n",
"capital_gain = tf.feature_column.numeric_column('capital_gain')\n", "capital_gain = tf.feature_column.numeric_column('capital_gain')\n",
"capital_loss = tf.feature_column.numeric_column('capital_loss')\n", "capital_loss = tf.feature_column.numeric_column('capital_loss')\n",
"hours_per_week = tf.feature_column.numeric_column('hours_per_week')" "hours_per_week = tf.feature_column.numeric_column('hours_per_week')"
] ],
"execution_count": 17,
"outputs": []
},
{
"metadata": {
"id": "yqCF0a4DcYwM",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 17
},
"outputId": "0f9097a4-bc79-4e67-bd63-6a4d4461736d"
},
"cell_type": "code", "cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [ "source": [
"my_numeric_columns = [age,education_num, capital_gain, capital_loss, hours_per_week]" "my_numeric_columns = [age,education_num, capital_gain, capital_loss, hours_per_week]"
] ],
"execution_count": 18,
"outputs": []
},
{
"metadata": {
"id": "xDrZtAZ0cYwO",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 204
},
"outputId": "6fd558ea-9f0c-4deb-cb8a-6211ec233016"
},
"cell_type": "code", "cell_type": "code",
"source": [
"fc.input_layer(feature_batch, my_numeric_columns).numpy()"
],
"execution_count": 19, "execution_count": 19,
"metadata": {},
"outputs": [ "outputs": [
{ {
"output_type": "execute_result",
"data": { "data": {
"text/plain": [ "text/plain": [
"<tf.Tensor: id=2160, shape=(10, 5), dtype=float32, numpy=\n", "array([[3.1000e+01, 0.0000e+00, 0.0000e+00, 1.4000e+01, 4.3000e+01],\n",
"array([[4.600e+01, 0.000e+00, 0.000e+00, 6.000e+00, 4.000e+01],\n", " [8.8000e+01, 0.0000e+00, 0.0000e+00, 1.5000e+01, 4.0000e+01],\n",
" [3.800e+01, 4.508e+03, 0.000e+00, 1.300e+01, 4.000e+01],\n", " [3.6000e+01, 1.5024e+04, 0.0000e+00, 9.0000e+00, 4.0000e+01],\n",
" [4.200e+01, 0.000e+00, 0.000e+00, 1.400e+01, 4.000e+01],\n", " [4.6000e+01, 0.0000e+00, 0.0000e+00, 1.4000e+01, 5.5000e+01],\n",
" [3.700e+01, 0.000e+00, 0.000e+00, 1.100e+01, 4.000e+01],\n", " [2.0000e+01, 0.0000e+00, 0.0000e+00, 1.0000e+01, 1.0000e+01],\n",
" [2.900e+01, 0.000e+00, 0.000e+00, 9.000e+00, 4.000e+01],\n", " [5.1000e+01, 5.1780e+03, 0.0000e+00, 1.2000e+01, 4.5000e+01],\n",
" [4.800e+01, 0.000e+00, 0.000e+00, 1.300e+01, 5.500e+01],\n", " [3.0000e+01, 1.5024e+04, 0.0000e+00, 1.4000e+01, 6.0000e+01],\n",
" [4.600e+01, 0.000e+00, 0.000e+00, 9.000e+00, 5.000e+01],\n", " [4.0000e+01, 0.0000e+00, 0.0000e+00, 9.0000e+00, 4.0000e+01],\n",
" [4.000e+01, 0.000e+00, 0.000e+00, 9.000e+00, 4.000e+01],\n", " [3.1000e+01, 0.0000e+00, 0.0000e+00, 1.0000e+01, 1.0000e+01],\n",
" [7.300e+01, 6.418e+03, 0.000e+00, 4.000e+00, 9.900e+01],\n", " [4.9000e+01, 0.0000e+00, 0.0000e+00, 1.3000e+01, 4.0000e+01]],\n",
" [4.900e+01, 0.000e+00, 0.000e+00, 4.000e+00, 4.000e+01]],\n", " dtype=float32)"
" dtype=float32)>"
] ]
}, },
"execution_count": 19, "metadata": {
"metadata": {}, "tags": []
"output_type": "execute_result" },
"execution_count": 19
} }
],
"source": [
"fc.input_layer(feature_batch, my_numeric_columns).numpy()"
] ]
}, },
{ {
"metadata": {
"id": "cBGDN97IcYwQ",
"colab_type": "text"
},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"You could retrain a model on these features with, just by changing the `feature_columns` argument to the constructor:" "You could retrain a model on these features with, just by changing the `feature_columns` argument to the constructor:"
] ]
}, },
{ {
"metadata": {
"id": "XN8k5S95cYwR",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 204
},
"outputId": "72be27c1-e25c-4609-a703-8297c936177a"
},
"cell_type": "code", "cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"accuracy: 0.7817087\n",
"accuracy_baseline: 0.76377374\n",
"auc: 0.8027547\n",
"auc_precision_recall: 0.5611528\n",
"average_loss: 1.0698086\n",
"global_step: 1018\n",
"label/mean: 0.23622628\n",
"loss: 68.30414\n",
"precision: 0.57025987\n",
"prediction/mean: 0.36397633\n",
"recall: 0.30811232\n"
]
}
],
"source": [ "source": [
"classifier = tf.estimator.LinearClassifier(feature_columns=my_numeric_columns, n_classes=2)\n", "classifier = tf.estimator.LinearClassifier(feature_columns=my_numeric_columns, n_classes=2)\n",
"classifier.train(train_inpf)\n", "classifier.train(train_inpf)\n",
...@@ -784,11 +986,34 @@ ...@@ -784,11 +986,34 @@
"clear_output()\n", "clear_output()\n",
"for key,value in sorted(result.items()):\n", "for key,value in sorted(result.items()):\n",
" print('%s: %s' % (key, value))" " print('%s: %s' % (key, value))"
],
"execution_count": 20,
"outputs": [
{
"output_type": "stream",
"text": [
"accuracy: 0.76377374\n",
"accuracy_baseline: 0.76377374\n",
"auc: 0.539677\n",
"auc_precision_recall: 0.334656\n",
"average_loss: 1.4886041\n",
"global_step: 1018\n",
"label/mean: 0.23622628\n",
"loss: 95.04299\n",
"precision: 0.0\n",
"prediction/mean: 0.21315515\n",
"recall: 0.0\n"
],
"name": "stdout"
}
] ]
},
{
"metadata": {
"id": "jBRq9_AzcYwU",
"colab_type": "text"
},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"#### Categorical columns\n", "#### Categorical columns\n",
"\n", "\n",
...@@ -799,20 +1024,31 @@ ...@@ -799,20 +1024,31 @@
] ]
}, },
{ {
"metadata": {
"id": "0IjqSi9tcYwV",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 37
},
"outputId": "859f282d-7a9c-417b-a615-643a15d10118"
},
"cell_type": "code", "cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [ "source": [
"relationship = fc.categorical_column_with_vocabulary_list(\n", "relationship = fc.categorical_column_with_vocabulary_list(\n",
" 'relationship', [\n", " 'relationship', [\n",
" 'Husband', 'Not-in-family', 'Wife', 'Own-child', 'Unmarried',\n", " 'Husband', 'Not-in-family', 'Wife', 'Own-child', 'Unmarried',\n",
" 'Other-relative'])\n" " 'Other-relative'])\n"
] ],
"execution_count": 21,
"outputs": []
},
{
"metadata": {
"id": "-RjoWv-7cYwW",
"colab_type": "text"
},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"This will create a sparse one-hot vector from the raw input feature.\n", "This will create a sparse one-hot vector from the raw input feature.\n",
"\n", "\n",
...@@ -824,122 +1060,168 @@ ...@@ -824,122 +1060,168 @@
] ]
}, },
{ {
"cell_type": "code",
"execution_count": 23,
"metadata": { "metadata": {
"scrolled": true "id": "kI43CYlncYwY",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 224
},
"outputId": "458177e5-4bc0-48f2-b1fb-614b91dd99e6"
},
"cell_type": "code",
"source": [
"fc.input_layer(feature_batch, [age, fc.indicator_column(relationship)])"
],
"execution_count": 22,
"outputs": [ "outputs": [
{ {
"output_type": "execute_result",
"data": { "data": {
"text/plain": [ "text/plain": [
"<tf.Tensor: id=4490, shape=(10, 7), dtype=float32, numpy=\n", "<tf.Tensor: id=4361, shape=(10, 7), dtype=float32, numpy=\n",
"array([[46., 0., 0., 0., 0., 1., 0.],\n", "array([[31., 0., 1., 0., 0., 0., 0.],\n",
" [38., 1., 0., 0., 0., 0., 0.],\n", " [88., 1., 0., 0., 0., 0., 0.],\n",
" [42., 0., 1., 0., 0., 0., 0.],\n", " [36., 1., 0., 0., 0., 0., 0.],\n",
" [37., 1., 0., 0., 0., 0., 0.],\n",
" [29., 1., 0., 0., 0., 0., 0.],\n",
" [48., 1., 0., 0., 0., 0., 0.],\n",
" [46., 1., 0., 0., 0., 0., 0.],\n", " [46., 1., 0., 0., 0., 0., 0.],\n",
" [20., 0., 1., 0., 0., 0., 0.],\n",
" [51., 1., 0., 0., 0., 0., 0.],\n",
" [30., 1., 0., 0., 0., 0., 0.],\n",
" [40., 1., 0., 0., 0., 0., 0.],\n", " [40., 1., 0., 0., 0., 0., 0.],\n",
" [73., 1., 0., 0., 0., 0., 0.],\n", " [31., 0., 0., 1., 0., 0., 0.],\n",
" [49., 1., 0., 0., 0., 0., 0.]], dtype=float32)>" " [49., 0., 1., 0., 0., 0., 0.]], dtype=float32)>"
] ]
}, },
"execution_count": 23, "metadata": {
"metadata": {}, "tags": []
"output_type": "execute_result" },
"execution_count": 22
} }
],
"source": [
"fc.input_layer(feature_batch, [age, fc.indicator_column(relationship)])"
] ]
}, },
{ {
"metadata": {
"id": "tTudP7WHcYwb",
"colab_type": "text"
},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"What if we don't know the set of possible values in advance? Not a problem. We\n", "What if we don't know the set of possible values in advance? Not a problem. We\n",
"can use `categorical_column_with_hash_bucket` instead:" "can use `categorical_column_with_hash_bucket` instead:"
] ]
}, },
{ {
"metadata": {
"id": "8pSBaliCcYwb",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 37
},
"outputId": "e9b2e611-1311-4933-af0a-489e03fdc960"
},
"cell_type": "code", "cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [ "source": [
"occupation = tf.feature_column.categorical_column_with_hash_bucket(\n", "occupation = tf.feature_column.categorical_column_with_hash_bucket(\n",
" 'occupation', hash_bucket_size=1000)" " 'occupation', hash_bucket_size=1000)"
] ],
"execution_count": 23,
"outputs": []
},
{
"metadata": {
"id": "fSAPrqQkcYwd",
"colab_type": "text"
},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"What will happen is that each possible value in the feature column `occupation`\n", "What will happen is that each possible value in the feature column `occupation`\n",
"will be hashed to an integer ID as we encounter them in training. The example batch has a few different occupations:" "will be hashed to an integer ID as we encounter them in training. The example batch has a few different occupations:"
] ]
}, },
{ {
"metadata": {
"id": "dCvQNv36cYwe",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 207
},
"outputId": "23ebfedd-faf8-425b-a855-9897aba20341"
},
"cell_type": "code", "cell_type": "code",
"execution_count": 25, "source": [
"metadata": {}, "for item in feature_batch['occupation'].numpy():\n",
" print(item.decode())"
],
"execution_count": 24,
"outputs": [ "outputs": [
{ {
"name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"Machine-op-inspct\n",
"Transport-moving\n",
"Prof-specialty\n", "Prof-specialty\n",
"Adm-clerical\n", "Exec-managerial\n",
"Handlers-cleaners\n",
"Prof-specialty\n", "Prof-specialty\n",
"Other-service\n", "Exec-managerial\n",
"Farming-fishing\n", "Tech-support\n",
"Farming-fishing\n", "Sales\n",
"Handlers-cleaners\n" "Exec-managerial\n",
] "Machine-op-inspct\n",
} "?\n",
"Exec-managerial\n"
], ],
"source": [ "name": "stdout"
"for item in feature_batch['occupation'].numpy():\n", }
" print(item.decode())"
] ]
}, },
{ {
"metadata": {
"id": "KP5hN2rAcYwh",
"colab_type": "text"
},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"if we run `input_layer` with the hashed column we see that the output shape is `(batch_size, hash_bucket_size)`" "if we run `input_layer` with the hashed column we see that the output shape is `(batch_size, hash_bucket_size)`"
] ]
}, },
{ {
"metadata": {
"id": "0Y16peWacYwh",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 54
},
"outputId": "524b1af5-c492-4d0e-b736-7974ca618089"
},
"cell_type": "code", "cell_type": "code",
"execution_count": 27, "source": [
"metadata": {}, "occupation_result = fc.input_layer(feature_batch, [fc.indicator_column(occupation)])\n",
"\n",
"occupation_result.numpy().shape"
],
"execution_count": 25,
"outputs": [ "outputs": [
{ {
"output_type": "execute_result",
"data": { "data": {
"text/plain": [ "text/plain": [
"(10, 1000)" "(10, 1000)"
] ]
}, },
"execution_count": 27, "metadata": {
"metadata": {}, "tags": []
"output_type": "execute_result" },
"execution_count": 25
} }
],
"source": [
"occupation_result = fc.input_layer(feature_batch, [fc.indicator_column(occupation)])\n",
"\n",
"occupation_result.numpy().shape"
] ]
}, },
{ {
"metadata": {
"id": "HMW2MzWAcYwk",
"colab_type": "text"
},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"It's easier to see the actual results if we take the tf.argmax over the `hash_bucket_size` dimension.\n", "It's easier to see the actual results if we take the tf.argmax over the `hash_bucket_size` dimension.\n",
"\n", "\n",
...@@ -949,28 +1231,41 @@ ...@@ -949,28 +1231,41 @@
] ]
}, },
{ {
"metadata": {
"id": "q_ryRglmcYwk",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 54
},
"outputId": "e1797664-1200-48e3-c774-52e7e0a18f00"
},
"cell_type": "code", "cell_type": "code",
"execution_count": 28, "source": [
"metadata": {}, "tf.argmax(occupation_result, axis=1).numpy()"
],
"execution_count": 26,
"outputs": [ "outputs": [
{ {
"output_type": "execute_result",
"data": { "data": {
"text/plain": [ "text/plain": [
"array([911, 420, 979, 96, 10, 979, 527, 936, 936, 10])" "array([979, 800, 979, 800, 413, 631, 800, 911, 65, 800])"
] ]
}, },
"execution_count": 28, "metadata": {
"metadata": {}, "tags": []
"output_type": "execute_result" },
"execution_count": 26
} }
],
"source": [
"tf.argmax(occupation_result, axis=1).numpy()"
] ]
}, },
{ {
"metadata": {
"id": "j1e5NfyKcYwn",
"colab_type": "text"
},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"No matter which way we choose to define a `SparseColumn`, each feature string\n", "No matter which way we choose to define a `SparseColumn`, each feature string\n",
"will be mapped into an integer ID by looking up a fixed mapping or by hashing.\n", "will be mapped into an integer ID by looking up a fixed mapping or by hashing.\n",
...@@ -983,10 +1278,16 @@ ...@@ -983,10 +1278,16 @@
] ]
}, },
{ {
"metadata": {
"id": "0Z5eUrd_cYwo",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 37
},
"outputId": "becd1bda-9014-4b9e-92ef-ba4ee2ed52fa"
},
"cell_type": "code", "cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [],
"source": [ "source": [
"education = tf.feature_column.categorical_column_with_vocabulary_list(\n", "education = tf.feature_column.categorical_column_with_vocabulary_list(\n",
" 'education', [\n", " 'education', [\n",
...@@ -1003,47 +1304,48 @@ ...@@ -1003,47 +1304,48 @@
" 'workclass', [\n", " 'workclass', [\n",
" 'Self-emp-not-inc', 'Private', 'State-gov', 'Federal-gov',\n", " 'Self-emp-not-inc', 'Private', 'State-gov', 'Federal-gov',\n",
" 'Local-gov', '?', 'Self-emp-inc', 'Without-pay', 'Never-worked'])\n" " 'Local-gov', '?', 'Self-emp-inc', 'Without-pay', 'Never-worked'])\n"
] ],
"execution_count": 27,
"outputs": []
},
{
"metadata": {
"id": "a03l9ozUcYwp",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 37
},
"outputId": "374c7f00-8d2e-458f-ec32-b4cbc6b7386f"
},
"cell_type": "code", "cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [],
"source": [ "source": [
"my_categorical_columns = [relationship, occupation, education, marital_status, workclass]" "my_categorical_columns = [relationship, occupation, education, marital_status, workclass]"
] ],
"execution_count": 28,
"outputs": []
},
{
"metadata": {
"id": "ASQJM1pEcYwr",
"colab_type": "text"
},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"It's easy to use both sets of columns to configure a model that uses all these features:" "It's easy to use both sets of columns to configure a model that uses all these features:"
] ]
}, },
{ {
"metadata": {
"id": "_i_MLoo9cYws",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 224
},
"outputId": "95ab18a4-2ec1-4fad-c207-2f86b607a333"
},
"cell_type": "code", "cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"accuracy: 0.83342546\n",
"accuracy_baseline: 0.76377374\n",
"auc: 0.8807037\n",
"auc_precision_recall: 0.6601031\n",
"average_loss: 0.8671454\n",
"global_step: 1018\n",
"label/mean: 0.23622628\n",
"loss: 55.36468\n",
"precision: 0.6496042\n",
"prediction/mean: 0.2628341\n",
"recall: 0.6401456\n"
]
}
],
"source": [ "source": [
"classifier = tf.estimator.LinearClassifier(feature_columns=my_numeric_columns+my_categorical_columns, n_classes=2)\n", "classifier = tf.estimator.LinearClassifier(feature_columns=my_numeric_columns+my_categorical_columns, n_classes=2)\n",
"classifier.train(train_inpf)\n", "classifier.train(train_inpf)\n",
...@@ -1052,11 +1354,34 @@ ...@@ -1052,11 +1354,34 @@
"clear_output()\n", "clear_output()\n",
"for key,value in sorted(result.items()):\n", "for key,value in sorted(result.items()):\n",
" print('%s: %s' % (key, value))" " print('%s: %s' % (key, value))"
],
"execution_count": 29,
"outputs": [
{
"output_type": "stream",
"text": [
"accuracy: 0.81978995\n",
"accuracy_baseline: 0.76377374\n",
"auc: 0.869223\n",
"auc_precision_recall: 0.6459037\n",
"average_loss: 1.9878242\n",
"global_step: 1018\n",
"label/mean: 0.23622628\n",
"loss: 126.916725\n",
"precision: 0.60679156\n",
"prediction/mean: 0.2908891\n",
"recall: 0.6736869\n"
],
"name": "stdout"
}
] ]
},
{
"metadata": {
"id": "zdKEqF6xcYwv",
"colab_type": "text"
},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"### Derived feature columns\n", "### Derived feature columns\n",
"\n", "\n",
...@@ -1082,18 +1407,29 @@ ...@@ -1082,18 +1407,29 @@
] ]
}, },
{ {
"metadata": {
"id": "KT4pjD9AcYww",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 17
},
"outputId": "633c1bb5-e5e2-4cf3-8392-5caf473607da"
},
"cell_type": "code", "cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [],
"source": [ "source": [
"age_buckets = tf.feature_column.bucketized_column(\n", "age_buckets = tf.feature_column.bucketized_column(\n",
" age, boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65])" " age, boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65])"
] ],
"execution_count": 30,
"outputs": []
},
{
"metadata": {
"id": "S-XOscrEcYwx",
"colab_type": "text"
},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"where the `boundaries` is a list of bucket boundaries. In this case, there are\n", "where the `boundaries` is a list of bucket boundaries. In this case, there are\n",
"10 boundaries, resulting in 11 age group buckets (from age 17 and below, 18-24,\n", "10 boundaries, resulting in 11 age group buckets (from age 17 and below, 18-24,\n",
...@@ -1103,38 +1439,51 @@ ...@@ -1103,38 +1439,51 @@
] ]
}, },
{ {
"metadata": {
"id": "Lr40vm3qcYwy",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 204
},
"outputId": "e53a3d92-f8d4-4ff7-da5e-46f498eb2316"
},
"cell_type": "code", "cell_type": "code",
"execution_count": 33, "source": [
"metadata": {}, "fc.input_layer(feature_batch, [age, age_buckets]).numpy()"
],
"execution_count": 31,
"outputs": [ "outputs": [
{ {
"output_type": "execute_result",
"data": { "data": {
"text/plain": [ "text/plain": [
"array([[46., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],\n", "array([[31., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],\n",
" [38., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],\n", " [88., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],\n",
" [42., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],\n", " [36., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],\n",
" [37., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],\n",
" [29., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],\n",
" [48., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],\n",
" [46., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],\n", " [46., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],\n",
" [20., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n",
" [51., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],\n",
" [30., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],\n",
" [40., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],\n", " [40., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],\n",
" [73., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],\n", " [31., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],\n",
" [49., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.]],\n", " [49., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.]],\n",
" dtype=float32)" " dtype=float32)"
] ]
}, },
"execution_count": 33, "metadata": {
"metadata": {}, "tags": []
"output_type": "execute_result" },
"execution_count": 31
} }
],
"source": [
"fc.input_layer(feature_batch, [age, age_buckets]).numpy()"
] ]
}, },
{ {
"metadata": {
"id": "Z_tQI9j8cYw1",
"colab_type": "text"
},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"#### Learn complex relationships with crossed column\n", "#### Learn complex relationships with crossed column\n",
"\n", "\n",
...@@ -1150,18 +1499,29 @@ ...@@ -1150,18 +1499,29 @@
] ]
}, },
{ {
"metadata": {
"id": "IAPhPzXscYw1",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 37
},
"outputId": "4dd22eaf-3917-449d-9068-5306ae60b6a6"
},
"cell_type": "code", "cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [],
"source": [ "source": [
"education_x_occupation = tf.feature_column.crossed_column(\n", "education_x_occupation = tf.feature_column.crossed_column(\n",
" ['education', 'occupation'], hash_bucket_size=1000)" " ['education', 'occupation'], hash_bucket_size=1000)"
] ],
"execution_count": 32,
"outputs": []
},
{
"metadata": {
"id": "UeTxMunbcYw5",
"colab_type": "text"
},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"We can also create a `crossed_column` over more than two columns. Each\n", "We can also create a `crossed_column` over more than two columns. Each\n",
"constituent column can be either a base feature column that is categorical\n", "constituent column can be either a base feature column that is categorical\n",
...@@ -1170,18 +1530,29 @@ ...@@ -1170,18 +1530,29 @@
] ]
}, },
{ {
"metadata": {
"id": "y8UaBld9cYw7",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 37
},
"outputId": "4abb43e7-c406-4caf-f15e-71af723ec8df"
},
"cell_type": "code", "cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [],
"source": [ "source": [
"age_buckets_x_education_x_occupation = tf.feature_column.crossed_column(\n", "age_buckets_x_education_x_occupation = tf.feature_column.crossed_column(\n",
" [age_buckets, 'education', 'occupation'], hash_bucket_size=1000)" " [age_buckets, 'education', 'occupation'], hash_bucket_size=1000)"
] ],
"execution_count": 33,
"outputs": []
},
{
"metadata": {
"id": "HvKmW6U5cYw8",
"colab_type": "text"
},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"These crossed columns always use hash buckets to avoid the exponential explosion in the number of categories, and put the control over number of model weights in the hands of the user.\n", "These crossed columns always use hash buckets to avoid the exponential explosion in the number of categories, and put the control over number of model weights in the hands of the user.\n",
"\n", "\n",
...@@ -1190,8 +1561,11 @@ ...@@ -1190,8 +1561,11 @@
] ]
}, },
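{
"metadata": {},
"cell_type": "markdown",
"source": [
"As a minimal sketch, using the same `feature_batch` and `input_layer` pattern as above, we can inspect the crossed column through an `indicator_column`; its output width equals the `hash_bucket_size`:"
]
},
{
"metadata": {},
"cell_type": "code",
"source": [
"# Sketch: `crossed_result` is just an illustrative name. The indicator output\n",
"# of the crossed column has shape (batch_size, hash_bucket_size) = (10, 1000).\n",
"crossed_result = fc.input_layer(feature_batch, [fc.indicator_column(education_x_occupation)])\n",
"crossed_result.numpy().shape"
],
"execution_count": null,
"outputs": []
},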
{
"metadata": {
"id": "HtjpheB6cYw9",
"colab_type": "text"
},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"## Defining The Logistic Regression Model\n", "## Defining The Logistic Regression Model\n",
"\n", "\n",
...@@ -1210,39 +1584,16 @@ ...@@ -1210,39 +1584,16 @@
] ]
}, },
{ {
"cell_type": "code", "metadata": {
"execution_count": 36, "id": "Klmf3OxpcYw-",
"metadata": {}, "colab_type": "code",
"outputs": [ "colab": {
{ "base_uri": "https://localhost:8080/",
"name": "stdout", "height": 105
"output_type": "stream",
"text": [
"INFO:tensorflow:Using default config.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"I0711 14:48:54.071429 140466218788608 tf_logging.py:115] Using default config.\n"
]
}, },
{ "outputId": "a8f46b90-a9d0-4d33-fff5-38b530e35d43"
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow:Using config: {'_global_id_in_cluster': 0, '_is_chief': True, '_keep_checkpoint_every_n_hours': 10000, '_tf_random_seed': None, '_num_worker_replicas': 1, '_device_fn': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fc03341f668>, '_evaluation_master': '', '_train_distribute': None, '_model_dir': '/tmp/tmpligbanno', '_session_config': None, '_save_checkpoints_steps': None, '_master': '', '_num_ps_replicas': 0, '_task_type': 'worker', '_log_step_count_steps': 100, '_save_summary_steps': 100, '_service': None, '_task_id': 0, '_save_checkpoints_secs': 600, '_keep_checkpoint_max': 5}\n"
]
}, },
{ "cell_type": "code",
"name": "stderr",
"output_type": "stream",
"text": [
"I0711 14:48:54.073915 140466218788608 tf_logging.py:115] Using config: {'_global_id_in_cluster': 0, '_is_chief': True, '_keep_checkpoint_every_n_hours': 10000, '_tf_random_seed': None, '_num_worker_replicas': 1, '_device_fn': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fc03341f668>, '_evaluation_master': '', '_train_distribute': None, '_model_dir': '/tmp/tmpligbanno', '_session_config': None, '_save_checkpoints_steps': None, '_master': '', '_num_ps_replicas': 0, '_task_type': 'worker', '_log_step_count_steps': 100, '_save_summary_steps': 100, '_service': None, '_task_id': 0, '_save_checkpoints_secs': 600, '_keep_checkpoint_max': 5}\n"
]
}
],
"source": [ "source": [
"import tempfile\n", "import tempfile\n",
"\n", "\n",
...@@ -1260,11 +1611,45 @@ ...@@ -1260,11 +1611,45 @@
"model_dir = tempfile.mkdtemp()\n", "model_dir = tempfile.mkdtemp()\n",
"model = tf.estimator.LinearClassifier(\n", "model = tf.estimator.LinearClassifier(\n",
" model_dir=model_dir, feature_columns=base_columns + crossed_columns)" " model_dir=model_dir, feature_columns=base_columns + crossed_columns)"
],
"execution_count": 34,
"outputs": [
{
"output_type": "stream",
"text": [
"INFO:tensorflow:Using default config.\n"
],
"name": "stdout"
},
{
"output_type": "stream",
"text": [
"I0711 22:27:55.502184 140174775953280 tf_logging.py:115] Using default config.\n"
],
"name": "stderr"
},
{
"output_type": "stream",
"text": [
"INFO:tensorflow:Using config: {'_model_dir': '/tmp/tmp93vf5hp6', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f7cc6df0ba8>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}\n"
],
"name": "stdout"
},
{
"output_type": "stream",
"text": [
"I0711 22:27:55.509107 140174775953280 tf_logging.py:115] Using config: {'_model_dir': '/tmp/tmp93vf5hp6', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f7cc6df0ba8>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}\n"
],
"name": "stderr"
}
]
},
{
"metadata": {
"id": "jRhnPxUucYxC",
"colab_type": "text"
},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"The model also automatically learns a bias term, which controls the prediction\n", "The model also automatically learns a bias term, which controls the prediction\n",
"one would make without observing any features (see the section [How Logistic\n", "one would make without observing any features (see the section [How Logistic\n",
...@@ -1279,30 +1664,54 @@ ...@@ -1279,30 +1664,54 @@
] ]
}, },
{ {
"metadata": {
"id": "ZlrIBuoecYxD",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 17
},
"outputId": "5aa0bc8c-9496-4301-963a-78bcef54e17a"
},
"cell_type": "code", "cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [],
"source": [ "source": [
"model.train(train_inpf)\n", "model.train(train_inpf)\n",
"clear_output()" "clear_output()"
] ],
"execution_count": 35,
"outputs": []
},
{
"metadata": {
"id": "IvY3a9pzcYxH",
"colab_type": "text"
},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"After the model is trained, we can evaluate how good our model is at predicting\n", "After the model is trained, we can evaluate how good our model is at predicting\n",
"the labels of the holdout data:" "the labels of the holdout data:"
] ]
}, },
{ {
"metadata": {
"id": "L9nVJEO8cYxI",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 204
},
"outputId": "8eb14bd7-9030-4381-c18a-6a5c7c17c569"
},
"cell_type": "code", "cell_type": "code",
"execution_count": 39, "source": [
"metadata": {}, "results = model.evaluate(test_inpf)\n",
"clear_output()\n",
"for key in sorted(results):\n",
" print('%s: %0.2f' % (key, results[key]))"
],
"execution_count": 36,
"outputs": [ "outputs": [
{ {
"name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"accuracy: 0.84\n", "accuracy: 0.84\n",
...@@ -1312,23 +1721,21 @@ ...@@ -1312,23 +1721,21 @@
"average_loss: 0.35\n", "average_loss: 0.35\n",
"global_step: 1018.00\n", "global_step: 1018.00\n",
"label/mean: 0.24\n", "label/mean: 0.24\n",
"loss: 22.37\n", "loss: 22.42\n",
"precision: 0.69\n", "precision: 0.71\n",
"prediction/mean: 0.24\n", "prediction/mean: 0.22\n",
"recall: 0.57\n" "recall: 0.52\n"
]
}
], ],
"source": [ "name": "stdout"
"results = model.evaluate(test_inpf)\n", }
"clear_output()\n",
"for key in sorted(results):\n",
" print('%s: %0.2f' % (key, results[key]))"
] ]
}, },
{ {
"metadata": {
"id": "E0fAibNDcYxL",
"colab_type": "text"
},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"The first line of the final output should be something like\n", "The first line of the final output should be something like\n",
"`accuracy: 0.83`, which means the accuracy is 83%. Feel free to try more\n", "`accuracy: 0.83`, which means the accuracy is 83%. Feel free to try more\n",
...@@ -1341,11 +1748,39 @@ ...@@ -1341,11 +1748,39 @@
] ]
}, },
{ {
"metadata": {
"id": "8R5bz5CxcYxL",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 669
},
"outputId": "71f5e775-0d24-4356-d785-3b06aa385957"
},
"cell_type": "code", "cell_type": "code",
"execution_count": 40, "source": [
"metadata": {}, "import numpy as np\n",
"predict_df = test_df[:20].copy()\n",
"\n",
"pred_iter = model.predict(\n",
" lambda:easy_input_function(predict_df, label_key='income_bracket',\n",
" num_epochs=1, shuffle=False, batch_size=10))\n",
"\n",
"classes = np.array(['<=50K', '>50K'])\n",
"pred_class_id = []\n",
"for pred_dict in pred_iter:\n",
" pred_class_id.append(pred_dict['class_ids'])\n",
"\n",
"predict_df['predicted_class'] = classes[np.array(pred_class_id)]\n",
"predict_df['correct'] = predict_df['predicted_class'] == predict_df['income_bracket']\n",
"\n",
"clear_output()\n",
"predict_df[['income_bracket','predicted_class', 'correct']]"
],
"execution_count": 37,
"outputs": [ "outputs": [
{ {
"output_type": "execute_result",
"data": { "data": {
"text/html": [ "text/html": [
"<div>\n", "<div>\n",
...@@ -1520,34 +1955,19 @@ ...@@ -1520,34 +1955,19 @@
"19 >50K >50K True" "19 >50K >50K True"
] ]
}, },
"execution_count": 40, "metadata": {
"metadata": {}, "tags": []
"output_type": "execute_result" },
"execution_count": 37
} }
],
"source": [
"import numpy as np\n",
"predict_df = test_df[:20].copy()\n",
"\n",
"pred_iter = model.predict(\n",
" lambda:easy_input_function(predict_df, label_key='income_bracket',\n",
" num_epochs=1, shuffle=False, batch_size=10))\n",
"\n",
"classes = np.array(['<=50K', '>50K'])\n",
"pred_class_id = []\n",
"for pred_dict in pred_iter:\n",
" pred_class_id.append(pred_dict['class_ids'])\n",
"\n",
"predict_df['predicted_class'] = classes[np.array(pred_class_id)]\n",
"predict_df['correct'] = predict_df['predicted_class'] == predict_df['income_bracket']\n",
"\n",
"clear_output()\n",
"predict_df[['income_bracket','predicted_class', 'correct']]"
] ]
}, },
{ {
"metadata": {
"id": "N_uCpFTicYxN",
"colab_type": "text"
},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"If you'd like to see a working end-to-end example, you can download our\n", "If you'd like to see a working end-to-end example, you can download our\n",
"[example code](https://github.com/tensorflow/models/tree/master/official/wide_deep/census_main.py)\n", "[example code](https://github.com/tensorflow/models/tree/master/official/wide_deep/census_main.py)\n",
...@@ -1568,28 +1988,16 @@ ...@@ -1568,28 +1988,16 @@
] ]
}, },
{ {
"metadata": {
"id": "cVv2HsqocYxO",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 204
},
"outputId": "68504270-5bcc-4a87-dbfa-7fd94cf54dff"
},
"cell_type": "code", "cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"accuracy: 0.84\n",
"accuracy_baseline: 0.76\n",
"auc: 0.89\n",
"auc_precision_recall: 0.70\n",
"average_loss: 0.35\n",
"global_step: 2036.00\n",
"label/mean: 0.24\n",
"loss: 22.29\n",
"precision: 0.69\n",
"prediction/mean: 0.24\n",
"recall: 0.56\n"
]
}
],
"source": [ "source": [
"#TODO(markdaoust): is the regularization strength here not working?\n", "#TODO(markdaoust): is the regularization strength here not working?\n",
"model = tf.estimator.LinearClassifier(\n", "model = tf.estimator.LinearClassifier(\n",
...@@ -1605,11 +2013,34 @@ ...@@ -1605,11 +2013,34 @@
"clear_output()\n", "clear_output()\n",
"for key in sorted(results):\n", "for key in sorted(results):\n",
" print('%s: %0.2f' % (key, results[key]))" " print('%s: %0.2f' % (key, results[key]))"
],
"execution_count": 38,
"outputs": [
{
"output_type": "stream",
"text": [
"accuracy: 0.84\n",
"accuracy_baseline: 0.76\n",
"auc: 0.89\n",
"auc_precision_recall: 0.70\n",
"average_loss: 0.35\n",
"global_step: 2036.00\n",
"label/mean: 0.24\n",
"loss: 22.28\n",
"precision: 0.70\n",
"prediction/mean: 0.24\n",
"recall: 0.55\n"
],
"name": "stdout"
}
] ]
},
{
"metadata": {
"id": "5AqvPEQwcYxU",
"colab_type": "text"
},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"One important difference between L1 and L2 regularization is that L1\n", "One important difference between L1 and L2 regularization is that L1\n",
"regularization tends to make model weights stay at zero, creating sparser\n", "regularization tends to make model weights stay at zero, creating sparser\n",
...@@ -1626,8 +2057,11 @@ ...@@ -1626,8 +2057,11 @@
] ]
}, },
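{
"metadata": {},
"cell_type": "markdown",
"source": [
"A minimal sketch of how to see this effect, assuming the regularized `model` trained above (exact variable names can differ between TensorFlow versions): pull the linear weights out of the estimator and count the entries that are effectively zero."
]
},
{
"metadata": {},
"cell_type": "code",
"source": [
"# Sketch: collect the linear model's weight variables (skipping the bias and\n",
"# the optimizer's Ftrl slot variables), then report the fraction of weights\n",
"# that L1 regularization has driven to (near) zero.\n",
"weight_names = [name for name in model.get_variable_names()\n",
"                if 'linear_model' in name and 'Ftrl' not in name and 'bias' not in name]\n",
"\n",
"weights = np.concatenate(\n",
"    [model.get_variable_value(name).flatten() for name in weight_names])\n",
"\n",
"print('fraction of near-zero weights:', np.mean(np.abs(weights) < 1e-6))"
],
"execution_count": null,
"outputs": []
},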
{
"metadata": {
"id": "i5119iMWcYxU",
"colab_type": "text"
},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"<a id=\"how_it_works\"> </a>\n", "<a id=\"how_it_works\"> </a>\n",
"## How Logistic Regression Works\n", "## How Logistic Regression Works\n",
...@@ -1675,8 +2109,11 @@ ...@@ -1675,8 +2109,11 @@
] ]
}, },
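{
"metadata": {},
"cell_type": "markdown",
"source": [
"In brief, for a feature vector $x$, weights $w$, and bias $b$, the model estimates\n",
"\n",
"$$P(Y=1 \\mid x) = \\frac{1}{1 + \\exp\\big(-(w^\\top x + b)\\big)},$$\n",
"\n",
"and training chooses $w$ and $b$ to minimize the log loss over the training data."
]
},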
{
"metadata": {
"id": "hbXuPYQIcYxV",
"colab_type": "text"
},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"## What Next\n", "## What Next\n",
"\n", "\n",
...@@ -1689,32 +2126,21 @@ ...@@ -1689,32 +2126,21 @@
] ]
}, },
{
"metadata": {
"id": "jpdw2z5WcYxV",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 17
},
"outputId": "403d18f6-d01e-47dc-dfc7-8c95d9a8ec34"
},
"cell_type": "code",
"source": [
""
],
"execution_count": 38,
"outputs": []
}
]
}
\ No newline at end of file