"For a working end-to-end example, download our [example code](https://github.com/tensorflow/models/tree/master/official/wide_deep/census_main.py) and set the `model_type` flag to `wide`."
"For a working end-to-end example, download our [example code](https://github.com/tensorflow/models/tree/master/official/wide_deep/census_main.py) and set the `model_type` flag to `wide`."
]
},
{
"metadata": {
"id": "oyKy1lM_3gkL",
"colab_type": "text"
},
"cell_type": "markdown",
"source": [
"## Adding Regularization to Prevent Overfitting\n",
"\n",
"Regularization is a technique used to avoid overfitting. Overfitting happens when a model performs well on the data it is trained on, but worse on test data that the model has not seen before. Overfitting can occur when a model is excessively complex, such as having too many parameters relative to the number of observed training data. Regularization allows you to control the model's complexity and make the model more generalizable to unseen data.\n",
"\n",
"You can add L1 and L2 regularizations to the model with the following code:"
"These regularized models don't perform much better than the base model. Let's look at the model's weight distributions to better see the effect of the regularization:"
]
},
{
"metadata": {
"id": "Wb6093N04XlS",
"colab_type": "code",
"colab": {}
},
"cell_type": "code",
"source": [
"def get_flat_weights(model):\n",
" weight_names = [\n",
" name for name in model.get_variable_names()\n",
" if \"linear_model\" in name and \"Ftrl\" not in name]\n",
"\n",
" weight_values = [model.get_variable_value(name) for name in weight_names]\n",
"\n",
" weights_flat = np.concatenate([item.flatten() for item in weight_values], axis=0)\n",
"\n",
" return weights_flat\n",
"\n",
"weights_flat = get_flat_weights(model)\n",
"weights_flat_l1 = get_flat_weights(model_l1)\n",
"weights_flat_l2 = get_flat_weights(model_l2)"
],
"execution_count": 0,
"outputs": []
},
{
"metadata": {
"id": "GskJmtfmL0p-",
"colab_type": "text"
},
"cell_type": "markdown",
"source": [
"The models have many zero-valued weights caused by unused hash bins (there are many more hash bins than categories in some columns). We can mask these weights when viewing the weight distributions:"
"Both types of regularization squeeze the distribution of weights towards zero. L2 regularization has a greater effect in the tails of the distribution eliminating extreme weights. L1 regularization produces more exactly-zero values, in this case it sets ~200 to zero."