Commit 1056788e authored by Mark Daoust

fix accidental revert of some content

parent f18f1ba1
@@ -6,6 +6,7 @@
"name": "linear.ipynb",
"version": "0.3.2",
"provenance": [],
"private_outputs": true,
"collapsed_sections": [
"MWW1TyjaecRh"
],
@@ -114,6 +115,8 @@
"\n",
"import os\n",
"import sys\n",
"\n",
"import matplotlib.pyplot as plt\n",
"from IPython.display import clear_output"
],
"execution_count": 0,
@@ -158,44 +161,15 @@
"metadata": {
"id": "tTwQzWcn8aBu",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 300
},
"outputId": "fa8bbeb9-cbee-49d7-b72e-faf20beb0cad"
"colab": {}
},
"cell_type": "code",
"source": [
"! pip install requests\n",
"! git clone --depth 1 https://github.com/tensorflow/models\n",
"sys.setdefaultencoding('UTF8')"
"! git clone --depth 1 https://github.com/tensorflow/models"
],
"execution_count": 36,
"outputs": [
{
"output_type": "stream",
"text": [
"Requirement already satisfied: requests in /usr/local/lib/python3.6/dist-packages (2.18.4)\n",
"Requirement already satisfied: urllib3<1.23,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests) (1.22)\n",
"Requirement already satisfied: idna<2.7,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests) (2.6)\n",
"Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests) (3.0.4)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests) (2018.4.16)\n",
"fatal: destination path 'models' already exists and is not an empty directory.\n"
],
"name": "stdout"
},
{
"output_type": "error",
"ename": "AttributeError",
"evalue": "ignored",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-36-0a0fe82d151a>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0mget_ipython\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msystem\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m' pip install requests'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mget_ipython\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msystem\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m' git clone --depth 1 https://github.com/tensorflow/models'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0msys\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msetdefaultencoding\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'UTF8'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;31mAttributeError\u001b[0m: module 'sys' has no attribute 'setdefaultencoding'"
]
}
]
"execution_count": 0,
"outputs": []
},
{
"metadata": {
@@ -1122,7 +1096,9 @@
"]\n",
"\n",
"model = tf.estimator.LinearClassifier(\n",
" model_dir=tempfile.mkdtemp(), feature_columns=base_columns + crossed_columns)"
" model_dir=tempfile.mkdtemp(), \n",
" feature_columns=base_columns + crossed_columns,\n",
" optimizer=tf.train.FtrlOptimizer(learning_rate=0.1))"
],
"execution_count": 0,
"outputs": []
@@ -1149,6 +1125,9 @@
},
"cell_type": "code",
"source": [
"train_inpf = functools.partial(census_dataset.input_fn, train_file, \n",
" num_epochs=40, shuffle=True, batch_size=64)\n",
"\n",
"model.train(train_inpf)\n",
"\n",
"clear_output() # used for notebook display"
@@ -1239,6 +1218,180 @@
"source": [
"For a working end-to-end example, download our [example code](https://github.com/tensorflow/models/tree/master/official/wide_deep/census_main.py) and set the `model_type` flag to `wide`."
]
},
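{
"metadata": {
"colab_type": "text"
},
"cell_type": "markdown",
"source": [
"As a rough sketch of what that looks like (the exact flags and any required data arguments are defined by the script itself, so check its `--help` output), one possible invocation from this notebook is:"
]
},
{
"metadata": {
"colab_type": "code",
"colab": {}
},
"cell_type": "code",
"source": [
"# Sketch only: assumes the `models` repository cloned earlier in this notebook\n",
"# and that the script accepts a --model_type flag, as described above.\n",
"! python models/official/wide_deep/census_main.py --model_type=wide"
],
"execution_count": 0,
"outputs": []
},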
{
"metadata": {
"id": "oyKy1lM_3gkL",
"colab_type": "text"
},
"cell_type": "markdown",
"source": [
"## Adding Regularization to Prevent Overfitting\n",
"\n",
"Regularization is a technique used to avoid overfitting. Overfitting happens when a model performs well on the data it is trained on, but worse on test data that the model has not seen before. Overfitting can occur when a model is excessively complex, such as having too many parameters relative to the number of observed training data. Regularization allows you to control the model's complexity and make the model more generalizable to unseen data.\n",
"\n",
"You can add L1 and L2 regularizations to the model with the following code:"
]
},
{
"metadata": {
"id": "lzMUSBQ03hHx",
"colab_type": "code",
"colab": {}
},
"cell_type": "code",
"source": [
"model_l1 = tf.estimator.LinearClassifier(\n",
" feature_columns=base_columns + crossed_columns,\n",
" optimizer=tf.train.FtrlOptimizer(\n",
" learning_rate=0.1,\n",
" l1_regularization_strength=10.0,\n",
" l2_regularization_strength=0.0))\n",
"\n",
"model_l1.train(train_inpf)\n",
"\n",
"results = model_l1.evaluate(test_inpf)\n",
"clear_output()\n",
"for key in sorted(results):\n",
" print('%s: %0.2f' % (key, results[key]))"
],
"execution_count": 0,
"outputs": []
},
{
"metadata": {
"id": "ofmPL212JIy2",
"colab_type": "code",
"colab": {}
},
"cell_type": "code",
"source": [
"model_l2 = tf.estimator.LinearClassifier(\n",
" feature_columns=base_columns + crossed_columns,\n",
" optimizer=tf.train.FtrlOptimizer(\n",
" learning_rate=0.1,\n",
" l1_regularization_strength=0.0,\n",
" l2_regularization_strength=10.0))\n",
"\n",
"model_l2.train(train_inpf)\n",
"\n",
"results = model_l2.evaluate(test_inpf)\n",
"clear_output()\n",
"for key in sorted(results):\n",
" print('%s: %0.2f' % (key, results[key]))"
],
"execution_count": 0,
"outputs": []
},
{
"metadata": {
"id": "Lp1Rfy_k4e7w",
"colab_type": "text"
},
"cell_type": "markdown",
"source": [
"These regularized models don't perform much better than the base model. Let's look at the model's weight distributions to better see the effect of the regularization:"
]
},
{
"metadata": {
"id": "Wb6093N04XlS",
"colab_type": "code",
"colab": {}
},
"cell_type": "code",
"source": [
"def get_flat_weights(model):\n",
" weight_names = [\n",
" name for name in model.get_variable_names()\n",
" if \"linear_model\" in name and \"Ftrl\" not in name]\n",
"\n",
" weight_values = [model.get_variable_value(name) for name in weight_names]\n",
"\n",
" weights_flat = np.concatenate([item.flatten() for item in weight_values], axis=0)\n",
"\n",
" return weights_flat\n",
"\n",
"weights_flat = get_flat_weights(model)\n",
"weights_flat_l1 = get_flat_weights(model_l1)\n",
"weights_flat_l2 = get_flat_weights(model_l2)"
],
"execution_count": 0,
"outputs": []
},
{
"metadata": {
"id": "GskJmtfmL0p-",
"colab_type": "text"
},
"cell_type": "markdown",
"source": [
"The models have many zero-valued weights caused by unused hash bins (there are many more hash bins than categories in some columns). We can mask these weights when viewing the weight distributions:"
]
},
{
"metadata": {
"id": "rM3agZe3MT3D",
"colab_type": "code",
"colab": {}
},
"cell_type": "code",
"source": [
"weight_mask = weights_flat != 0\n",
"\n",
"weights_base = weights_flat[weight_mask]\n",
"weights_l1 = weights_flat_l1[weight_mask]\n",
"weights_l2 = weights_flat_l2[weight_mask]"
],
"execution_count": 0,
"outputs": []
},
{
"metadata": {
"id": "NqBpxLLQNEBE",
"colab_type": "text"
},
"cell_type": "markdown",
"source": [
"Now plot the distributions:"
]
},
{
"metadata": {
"id": "IdFK7wWa5_0K",
"colab_type": "code",
"colab": {}
},
"cell_type": "code",
"source": [
"plt.figure()\n",
"_ = plt.hist(weights_base, bins=np.linspace(-3,3,30))\n",
"plt.title('Base Model')\n",
"plt.ylim([0,500])\n",
"\n",
"plt.figure()\n",
"_ = plt.hist(weights_l1, bins=np.linspace(-3,3,30))\n",
"plt.title('L1 - Regularization')\n",
"plt.ylim([0,500])\n",
"\n",
"plt.figure()\n",
"_ = plt.hist(weights_l2, bins=np.linspace(-3,3,30))\n",
"plt.title('L2 - Regularization')\n",
"_=plt.ylim([0,500])\n",
"\n"
],
"execution_count": 0,
"outputs": []
},
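{
"metadata": {
"colab_type": "text"
},
"cell_type": "markdown",
"source": [
"As an optional check (an addition, not part of the original plots), we can count how many of the weights that are non-zero in the base model each regularizer drives exactly to zero:"
]
},
{
"metadata": {
"colab_type": "code",
"colab": {}
},
"cell_type": "code",
"source": [
"# Counts of exactly-zero weights among positions that are non-zero in the base\n",
"# model; weights_base contains no zeros by construction of weight_mask.\n",
"print('base:', (weights_base == 0).sum())\n",
"print('L1: ', (weights_l1 == 0).sum())\n",
"print('L2: ', (weights_l2 == 0).sum())"
],
"execution_count": 0,
"outputs": []
},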
{
"metadata": {
"id": "Mv6knhFa5-iJ",
"colab_type": "text"
},
"cell_type": "markdown",
"source": [
"Both types of regularization squeeze the distribution of weights towards zero. L2 regularization has a greater effect in the tails of the distribution eliminating extreme weights. L1 regularization produces more exactly-zero values, in this case it sets ~200 to zero."
]
}
]
}