"sgl-kernel/vscode:/vscode.git/clone" did not exist on "d4c038daede43544d107f81cb5b6337c7a13803a"
Commit 9ba5b316 authored by Mark Daoust's avatar Mark Daoust
Browse files

Convert to colab format

parent 2c929976
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "wide.ipynb",
"version": "0.3.2",
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
}
},
"cells": [ "cells": [
{ {
"metadata": {
"id": "Zr7KpBhMcYvE",
"colab_type": "text"
},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"# TensorFlow Linear Model Tutorial\n", "# TensorFlow Linear Model Tutorial\n",
"\n", "\n"
]
},
{
"metadata": {
"id": "77aETSYDcdoK",
"colab_type": "text"
},
"cell_type": "markdown",
"source": [
"In this tutorial, we will use the `tf.estimator` API in TensorFlow to solve a\n", "In this tutorial, we will use the `tf.estimator` API in TensorFlow to solve a\n",
"binary classification problem: Given census data about a person such as age,\n", "binary classification problem: Given census data about a person such as age,\n",
"education, marital status, and occupation (the features), we will try to predict\n", "education, marital status, and occupation (the features), we will try to predict\n",
...@@ -25,24 +51,16 @@ ...@@ -25,24 +51,16 @@
] ]
}, },
{ {
"metadata": {
"id": "NQgONe5ecYvE",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 17
},
"outputId": "7ab0889a-32f9-4ace-f848-6c808893b88c"
},
"cell_type": "code", "cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [
{
"ename": "ValueError",
"evalue": "tf.enable_eager_execution must be called at program startup.",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-42-04d0fb7a9ec6>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mtensorflow\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mtf\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mtensorflow\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfeature_column\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mfc\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mtf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0menable_eager_execution\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/venv3/lib/python3.5/site-packages/tensorflow/python/framework/ops.py\u001b[0m in \u001b[0;36menable_eager_execution\u001b[0;34m(config, device_policy, execution_mode)\u001b[0m\n\u001b[1;32m 5238\u001b[0m \"\"\"\n\u001b[1;32m 5239\u001b[0m return enable_eager_execution_internal(\n\u001b[0;32m-> 5240\u001b[0;31m config, device_policy, execution_mode, None)\n\u001b[0m\u001b[1;32m 5241\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5242\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/venv3/lib/python3.5/site-packages/tensorflow/python/framework/ops.py\u001b[0m in \u001b[0;36menable_eager_execution_internal\u001b[0;34m(config, device_policy, execution_mode, server_def)\u001b[0m\n\u001b[1;32m 5306\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5307\u001b[0m raise ValueError(\n\u001b[0;32m-> 5308\u001b[0;31m \"tf.enable_eager_execution must be called at program startup.\")\n\u001b[0m\u001b[1;32m 5309\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5310\u001b[0m \u001b[0;31m# Monkey patch to get rid of an unnecessary conditional since the context is\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mValueError\u001b[0m: tf.enable_eager_execution must be called at program startup."
]
}
],
"source": [ "source": [
"import tensorflow as tf\n", "import tensorflow as tf\n",
"import tensorflow.feature_column as fc \n", "import tensorflow.feature_column as fc \n",
...@@ -51,29 +69,32 @@ ...@@ -51,29 +69,32 @@
"import os\n", "import os\n",
"import sys\n", "import sys\n",
"from IPython.display import clear_output" "from IPython.display import clear_output"
] ],
"execution_count": 1,
"outputs": []
},
{
"metadata": {
"id": "-MPr95UccYvL",
"colab_type": "text"
},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"Download the [tutorial code from github](https://github.com/tensorflow/models/tree/master/official/wide_deep/),\n", "Download the [tutorial code from github](https://github.com/tensorflow/models/tree/master/official/wide_deep/),\n",
" add the root directory to your python path, and jump to the `wide_deep` directory:" " add the root directory to your python path, and jump to the `wide_deep` directory:"
] ]
}, },
{ {
"metadata": {
"id": "yVvFyhnkcYvL",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 136
},
"outputId": "e57030d7-7f5c-455e-ea0f-55038e909d97"
},
"cell_type": "code", "cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"fatal: destination path 'models' already exists and is not an empty directory.\r\n"
]
}
],
"source": [ "source": [
"if \"wide_deep\" not in os.getcwd():\n", "if \"wide_deep\" not in os.getcwd():\n",
" ! git clone --depth 1 https://github.com/tensorflow/models\n", " ! git clone --depth 1 https://github.com/tensorflow/models\n",
...@@ -81,55 +102,96 @@ ...@@ -81,55 +102,96 @@
" sys.path.append(models_path) \n", " sys.path.append(models_path) \n",
" os.environ['PYTHONPATH'] += os.pathsep+models_path\n", " os.environ['PYTHONPATH'] += os.pathsep+models_path\n",
" os.chdir(\"models/official/wide_deep\")" " os.chdir(\"models/official/wide_deep\")"
],
"execution_count": 2,
"outputs": [
{
"output_type": "stream",
"text": [
"Cloning into 'models'...\n",
"remote: Counting objects: 2826, done.\u001b[K\n",
"remote: Compressing objects: 100% (2375/2375), done.\u001b[K\n",
"remote: Total 2826 (delta 543), reused 1731 (delta 382), pack-reused 0\u001b[K\n",
"Receiving objects: 100% (2826/2826), 371.22 MiB | 39.17 MiB/s, done.\n",
"Resolving deltas: 100% (543/543), done.\n",
"Checking out files: 100% (2934/2934), done.\n"
],
"name": "stdout"
}
]
},
{
"metadata": {
"id": "15Ethw-wcYvP",
"colab_type": "text"
},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"Execute the data download script:" "Execute the data download script:"
] ]
}, },
{ {
"metadata": {
"id": "6QilS4-0cYvQ",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 17
},
"outputId": "3faf2df7-677e-4a91-c09b-3d81ca30c9c1"
},
"cell_type": "code", "cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [ "source": [
"import census_dataset\n", "import census_dataset\n",
"import census_main\n", "import census_main\n",
"\n", "\n",
"census_dataset.download(\"/tmp/census_data/\")" "census_dataset.download(\"/tmp/census_data/\")"
] ],
"execution_count": 3,
"outputs": []
},
{
"metadata": {
"id": "cD5e3ibAcYvS",
"colab_type": "text"
},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"Execute the tutorial code with the following command to train the model described in this tutorial, from the command line:" "Execute the tutorial code with the following command to train the model described in this tutorial, from the command line:"
] ]
}, },
{ {
"metadata": {
"id": "vbJ8jPAhcYvT",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"outputId": "cc0182c0-90d7-4f9c-b421-0dd67166c6d2"
},
"cell_type": "code", "cell_type": "code",
"source": [
"output = !python -m census_main --model_type=wide --train_epochs=2\n",
"print([line for line in output if 'accuracy:' in line])"
],
"execution_count": 4, "execution_count": 4,
"metadata": {},
"outputs": [ "outputs": [
{ {
"name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"['I0711 14:47:25.747490 139708077598464 tf_logging.py:115] accuracy: 0.833794']\n" "['I0711 22:27:15.442501 140285526747008 tf_logging.py:115] accuracy: 0.8360666']\n"
]
}
], ],
"source": [ "name": "stdout"
"output = !python -m census_main --model_type=wide --train_epochs=2\n", }
"print([line for line in output if 'accuracy:' in line])"
] ]
}, },
{ {
"metadata": {
"id": "AmZ4CpaOcYvV",
"colab_type": "text"
},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"Read on to find out how this code builds its linear model.\n", "Read on to find out how this code builds its linear model.\n",
"\n", "\n",
...@@ -151,38 +213,70 @@ ...@@ -151,38 +213,70 @@
] ]
}, },
{ {
"metadata": {
"id": "N6Tgye8bcYvX",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"outputId": "75152d8d-6afa-4e4e-cc0e-3eac7127f8fd"
},
"cell_type": "code", "cell_type": "code",
"source": [
"!ls /tmp/census_data/"
],
"execution_count": 5, "execution_count": 5,
"metadata": {},
"outputs": [ "outputs": [
{ {
"name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"adult.data adult.test\r\n" "adult.data adult.test\r\n"
]
}
], ],
"source": [ "name": "stdout"
"!ls /tmp/census_data/" }
] ]
}, },
{ {
"metadata": {
"id": "6y3mj9zKcYva",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 17
},
"outputId": "3b44b7dd-5a2d-4943-eb19-20f26d5c7098"
},
"cell_type": "code", "cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [ "source": [
"train_file = \"/tmp/census_data/adult.data\"\n", "train_file = \"/tmp/census_data/adult.data\"\n",
"test_file = \"/tmp/census_data/adult.test\"" "test_file = \"/tmp/census_data/adult.test\""
] ],
"execution_count": 6,
"outputs": []
},
{
"metadata": {
"id": "vkn1FNmpcYvb",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 204
},
"outputId": "4e27b186-b76c-4f19-ea9d-abe19110e93b"
},
"cell_type": "code", "cell_type": "code",
"source": [
"import pandas\n",
"train_df = pandas.read_csv(train_file, header = None, names = census_dataset._CSV_COLUMNS)\n",
"test_df = pandas.read_csv(test_file, header = None, names = census_dataset._CSV_COLUMNS)\n",
"\n",
"train_df.head()"
],
"execution_count": 7, "execution_count": 7,
"metadata": {},
"outputs": [ "outputs": [
{ {
"output_type": "execute_result",
"data": { "data": {
"text/html": [ "text/html": [
"<div>\n", "<div>\n",
...@@ -338,22 +432,19 @@ ...@@ -338,22 +432,19 @@
"4 0 0 40 Cuba <=50K " "4 0 0 40 Cuba <=50K "
] ]
}, },
"execution_count": 7, "metadata": {
"metadata": {}, "tags": []
"output_type": "execute_result" },
"execution_count": 7
} }
],
"source": [
"import pandas\n",
"train_df = pandas.read_csv(train_file, header = None, names = census_dataset._CSV_COLUMNS)\n",
"test_df = pandas.read_csv(test_file, header = None, names = census_dataset._CSV_COLUMNS)\n",
"\n",
"train_df.head()"
] ]
}, },
{ {
"metadata": {
"id": "QZZtXes4cYvf",
"colab_type": "text"
},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"The columns can be grouped into two types—categorical\n", "The columns can be grouped into two types—categorical\n",
"and continuous columns:\n", "and continuous columns:\n",
...@@ -392,10 +483,16 @@ ...@@ -392,10 +483,16 @@
] ]
}, },
{ {
"metadata": {
"id": "N7zNJflKcYvg",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 17
},
"outputId": "4aebe747-0fca-4209-cf28-3164080ab89f"
},
"cell_type": "code", "cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [ "source": [
"def easy_input_function(df, label_key, num_epochs, shuffle, batch_size):\n", "def easy_input_function(df, label_key, num_epochs, shuffle, batch_size):\n",
" df = df.copy()\n", " df = df.copy()\n",
...@@ -408,34 +505,31 @@ ...@@ -408,34 +505,31 @@
" ds = ds.batch(batch_size).repeat(num_epochs)\n", " ds = ds.batch(batch_size).repeat(num_epochs)\n",
"\n", "\n",
" return ds" " return ds"
] ],
"execution_count": 8,
"outputs": []
},
{
"metadata": {
"id": "WeEgNR9AcYvh",
"colab_type": "text"
},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"Since we have eager execution enabled it is easy to inspect the resulting dataset:" "Since we have eager execution enabled it is easy to inspect the resulting dataset:"
] ]
}, },
{ {
"metadata": {
"id": "ygaKuikecYvi",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 136
},
"outputId": "071665a2-d23f-4c15-da43-ce0d106d473f"
},
"cell_type": "code", "cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Some feature keys: ['capital_gain', 'occupation', 'gender', 'capital_loss', 'workclass']\n",
"\n",
"A batch of Ages : tf.Tensor([61 18 37 47 47 32 18 23 28 37], shape=(10,), dtype=int32)\n",
"\n",
"A batch of Labels: tf.Tensor(\n",
"[b'>50K' b'<=50K' b'>50K' b'>50K' b'>50K' b'>50K' b'<=50K' b'<=50K'\n",
" b'<=50K' b'<=50K'], shape=(10,), dtype=string)\n"
]
}
],
"source": [ "source": [
"ds = easy_input_function(train_df, label_key='income_bracket', num_epochs=5, shuffle=True, batch_size=10)\n", "ds = easy_input_function(train_df, label_key='income_bracket', num_epochs=5, shuffle=True, batch_size=10)\n",
"\n", "\n",
...@@ -447,11 +541,30 @@ ...@@ -447,11 +541,30 @@
"print('A batch of Ages :', feature_batch['age'])\n", "print('A batch of Ages :', feature_batch['age'])\n",
"print()\n", "print()\n",
"print('A batch of Labels:', label_batch )" "print('A batch of Labels:', label_batch )"
],
"execution_count": 9,
"outputs": [
{
"output_type": "stream",
"text": [
"Some feature keys: ['age', 'workclass', 'fnlwgt', 'education', 'education_num']\n",
"\n",
"A batch of Ages : tf.Tensor([52 57 31 33 34 22 32 66 35 44], shape=(10,), dtype=int32)\n",
"\n",
"A batch of Labels: tf.Tensor(\n",
"[b'<=50K' b'<=50K' b'<=50K' b'<=50K' b'<=50K' b'<=50K' b'<=50K' b'<=50K'\n",
" b'<=50K' b'>50K'], shape=(10,), dtype=string)\n"
],
"name": "stdout"
}
]
},
{
"metadata": {
"id": "O_KZxQUucYvm",
"colab_type": "text"
},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"But this approach has severly-limited scalability. For larger data it should be streamed off disk.\n", "But this approach has severly-limited scalability. For larger data it should be streamed off disk.\n",
"the `census_dataset.input_fn` provides an example of how to do this using `tf.decode_csv` and `tf.data.TextLineDataset`: \n", "the `census_dataset.input_fn` provides an example of how to do this using `tf.decode_csv` and `tf.data.TextLineDataset`: \n",
...@@ -460,12 +573,23 @@ ...@@ -460,12 +573,23 @@
] ]
}, },
{ {
"metadata": {
"id": "vUTeXaEUcYvn",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 493
},
"outputId": "2da7413a-5e54-4e86-f3c5-07387156ab79"
},
"cell_type": "code", "cell_type": "code",
"source": [
"import inspect\n",
"print(inspect.getsource(census_dataset.input_fn))"
],
"execution_count": 10, "execution_count": 10,
"metadata": {},
"outputs": [ "outputs": [
{ {
"name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"def input_fn(data_file, num_epochs, shuffle, batch_size):\n", "def input_fn(data_file, num_epochs, shuffle, batch_size):\n",
...@@ -496,63 +620,65 @@ ...@@ -496,63 +620,65 @@
" dataset = dataset.batch(batch_size)\n", " dataset = dataset.batch(batch_size)\n",
" return dataset\n", " return dataset\n",
"\n" "\n"
]
}
], ],
"source": [ "name": "stdout"
"import inspect\n", }
"print(inspect.getsource(census_dataset.input_fn))"
] ]
}, },
{ {
"metadata": {
"id": "yyGcv_e-cYvq",
"colab_type": "text"
},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"This input_fn gives equivalent output:" "This input_fn gives equivalent output:"
] ]
}, },
{ {
"metadata": {
"id": "DlsqRZS5cYvr",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 68
},
"outputId": "31dee63f-80f7-4c7e-f749-a5531d33ab95"
},
"cell_type": "code", "cell_type": "code",
"source": [
"ds = census_dataset.input_fn(train_file, num_epochs=5, shuffle=True, batch_size=10)"
],
"execution_count": 11, "execution_count": 11,
"metadata": {},
"outputs": [ "outputs": [
{ {
"name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"INFO:tensorflow:Parsing /tmp/census_data/adult.data\n" "INFO:tensorflow:Parsing /tmp/census_data/adult.data\n"
] ],
"name": "stdout"
}, },
{ {
"name": "stderr",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"WARNING: Logging before flag parsing goes to stderr.\n", "WARNING: Logging before flag parsing goes to stderr.\n",
"I0711 14:47:26.362334 140466218788608 tf_logging.py:115] Parsing /tmp/census_data/adult.data\n" "I0711 22:27:19.570451 140174775953280 tf_logging.py:115] Parsing /tmp/census_data/adult.data\n"
]
}
], ],
"source": [ "name": "stderr"
"ds = census_dataset.input_fn(train_file, num_epochs=5, shuffle=True, batch_size=10)" }
] ]
}, },
{ {
"metadata": {
"id": "Mv3as_CEcYvu",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 102
},
"outputId": "3834b00d-9655-488f-d6d2-8d7405848d78"
},
"cell_type": "code", "cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Feature keys: ['capital_gain', 'occupation', 'gender', 'capital_loss', 'workclass']\n",
"\n",
"Age batch : tf.Tensor([46 38 42 37 29 48 46 40 73 49], shape=(10,), dtype=int32)\n",
"\n",
"Label batch : tf.Tensor([False False False False False False False False True False], shape=(10,), dtype=bool)\n"
]
}
],
"source": [ "source": [
"for feature_batch, label_batch in ds:\n", "for feature_batch, label_batch in ds:\n",
" break\n", " break\n",
...@@ -562,29 +688,57 @@ ...@@ -562,29 +688,57 @@
"print('Age batch :', feature_batch['age'])\n", "print('Age batch :', feature_batch['age'])\n",
"print()\n", "print()\n",
"print('Label batch :', label_batch )" "print('Label batch :', label_batch )"
],
"execution_count": 12,
"outputs": [
{
"output_type": "stream",
"text": [
"Feature keys: ['age', 'workclass', 'fnlwgt', 'education', 'education_num']\n",
"\n",
"Age batch : tf.Tensor([31 88 36 46 20 51 30 40 31 49], shape=(10,), dtype=int32)\n",
"\n",
"Label batch : tf.Tensor([False False True True False True True False False True], shape=(10,), dtype=bool)\n"
],
"name": "stdout"
}
]
},
{
"metadata": {
"id": "810fnfY5cYvz",
"colab_type": "text"
},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"Because `Estimators` expect an `input_fn` that takes no arguments, we typically wrap configurable input function into an obejct with the expected signature. For this notebook configure the `train_inpf` to iterate over the data twice:" "Because `Estimators` expect an `input_fn` that takes no arguments, we typically wrap configurable input function into an obejct with the expected signature. For this notebook configure the `train_inpf` to iterate over the data twice:"
] ]
}, },
{ {
"metadata": {
"id": "wnQdpEcVcYv0",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 17
},
"outputId": "b9050d80-e603-4363-dbe9-11c2b368e29d"
},
"cell_type": "code", "cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [ "source": [
"import functools\n", "import functools\n",
"train_inpf = functools.partial(census_dataset.input_fn, train_file, num_epochs=2, shuffle=True, batch_size=64)\n", "train_inpf = functools.partial(census_dataset.input_fn, train_file, num_epochs=2, shuffle=True, batch_size=64)\n",
"test_inpf = functools.partial(census_dataset.input_fn, test_file, num_epochs=1, shuffle=False, batch_size=64)" "test_inpf = functools.partial(census_dataset.input_fn, test_file, num_epochs=1, shuffle=False, batch_size=64)"
] ],
"execution_count": 13,
"outputs": []
},
{
"metadata": {
"id": "pboNpNWhcYv4",
"colab_type": "text"
},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"## Selecting and Engineering Features for the Model\n", "## Selecting and Engineering Features for the Model\n",
"\n", "\n",
...@@ -609,73 +763,92 @@ ...@@ -609,73 +763,92 @@
] ]
}, },
{ {
"metadata": {
"id": "ZX0r2T5OcYv6",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 17
},
"outputId": "283bf438-2a96-4bf3-fa89-94da99f93927"
},
"cell_type": "code", "cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [ "source": [
"age = fc.numeric_column('age')" "age = fc.numeric_column('age')"
] ],
"execution_count": 14,
"outputs": []
},
{
"metadata": {
"id": "tnLUiaHxcYv-",
"colab_type": "text"
},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"The model will use the `feature_column` definitions to build the model input. You can inspect the resulting output using the `input_layer` function:" "The model will use the `feature_column` definitions to build the model input. You can inspect the resulting output using the `input_layer` function:"
] ]
}, },
{ {
"cell_type": "code",
"execution_count": 15,
"metadata": { "metadata": {
"scrolled": true "id": "kREtIPfwcYv_",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 187
},
"outputId": "197a798b-9809-45e1-a8d4-ed5d237eea9d"
},
"cell_type": "code",
"source": [
"fc.input_layer(feature_batch, [age]).numpy()"
],
"execution_count": 15,
"outputs": [ "outputs": [
{ {
"output_type": "execute_result",
"data": { "data": {
"text/plain": [ "text/plain": [
"<tf.Tensor: id=237, shape=(10, 1), dtype=float32, numpy=\n", "array([[31.],\n",
"array([[46.],\n", " [88.],\n",
" [38.],\n", " [36.],\n",
" [42.],\n",
" [37.],\n",
" [29.],\n",
" [48.],\n",
" [46.],\n", " [46.],\n",
" [20.],\n",
" [51.],\n",
" [30.],\n",
" [40.],\n", " [40.],\n",
" [73.],\n", " [31.],\n",
" [49.]], dtype=float32)>" " [49.]], dtype=float32)"
] ]
}, },
"execution_count": 15, "metadata": {
"metadata": {}, "tags": []
"output_type": "execute_result" },
"execution_count": 15
} }
],
"source": [
"fc.input_layer(feature_batch, [age]).numpy()"
] ]
}, },
{ {
"metadata": {
"id": "OPuLduCucYwD",
"colab_type": "text"
},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"The following code will train and evaluate a model on only the `age` feature." "The following code will train and evaluate a model on only the `age` feature."
] ]
}, },
{ {
"metadata": {
"id": "9R5eSJ1pcYwE",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 54
},
"outputId": "ea791197-8300-4f31-cee1-f7d1b8209838"
},
"cell_type": "code", "cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'precision': 0.29166666, 'auc_precision_recall': 0.31132147, 'average_loss': 0.5239897, 'label/mean': 0.23622628, 'auc': 0.6781367, 'loss': 33.4552, 'prediction/mean': 0.22513431, 'accuracy': 0.7631595, 'recall': 0.0018200728, 'global_step': 1018, 'accuracy_baseline': 0.76377374}\n"
]
}
],
"source": [ "source": [
"classifier = tf.estimator.LinearClassifier(feature_columns=[age], n_classes=2)\n", "classifier = tf.estimator.LinearClassifier(feature_columns=[age], n_classes=2)\n",
"classifier.train(train_inpf)\n", "classifier.train(train_inpf)\n",
...@@ -683,98 +856,127 @@ ...@@ -683,98 +856,127 @@
"\n", "\n",
"clear_output()\n", "clear_output()\n",
"print(result)" "print(result)"
],
"execution_count": 16,
"outputs": [
{
"output_type": "stream",
"text": [
"{'accuracy': 0.76334375, 'accuracy_baseline': 0.76377374, 'auc': 0.67818105, 'auc_precision_recall': 0.31133735, 'average_loss': 0.52437353, 'label/mean': 0.23622628, 'loss': 33.479706, 'precision': 0.31578946, 'prediction/mean': 0.22410269, 'recall': 0.0015600624, 'global_step': 1018}\n"
],
"name": "stdout"
}
]
},
{
"metadata": {
"id": "YDZGcdTdcYwI",
"colab_type": "text"
},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"Similarly, we can define a `NumericColumn` for each continuous feature column\n", "Similarly, we can define a `NumericColumn` for each continuous feature column\n",
"that we want to use in the model:" "that we want to use in the model:"
] ]
}, },
{ {
"metadata": {
"id": "uqPbUqlxcYwJ",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 17
},
"outputId": "68f4ccfd-d71b-4327-b8e8-25c40e986bed"
},
"cell_type": "code", "cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [ "source": [
"education_num = tf.feature_column.numeric_column('education_num')\n", "education_num = tf.feature_column.numeric_column('education_num')\n",
"capital_gain = tf.feature_column.numeric_column('capital_gain')\n", "capital_gain = tf.feature_column.numeric_column('capital_gain')\n",
"capital_loss = tf.feature_column.numeric_column('capital_loss')\n", "capital_loss = tf.feature_column.numeric_column('capital_loss')\n",
"hours_per_week = tf.feature_column.numeric_column('hours_per_week')" "hours_per_week = tf.feature_column.numeric_column('hours_per_week')"
] ],
"execution_count": 17,
"outputs": []
},
{
"metadata": {
"id": "yqCF0a4DcYwM",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 17
},
"outputId": "0f9097a4-bc79-4e67-bd63-6a4d4461736d"
},
"cell_type": "code", "cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [ "source": [
"my_numeric_columns = [age,education_num, capital_gain, capital_loss, hours_per_week]" "my_numeric_columns = [age,education_num, capital_gain, capital_loss, hours_per_week]"
] ],
"execution_count": 18,
"outputs": []
},
{
"metadata": {
"id": "xDrZtAZ0cYwO",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 204
},
"outputId": "6fd558ea-9f0c-4deb-cb8a-6211ec233016"
},
"cell_type": "code", "cell_type": "code",
"source": [
"fc.input_layer(feature_batch, my_numeric_columns).numpy()"
],
"execution_count": 19, "execution_count": 19,
"metadata": {},
"outputs": [ "outputs": [
{ {
"output_type": "execute_result",
"data": { "data": {
"text/plain": [ "text/plain": [
"<tf.Tensor: id=2160, shape=(10, 5), dtype=float32, numpy=\n", "array([[3.1000e+01, 0.0000e+00, 0.0000e+00, 1.4000e+01, 4.3000e+01],\n",
"array([[4.600e+01, 0.000e+00, 0.000e+00, 6.000e+00, 4.000e+01],\n", " [8.8000e+01, 0.0000e+00, 0.0000e+00, 1.5000e+01, 4.0000e+01],\n",
" [3.800e+01, 4.508e+03, 0.000e+00, 1.300e+01, 4.000e+01],\n", " [3.6000e+01, 1.5024e+04, 0.0000e+00, 9.0000e+00, 4.0000e+01],\n",
" [4.200e+01, 0.000e+00, 0.000e+00, 1.400e+01, 4.000e+01],\n", " [4.6000e+01, 0.0000e+00, 0.0000e+00, 1.4000e+01, 5.5000e+01],\n",
" [3.700e+01, 0.000e+00, 0.000e+00, 1.100e+01, 4.000e+01],\n", " [2.0000e+01, 0.0000e+00, 0.0000e+00, 1.0000e+01, 1.0000e+01],\n",
" [2.900e+01, 0.000e+00, 0.000e+00, 9.000e+00, 4.000e+01],\n", " [5.1000e+01, 5.1780e+03, 0.0000e+00, 1.2000e+01, 4.5000e+01],\n",
" [4.800e+01, 0.000e+00, 0.000e+00, 1.300e+01, 5.500e+01],\n", " [3.0000e+01, 1.5024e+04, 0.0000e+00, 1.4000e+01, 6.0000e+01],\n",
" [4.600e+01, 0.000e+00, 0.000e+00, 9.000e+00, 5.000e+01],\n", " [4.0000e+01, 0.0000e+00, 0.0000e+00, 9.0000e+00, 4.0000e+01],\n",
" [4.000e+01, 0.000e+00, 0.000e+00, 9.000e+00, 4.000e+01],\n", " [3.1000e+01, 0.0000e+00, 0.0000e+00, 1.0000e+01, 1.0000e+01],\n",
" [7.300e+01, 6.418e+03, 0.000e+00, 4.000e+00, 9.900e+01],\n", " [4.9000e+01, 0.0000e+00, 0.0000e+00, 1.3000e+01, 4.0000e+01]],\n",
" [4.900e+01, 0.000e+00, 0.000e+00, 4.000e+00, 4.000e+01]],\n", " dtype=float32)"
" dtype=float32)>"
] ]
}, },
"execution_count": 19, "metadata": {
"metadata": {}, "tags": []
"output_type": "execute_result" },
"execution_count": 19
} }
],
"source": [
"fc.input_layer(feature_batch, my_numeric_columns).numpy()"
] ]
}, },
{ {
"metadata": {
"id": "cBGDN97IcYwQ",
"colab_type": "text"
},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"You could retrain a model on these features with, just by changing the `feature_columns` argument to the constructor:" "You could retrain a model on these features with, just by changing the `feature_columns` argument to the constructor:"
] ]
}, },
{ {
"metadata": {
"id": "XN8k5S95cYwR",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 204
},
"outputId": "72be27c1-e25c-4609-a703-8297c936177a"
},
"cell_type": "code", "cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"accuracy: 0.7817087\n",
"accuracy_baseline: 0.76377374\n",
"auc: 0.8027547\n",
"auc_precision_recall: 0.5611528\n",
"average_loss: 1.0698086\n",
"global_step: 1018\n",
"label/mean: 0.23622628\n",
"loss: 68.30414\n",
"precision: 0.57025987\n",
"prediction/mean: 0.36397633\n",
"recall: 0.30811232\n"
]
}
],
"source": [ "source": [
"classifier = tf.estimator.LinearClassifier(feature_columns=my_numeric_columns, n_classes=2)\n", "classifier = tf.estimator.LinearClassifier(feature_columns=my_numeric_columns, n_classes=2)\n",
"classifier.train(train_inpf)\n", "classifier.train(train_inpf)\n",
...@@ -784,11 +986,34 @@ ...@@ -784,11 +986,34 @@
"clear_output()\n", "clear_output()\n",
"for key,value in sorted(result.items()):\n", "for key,value in sorted(result.items()):\n",
" print('%s: %s' % (key, value))" " print('%s: %s' % (key, value))"
],
"execution_count": 20,
"outputs": [
{
"output_type": "stream",
"text": [
"accuracy: 0.76377374\n",
"accuracy_baseline: 0.76377374\n",
"auc: 0.539677\n",
"auc_precision_recall: 0.334656\n",
"average_loss: 1.4886041\n",
"global_step: 1018\n",
"label/mean: 0.23622628\n",
"loss: 95.04299\n",
"precision: 0.0\n",
"prediction/mean: 0.21315515\n",
"recall: 0.0\n"
],
"name": "stdout"
}
] ]
},
{
"metadata": {
"id": "jBRq9_AzcYwU",
"colab_type": "text"
},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"#### Categorical columns\n", "#### Categorical columns\n",
"\n", "\n",
...@@ -799,20 +1024,31 @@ ...@@ -799,20 +1024,31 @@
] ]
}, },
{ {
"metadata": {
"id": "0IjqSi9tcYwV",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 37
},
"outputId": "859f282d-7a9c-417b-a615-643a15d10118"
},
"cell_type": "code", "cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [ "source": [
"relationship = fc.categorical_column_with_vocabulary_list(\n", "relationship = fc.categorical_column_with_vocabulary_list(\n",
" 'relationship', [\n", " 'relationship', [\n",
" 'Husband', 'Not-in-family', 'Wife', 'Own-child', 'Unmarried',\n", " 'Husband', 'Not-in-family', 'Wife', 'Own-child', 'Unmarried',\n",
" 'Other-relative'])\n" " 'Other-relative'])\n"
] ],
"execution_count": 21,
"outputs": []
},
{
"metadata": {
"id": "-RjoWv-7cYwW",
"colab_type": "text"
},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"This will create a sparse one-hot vector from the raw input feature.\n", "This will create a sparse one-hot vector from the raw input feature.\n",
"\n", "\n",
...@@ -824,122 +1060,168 @@ ...@@ -824,122 +1060,168 @@
] ]
}, },
{ {
"cell_type": "code",
"execution_count": 23,
"metadata": { "metadata": {
"scrolled": true "id": "kI43CYlncYwY",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 224
},
"outputId": "458177e5-4bc0-48f2-b1fb-614b91dd99e6"
},
"cell_type": "code",
"source": [
"fc.input_layer(feature_batch, [age, fc.indicator_column(relationship)])"
],
"execution_count": 22,
"outputs": [ "outputs": [
{ {
"output_type": "execute_result",
"data": { "data": {
"text/plain": [ "text/plain": [
"<tf.Tensor: id=4490, shape=(10, 7), dtype=float32, numpy=\n", "<tf.Tensor: id=4361, shape=(10, 7), dtype=float32, numpy=\n",
"array([[46., 0., 0., 0., 0., 1., 0.],\n", "array([[31., 0., 1., 0., 0., 0., 0.],\n",
" [38., 1., 0., 0., 0., 0., 0.],\n", " [88., 1., 0., 0., 0., 0., 0.],\n",
" [42., 0., 1., 0., 0., 0., 0.],\n", " [36., 1., 0., 0., 0., 0., 0.],\n",
" [37., 1., 0., 0., 0., 0., 0.],\n",
" [29., 1., 0., 0., 0., 0., 0.],\n",
" [48., 1., 0., 0., 0., 0., 0.],\n",
" [46., 1., 0., 0., 0., 0., 0.],\n", " [46., 1., 0., 0., 0., 0., 0.],\n",
" [20., 0., 1., 0., 0., 0., 0.],\n",
" [51., 1., 0., 0., 0., 0., 0.],\n",
" [30., 1., 0., 0., 0., 0., 0.],\n",
" [40., 1., 0., 0., 0., 0., 0.],\n", " [40., 1., 0., 0., 0., 0., 0.],\n",
" [73., 1., 0., 0., 0., 0., 0.],\n", " [31., 0., 0., 1., 0., 0., 0.],\n",
" [49., 1., 0., 0., 0., 0., 0.]], dtype=float32)>" " [49., 0., 1., 0., 0., 0., 0.]], dtype=float32)>"
] ]
}, },
"execution_count": 23, "metadata": {
"metadata": {}, "tags": []
"output_type": "execute_result" },
"execution_count": 22
} }
],
"source": [
"fc.input_layer(feature_batch, [age, fc.indicator_column(relationship)])"
] ]
}, },
{ {
"metadata": {
"id": "tTudP7WHcYwb",
"colab_type": "text"
},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"What if we don't know the set of possible values in advance? Not a problem. We\n", "What if we don't know the set of possible values in advance? Not a problem. We\n",
"can use `categorical_column_with_hash_bucket` instead:" "can use `categorical_column_with_hash_bucket` instead:"
] ]
}, },
{ {
"metadata": {
"id": "8pSBaliCcYwb",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 37
},
"outputId": "e9b2e611-1311-4933-af0a-489e03fdc960"
},
"cell_type": "code", "cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [ "source": [
"occupation = tf.feature_column.categorical_column_with_hash_bucket(\n", "occupation = tf.feature_column.categorical_column_with_hash_bucket(\n",
" 'occupation', hash_bucket_size=1000)" " 'occupation', hash_bucket_size=1000)"
] ],
"execution_count": 23,
"outputs": []
},
{
"metadata": {
"id": "fSAPrqQkcYwd",
"colab_type": "text"
},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"What will happen is that each possible value in the feature column `occupation`\n", "What will happen is that each possible value in the feature column `occupation`\n",
"will be hashed to an integer ID as we encounter them in training. The example batch has a few different occupations:" "will be hashed to an integer ID as we encounter them in training. The example batch has a few different occupations:"
] ]
}, },
{ {
"metadata": {
"id": "dCvQNv36cYwe",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 207
},
"outputId": "23ebfedd-faf8-425b-a855-9897aba20341"
},
"cell_type": "code", "cell_type": "code",
"execution_count": 25, "source": [
"metadata": {}, "for item in feature_batch['occupation'].numpy():\n",
" print(item.decode())"
],
"execution_count": 24,
"outputs": [ "outputs": [
{ {
"name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"Machine-op-inspct\n",
"Transport-moving\n",
"Prof-specialty\n", "Prof-specialty\n",
"Adm-clerical\n", "Exec-managerial\n",
"Handlers-cleaners\n",
"Prof-specialty\n", "Prof-specialty\n",
"Other-service\n", "Exec-managerial\n",
"Farming-fishing\n", "Tech-support\n",
"Farming-fishing\n", "Sales\n",
"Handlers-cleaners\n" "Exec-managerial\n",
] "Machine-op-inspct\n",
} "?\n",
"Exec-managerial\n"
], ],
"source": [ "name": "stdout"
"for item in feature_batch['occupation'].numpy():\n", }
" print(item.decode())"
] ]
}, },
{ {
"metadata": {
"id": "KP5hN2rAcYwh",
"colab_type": "text"
},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"if we run `input_layer` with the hashed column we see that the output shape is `(batch_size, hash_bucket_size)`" "if we run `input_layer` with the hashed column we see that the output shape is `(batch_size, hash_bucket_size)`"
] ]
}, },
{ {
"metadata": {
"id": "0Y16peWacYwh",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 54
},
"outputId": "524b1af5-c492-4d0e-b736-7974ca618089"
},
"cell_type": "code", "cell_type": "code",
"execution_count": 27, "source": [
"metadata": {}, "occupation_result = fc.input_layer(feature_batch, [fc.indicator_column(occupation)])\n",
"\n",
"occupation_result.numpy().shape"
],
"execution_count": 25,
"outputs": [ "outputs": [
{ {
"output_type": "execute_result",
"data": { "data": {
"text/plain": [ "text/plain": [
"(10, 1000)" "(10, 1000)"
] ]
}, },
"execution_count": 27, "metadata": {
"metadata": {}, "tags": []
"output_type": "execute_result" },
"execution_count": 25
} }
],
"source": [
"occupation_result = fc.input_layer(feature_batch, [fc.indicator_column(occupation)])\n",
"\n",
"occupation_result.numpy().shape"
] ]
}, },
{ {
"metadata": {
"id": "HMW2MzWAcYwk",
"colab_type": "text"
},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"It's easier to see the actual results if we take the tf.argmax over the `hash_bucket_size` dimension.\n", "It's easier to see the actual results if we take the tf.argmax over the `hash_bucket_size` dimension.\n",
"\n", "\n",
...@@ -949,28 +1231,41 @@ ...@@ -949,28 +1231,41 @@
] ]
}, },
{ {
"metadata": {
"id": "q_ryRglmcYwk",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 54
},
"outputId": "e1797664-1200-48e3-c774-52e7e0a18f00"
},
"cell_type": "code", "cell_type": "code",
"execution_count": 28, "source": [
"metadata": {}, "tf.argmax(occupation_result, axis=1).numpy()"
],
"execution_count": 26,
"outputs": [ "outputs": [
{ {
"output_type": "execute_result",
"data": { "data": {
"text/plain": [ "text/plain": [
"array([911, 420, 979, 96, 10, 979, 527, 936, 936, 10])" "array([979, 800, 979, 800, 413, 631, 800, 911, 65, 800])"
] ]
}, },
"execution_count": 28, "metadata": {
"metadata": {}, "tags": []
"output_type": "execute_result" },
"execution_count": 26
} }
],
"source": [
"tf.argmax(occupation_result, axis=1).numpy()"
] ]
}, },
{ {
"metadata": {
"id": "j1e5NfyKcYwn",
"colab_type": "text"
},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"No matter which way we choose to define a `SparseColumn`, each feature string\n", "No matter which way we choose to define a `SparseColumn`, each feature string\n",
"will be mapped into an integer ID by looking up a fixed mapping or by hashing.\n", "will be mapped into an integer ID by looking up a fixed mapping or by hashing.\n",
...@@ -983,10 +1278,16 @@ ...@@ -983,10 +1278,16 @@
] ]
}, },
{ {
"metadata": {
"id": "0Z5eUrd_cYwo",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 37
},
"outputId": "becd1bda-9014-4b9e-92ef-ba4ee2ed52fa"
},
"cell_type": "code", "cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [],
"source": [ "source": [
"education = tf.feature_column.categorical_column_with_vocabulary_list(\n", "education = tf.feature_column.categorical_column_with_vocabulary_list(\n",
" 'education', [\n", " 'education', [\n",
...@@ -1003,47 +1304,48 @@ ...@@ -1003,47 +1304,48 @@
" 'workclass', [\n", " 'workclass', [\n",
" 'Self-emp-not-inc', 'Private', 'State-gov', 'Federal-gov',\n", " 'Self-emp-not-inc', 'Private', 'State-gov', 'Federal-gov',\n",
" 'Local-gov', '?', 'Self-emp-inc', 'Without-pay', 'Never-worked'])\n" " 'Local-gov', '?', 'Self-emp-inc', 'Without-pay', 'Never-worked'])\n"
] ],
"execution_count": 27,
"outputs": []
},
{
"metadata": {
"id": "a03l9ozUcYwp",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 37
},
"outputId": "374c7f00-8d2e-458f-ec32-b4cbc6b7386f"
},
"cell_type": "code", "cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [],
"source": [ "source": [
"my_categorical_columns = [relationship, occupation, education, marital_status, workclass]" "my_categorical_columns = [relationship, occupation, education, marital_status, workclass]"
] ],
"execution_count": 28,
"outputs": []
},
{
"metadata": {
"id": "ASQJM1pEcYwr",
"colab_type": "text"
},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"It's easy to use both sets of columns to configure a model that uses all these features:" "It's easy to use both sets of columns to configure a model that uses all these features:"
] ]
}, },
{ {
"metadata": {
"id": "_i_MLoo9cYws",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 224
},
"outputId": "95ab18a4-2ec1-4fad-c207-2f86b607a333"
},
"cell_type": "code", "cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"accuracy: 0.83342546\n",
"accuracy_baseline: 0.76377374\n",
"auc: 0.8807037\n",
"auc_precision_recall: 0.6601031\n",
"average_loss: 0.8671454\n",
"global_step: 1018\n",
"label/mean: 0.23622628\n",
"loss: 55.36468\n",
"precision: 0.6496042\n",
"prediction/mean: 0.2628341\n",
"recall: 0.6401456\n"
]
}
],
"source": [ "source": [
"classifier = tf.estimator.LinearClassifier(feature_columns=my_numeric_columns+my_categorical_columns, n_classes=2)\n", "classifier = tf.estimator.LinearClassifier(feature_columns=my_numeric_columns+my_categorical_columns, n_classes=2)\n",
"classifier.train(train_inpf)\n", "classifier.train(train_inpf)\n",
...@@ -1052,11 +1354,34 @@ ...@@ -1052,11 +1354,34 @@
"clear_output()\n", "clear_output()\n",
"for key,value in sorted(result.items()):\n", "for key,value in sorted(result.items()):\n",
" print('%s: %s' % (key, value))" " print('%s: %s' % (key, value))"
],
"execution_count": 29,
"outputs": [
{
"output_type": "stream",
"text": [
"accuracy: 0.81978995\n",
"accuracy_baseline: 0.76377374\n",
"auc: 0.869223\n",
"auc_precision_recall: 0.6459037\n",
"average_loss: 1.9878242\n",
"global_step: 1018\n",
"label/mean: 0.23622628\n",
"loss: 126.916725\n",
"precision: 0.60679156\n",
"prediction/mean: 0.2908891\n",
"recall: 0.6736869\n"
],
"name": "stdout"
}
] ]
},
{
"metadata": {
"id": "zdKEqF6xcYwv",
"colab_type": "text"
},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"### Derived feature columns\n", "### Derived feature columns\n",
"\n", "\n",
...@@ -1082,18 +1407,29 @@ ...@@ -1082,18 +1407,29 @@
] ]
}, },
{ {
"metadata": {
"id": "KT4pjD9AcYww",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 17
},
"outputId": "633c1bb5-e5e2-4cf3-8392-5caf473607da"
},
"cell_type": "code", "cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [],
"source": [ "source": [
"age_buckets = tf.feature_column.bucketized_column(\n", "age_buckets = tf.feature_column.bucketized_column(\n",
" age, boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65])" " age, boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65])"
] ],
"execution_count": 30,
"outputs": []
},
{
"metadata": {
"id": "S-XOscrEcYwx",
"colab_type": "text"
},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"where the `boundaries` is a list of bucket boundaries. In this case, there are\n", "where the `boundaries` is a list of bucket boundaries. In this case, there are\n",
"10 boundaries, resulting in 11 age group buckets (from age 17 and below, 18-24,\n", "10 boundaries, resulting in 11 age group buckets (from age 17 and below, 18-24,\n",
...@@ -1103,38 +1439,51 @@ ...@@ -1103,38 +1439,51 @@
] ]
}, },
{ {
"metadata": {
"id": "Lr40vm3qcYwy",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 204
},
"outputId": "e53a3d92-f8d4-4ff7-da5e-46f498eb2316"
},
"cell_type": "code", "cell_type": "code",
"execution_count": 33, "source": [
"metadata": {}, "fc.input_layer(feature_batch, [age, age_buckets]).numpy()"
],
"execution_count": 31,
"outputs": [ "outputs": [
{ {
"output_type": "execute_result",
"data": { "data": {
"text/plain": [ "text/plain": [
"array([[46., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],\n", "array([[31., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],\n",
" [38., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],\n", " [88., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],\n",
" [42., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],\n", " [36., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],\n",
" [37., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],\n",
" [29., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],\n",
" [48., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],\n",
" [46., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],\n", " [46., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],\n",
" [20., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n",
" [51., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],\n",
" [30., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],\n",
" [40., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],\n", " [40., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],\n",
" [73., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],\n", " [31., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],\n",
" [49., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.]],\n", " [49., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.]],\n",
" dtype=float32)" " dtype=float32)"
] ]
}, },
"execution_count": 33, "metadata": {
"metadata": {}, "tags": []
"output_type": "execute_result" },
"execution_count": 31
} }
],
"source": [
"fc.input_layer(feature_batch, [age, age_buckets]).numpy()"
] ]
}, },
{ {
"metadata": {
"id": "Z_tQI9j8cYw1",
"colab_type": "text"
},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"#### Learn complex relationships with crossed column\n", "#### Learn complex relationships with crossed column\n",
"\n", "\n",
...@@ -1150,18 +1499,29 @@ ...@@ -1150,18 +1499,29 @@
] ]
}, },
{ {
"metadata": {
"id": "IAPhPzXscYw1",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 37
},
"outputId": "4dd22eaf-3917-449d-9068-5306ae60b6a6"
},
"cell_type": "code", "cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [],
"source": [ "source": [
"education_x_occupation = tf.feature_column.crossed_column(\n", "education_x_occupation = tf.feature_column.crossed_column(\n",
" ['education', 'occupation'], hash_bucket_size=1000)" " ['education', 'occupation'], hash_bucket_size=1000)"
] ],
"execution_count": 32,
"outputs": []
},
{
"metadata": {
"id": "UeTxMunbcYw5",
"colab_type": "text"
},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"We can also create a `crossed_column` over more than two columns. Each\n", "We can also create a `crossed_column` over more than two columns. Each\n",
"constituent column can be either a base feature column that is categorical\n", "constituent column can be either a base feature column that is categorical\n",
...@@ -1170,18 +1530,29 @@ ...@@ -1170,18 +1530,29 @@
] ]
}, },
{ {
"metadata": {
"id": "y8UaBld9cYw7",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 37
},
"outputId": "4abb43e7-c406-4caf-f15e-71af723ec8df"
},
"cell_type": "code", "cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [],
"source": [ "source": [
"age_buckets_x_education_x_occupation = tf.feature_column.crossed_column(\n", "age_buckets_x_education_x_occupation = tf.feature_column.crossed_column(\n",
" [age_buckets, 'education', 'occupation'], hash_bucket_size=1000)" " [age_buckets, 'education', 'occupation'], hash_bucket_size=1000)"
] ],
"execution_count": 33,
"outputs": []
},
{
"metadata": {
"id": "HvKmW6U5cYw8",
"colab_type": "text"
},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"These crossed columns always use hash buckets to avoid the exponential explosion in the number of categories, and put the control over number of model weights in the hands of the user.\n", "These crossed columns always use hash buckets to avoid the exponential explosion in the number of categories, and put the control over number of model weights in the hands of the user.\n",
"\n", "\n",
...@@ -1190,8 +1561,11 @@ ...@@ -1190,8 +1561,11 @@
] ]
}, },
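{
"metadata": {},
"cell_type": "markdown",
"source": [
"As a minimal sketch, using the same `feature_batch` and `input_layer` pattern as above, we can inspect the crossed column through an `indicator_column`; its output width equals the `hash_bucket_size`:"
]
},
{
"metadata": {},
"cell_type": "code",
"source": [
"# Sketch: `crossed_result` is just an illustrative name. The indicator output\n",
"# of the crossed column has shape (batch_size, hash_bucket_size) = (10, 1000).\n",
"crossed_result = fc.input_layer(feature_batch, [fc.indicator_column(education_x_occupation)])\n",
"crossed_result.numpy().shape"
],
"execution_count": null,
"outputs": []
},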
{
"metadata": {
"id": "HtjpheB6cYw9",
"colab_type": "text"
},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"## Defining The Logistic Regression Model\n", "## Defining The Logistic Regression Model\n",
"\n", "\n",
...@@ -1210,39 +1584,16 @@ ...@@ -1210,39 +1584,16 @@
] ]
}, },
{ {
"cell_type": "code", "metadata": {
"execution_count": 36, "id": "Klmf3OxpcYw-",
"metadata": {}, "colab_type": "code",
"outputs": [ "colab": {
{ "base_uri": "https://localhost:8080/",
"name": "stdout", "height": 105
"output_type": "stream",
"text": [
"INFO:tensorflow:Using default config.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"I0711 14:48:54.071429 140466218788608 tf_logging.py:115] Using default config.\n"
]
}, },
{ "outputId": "a8f46b90-a9d0-4d33-fff5-38b530e35d43"
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow:Using config: {'_global_id_in_cluster': 0, '_is_chief': True, '_keep_checkpoint_every_n_hours': 10000, '_tf_random_seed': None, '_num_worker_replicas': 1, '_device_fn': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fc03341f668>, '_evaluation_master': '', '_train_distribute': None, '_model_dir': '/tmp/tmpligbanno', '_session_config': None, '_save_checkpoints_steps': None, '_master': '', '_num_ps_replicas': 0, '_task_type': 'worker', '_log_step_count_steps': 100, '_save_summary_steps': 100, '_service': None, '_task_id': 0, '_save_checkpoints_secs': 600, '_keep_checkpoint_max': 5}\n"
]
}, },
{ "cell_type": "code",
"name": "stderr",
"output_type": "stream",
"text": [
"I0711 14:48:54.073915 140466218788608 tf_logging.py:115] Using config: {'_global_id_in_cluster': 0, '_is_chief': True, '_keep_checkpoint_every_n_hours': 10000, '_tf_random_seed': None, '_num_worker_replicas': 1, '_device_fn': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fc03341f668>, '_evaluation_master': '', '_train_distribute': None, '_model_dir': '/tmp/tmpligbanno', '_session_config': None, '_save_checkpoints_steps': None, '_master': '', '_num_ps_replicas': 0, '_task_type': 'worker', '_log_step_count_steps': 100, '_save_summary_steps': 100, '_service': None, '_task_id': 0, '_save_checkpoints_secs': 600, '_keep_checkpoint_max': 5}\n"
]
}
],
"source": [ "source": [
"import tempfile\n", "import tempfile\n",
"\n", "\n",
...@@ -1260,11 +1611,45 @@ ...@@ -1260,11 +1611,45 @@
"model_dir = tempfile.mkdtemp()\n", "model_dir = tempfile.mkdtemp()\n",
"model = tf.estimator.LinearClassifier(\n", "model = tf.estimator.LinearClassifier(\n",
" model_dir=model_dir, feature_columns=base_columns + crossed_columns)" " model_dir=model_dir, feature_columns=base_columns + crossed_columns)"
],
"execution_count": 34,
"outputs": [
{
"output_type": "stream",
"text": [
"INFO:tensorflow:Using default config.\n"
],
"name": "stdout"
},
{
"output_type": "stream",
"text": [
"I0711 22:27:55.502184 140174775953280 tf_logging.py:115] Using default config.\n"
],
"name": "stderr"
},
{
"output_type": "stream",
"text": [
"INFO:tensorflow:Using config: {'_model_dir': '/tmp/tmp93vf5hp6', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f7cc6df0ba8>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}\n"
],
"name": "stdout"
},
{
"output_type": "stream",
"text": [
"I0711 22:27:55.509107 140174775953280 tf_logging.py:115] Using config: {'_model_dir': '/tmp/tmp93vf5hp6', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f7cc6df0ba8>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}\n"
],
"name": "stderr"
}
]
},
{
"metadata": {
"id": "jRhnPxUucYxC",
"colab_type": "text"
},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"The model also automatically learns a bias term, which controls the prediction\n", "The model also automatically learns a bias term, which controls the prediction\n",
"one would make without observing any features (see the section [How Logistic\n", "one would make without observing any features (see the section [How Logistic\n",
...@@ -1279,30 +1664,54 @@ ...@@ -1279,30 +1664,54 @@
] ]
}, },
{ {
"metadata": {
"id": "ZlrIBuoecYxD",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 17
},
"outputId": "5aa0bc8c-9496-4301-963a-78bcef54e17a"
},
"cell_type": "code", "cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [],
"source": [ "source": [
"model.train(train_inpf)\n", "model.train(train_inpf)\n",
"clear_output()" "clear_output()"
] ],
"execution_count": 35,
"outputs": []
},
{
"metadata": {
"id": "IvY3a9pzcYxH",
"colab_type": "text"
},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"After the model is trained, we can evaluate how good our model is at predicting\n", "After the model is trained, we can evaluate how good our model is at predicting\n",
"the labels of the holdout data:" "the labels of the holdout data:"
] ]
}, },
{ {
"metadata": {
"id": "L9nVJEO8cYxI",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 204
},
"outputId": "8eb14bd7-9030-4381-c18a-6a5c7c17c569"
},
"cell_type": "code", "cell_type": "code",
"execution_count": 39, "source": [
"metadata": {}, "results = model.evaluate(test_inpf)\n",
"clear_output()\n",
"for key in sorted(results):\n",
" print('%s: %0.2f' % (key, results[key]))"
],
"execution_count": 36,
"outputs": [ "outputs": [
{ {
"name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"accuracy: 0.84\n", "accuracy: 0.84\n",
...@@ -1312,23 +1721,21 @@ ...@@ -1312,23 +1721,21 @@
"average_loss: 0.35\n", "average_loss: 0.35\n",
"global_step: 1018.00\n", "global_step: 1018.00\n",
"label/mean: 0.24\n", "label/mean: 0.24\n",
"loss: 22.37\n", "loss: 22.42\n",
"precision: 0.69\n", "precision: 0.71\n",
"prediction/mean: 0.24\n", "prediction/mean: 0.22\n",
"recall: 0.57\n" "recall: 0.52\n"
]
}
], ],
"source": [ "name": "stdout"
"results = model.evaluate(test_inpf)\n", }
"clear_output()\n",
"for key in sorted(results):\n",
" print('%s: %0.2f' % (key, results[key]))"
] ]
}, },
{ {
"metadata": {
"id": "E0fAibNDcYxL",
"colab_type": "text"
},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"The first line of the final output should be something like\n", "The first line of the final output should be something like\n",
"`accuracy: 0.83`, which means the accuracy is 83%. Feel free to try more\n", "`accuracy: 0.83`, which means the accuracy is 83%. Feel free to try more\n",
...@@ -1341,11 +1748,39 @@ ...@@ -1341,11 +1748,39 @@
] ]
}, },
{ {
"metadata": {
"id": "8R5bz5CxcYxL",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 669
},
"outputId": "71f5e775-0d24-4356-d785-3b06aa385957"
},
"cell_type": "code", "cell_type": "code",
"execution_count": 40, "source": [
"metadata": {}, "import numpy as np\n",
"predict_df = test_df[:20].copy()\n",
"\n",
"pred_iter = model.predict(\n",
" lambda:easy_input_function(predict_df, label_key='income_bracket',\n",
" num_epochs=1, shuffle=False, batch_size=10))\n",
"\n",
"classes = np.array(['<=50K', '>50K'])\n",
"pred_class_id = []\n",
"for pred_dict in pred_iter:\n",
" pred_class_id.append(pred_dict['class_ids'])\n",
"\n",
"predict_df['predicted_class'] = classes[np.array(pred_class_id)]\n",
"predict_df['correct'] = predict_df['predicted_class'] == predict_df['income_bracket']\n",
"\n",
"clear_output()\n",
"predict_df[['income_bracket','predicted_class', 'correct']]"
],
"execution_count": 37,
"outputs": [ "outputs": [
{ {
"output_type": "execute_result",
"data": { "data": {
"text/html": [ "text/html": [
"<div>\n", "<div>\n",
...@@ -1520,34 +1955,19 @@ ...@@ -1520,34 +1955,19 @@
"19 >50K >50K True" "19 >50K >50K True"
] ]
}, },
"execution_count": 40, "metadata": {
"metadata": {}, "tags": []
"output_type": "execute_result" },
"execution_count": 37
} }
],
"source": [
"import numpy as np\n",
"predict_df = test_df[:20].copy()\n",
"\n",
"pred_iter = model.predict(\n",
" lambda:easy_input_function(predict_df, label_key='income_bracket',\n",
" num_epochs=1, shuffle=False, batch_size=10))\n",
"\n",
"classes = np.array(['<=50K', '>50K'])\n",
"pred_class_id = []\n",
"for pred_dict in pred_iter:\n",
" pred_class_id.append(pred_dict['class_ids'])\n",
"\n",
"predict_df['predicted_class'] = classes[np.array(pred_class_id)]\n",
"predict_df['correct'] = predict_df['predicted_class'] == predict_df['income_bracket']\n",
"\n",
"clear_output()\n",
"predict_df[['income_bracket','predicted_class', 'correct']]"
] ]
}, },
{ {
"metadata": {
"id": "N_uCpFTicYxN",
"colab_type": "text"
},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"If you'd like to see a working end-to-end example, you can download our\n", "If you'd like to see a working end-to-end example, you can download our\n",
"[example code](https://github.com/tensorflow/models/tree/master/official/wide_deep/census_main.py)\n", "[example code](https://github.com/tensorflow/models/tree/master/official/wide_deep/census_main.py)\n",
...@@ -1568,28 +1988,16 @@ ...@@ -1568,28 +1988,16 @@
] ]
}, },
{ {
"metadata": {
"id": "cVv2HsqocYxO",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 204
},
"outputId": "68504270-5bcc-4a87-dbfa-7fd94cf54dff"
},
"cell_type": "code", "cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"accuracy: 0.84\n",
"accuracy_baseline: 0.76\n",
"auc: 0.89\n",
"auc_precision_recall: 0.70\n",
"average_loss: 0.35\n",
"global_step: 2036.00\n",
"label/mean: 0.24\n",
"loss: 22.29\n",
"precision: 0.69\n",
"prediction/mean: 0.24\n",
"recall: 0.56\n"
]
}
],
"source": [ "source": [
"#TODO(markdaoust): is the regularization strength here not working?\n", "#TODO(markdaoust): is the regularization strength here not working?\n",
"model = tf.estimator.LinearClassifier(\n", "model = tf.estimator.LinearClassifier(\n",
...@@ -1605,11 +2013,34 @@ ...@@ -1605,11 +2013,34 @@
"clear_output()\n", "clear_output()\n",
"for key in sorted(results):\n", "for key in sorted(results):\n",
" print('%s: %0.2f' % (key, results[key]))" " print('%s: %0.2f' % (key, results[key]))"
],
"execution_count": 38,
"outputs": [
{
"output_type": "stream",
"text": [
"accuracy: 0.84\n",
"accuracy_baseline: 0.76\n",
"auc: 0.89\n",
"auc_precision_recall: 0.70\n",
"average_loss: 0.35\n",
"global_step: 2036.00\n",
"label/mean: 0.24\n",
"loss: 22.28\n",
"precision: 0.70\n",
"prediction/mean: 0.24\n",
"recall: 0.55\n"
],
"name": "stdout"
}
] ]
},
{
"metadata": {
"id": "5AqvPEQwcYxU",
"colab_type": "text"
},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"One important difference between L1 and L2 regularization is that L1\n", "One important difference between L1 and L2 regularization is that L1\n",
"regularization tends to make model weights stay at zero, creating sparser\n", "regularization tends to make model weights stay at zero, creating sparser\n",
...@@ -1626,8 +2057,11 @@ ...@@ -1626,8 +2057,11 @@
] ]
}, },
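{
"metadata": {},
"cell_type": "markdown",
"source": [
"A minimal sketch of how to see this effect, assuming the regularized `model` trained above (exact variable names can differ between TensorFlow versions): pull the linear weights out of the estimator and count the entries that are effectively zero."
]
},
{
"metadata": {},
"cell_type": "code",
"source": [
"# Sketch: collect the linear model's weight variables (skipping the bias and\n",
"# the optimizer's Ftrl slot variables), then report the fraction of weights\n",
"# that L1 regularization has driven to (near) zero.\n",
"weight_names = [name for name in model.get_variable_names()\n",
"                if 'linear_model' in name and 'Ftrl' not in name and 'bias' not in name]\n",
"\n",
"weights = np.concatenate(\n",
"    [model.get_variable_value(name).flatten() for name in weight_names])\n",
"\n",
"print('fraction of near-zero weights:', np.mean(np.abs(weights) < 1e-6))"
],
"execution_count": null,
"outputs": []
},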
{
"metadata": {
"id": "i5119iMWcYxU",
"colab_type": "text"
},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"<a id=\"how_it_works\"> </a>\n", "<a id=\"how_it_works\"> </a>\n",
"## How Logistic Regression Works\n", "## How Logistic Regression Works\n",
...@@ -1675,8 +2109,11 @@ ...@@ -1675,8 +2109,11 @@
] ]
}, },
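{
"metadata": {},
"cell_type": "markdown",
"source": [
"In brief, for a feature vector $x$, weights $w$, and bias $b$, the model estimates\n",
"\n",
"$$P(Y=1 \\mid x) = \\frac{1}{1 + \\exp\\big(-(w^\\top x + b)\\big)},$$\n",
"\n",
"and training chooses $w$ and $b$ to minimize the log loss over the training data."
]
},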
{
"metadata": {
"id": "hbXuPYQIcYxV",
"colab_type": "text"
},
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {},
"source": [ "source": [
"## What Next\n", "## What Next\n",
"\n", "\n",
...@@ -1689,32 +2126,21 @@ ...@@ -1689,32 +2126,21 @@
] ]
}, },
{
"metadata": {
"id": "jpdw2z5WcYxV",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 17
},
"outputId": "403d18f6-d01e-47dc-dfc7-8c95d9a8ec34"
},
"cell_type": "code",
"source": [
""
],
"execution_count": 38,
"outputs": []
}
]
}
\ No newline at end of file