Commit b3f04bca authored by Mark Daoust, committed by Neal Wu

Initialize examples directory. (#2546)

parent ddebf55c
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A collection of regression examples using `Estimators`."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A dataset loader for imports85.data."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import numpy as np
import tensorflow as tf
try:
import pandas as pd # pylint: disable=g-import-not-at-top
except ImportError:
pass
URL = "https://archive.ics.uci.edu/ml/machine-learning-databases/autos/imports-85.data"
# Order is important for the CSV readers, so we use an OrderedDict here.
COLUMN_TYPES = collections.OrderedDict([
("symboling", int),
("normalized-losses", float),
("make", str),
("fuel-type", str),
("aspiration", str),
("num-of-doors", str),
("body-style", str),
("drive-wheels", str),
("engine-location", str),
("wheel-base", float),
("length", float),
("width", float),
("height", float),
("curb-weight", float),
("engine-type", str),
("num-of-cylinders", str),
("engine-size", float),
("fuel-system", str),
("bore", float),
("stroke", float),
("compression-ratio", float),
("horsepower", float),
("peak-rpm", float),
("city-mpg", float),
("highway-mpg", float),
("price", float)
])
def raw_dataframe():
"""Load the imports85 data as a pd.DataFrame."""
# Download and cache the data
path = tf.keras.utils.get_file(URL.split("/")[-1], URL)
# Load it into a pandas dataframe
df = pd.read_csv(path, names=COLUMN_TYPES.keys(),
dtype=COLUMN_TYPES, na_values="?")
return df
def load_data(y_name="price", train_fraction=0.7, seed=None):
"""Get the imports85 data set.
A description of the data is available at:
https://archive.ics.uci.edu/ml/datasets/automobile
The data itself can be found at:
https://archive.ics.uci.edu/ml/machine-learning-databases/autos/imports-85.data
Args:
y_name: the column to return as the label.
train_fraction: the fraction of the dataset to use for training.
seed: The random seed to use when shuffling the data. `None` generates a
unique shuffle every run.
Returns:
a pair of pairs where the first pair is the training data, and the second
is the test data:
    `(x_train, y_train), (x_test, y_test) = load_data(...)`
`x` contains a pandas DataFrame of features, while `y` contains the label
array.
"""
# Load the raw data columns.
data = raw_dataframe()
# Delete rows with unknowns
data = data.dropna()
# Shuffle the data
np.random.seed(seed)
# Split the data into train/test subsets.
x_train = data.sample(frac=train_fraction, random_state=seed)
x_test = data.drop(x_train.index)
# Extract the label from the features dataframe.
y_train = x_train.pop(y_name)
y_test = x_test.pop(y_name)
return (x_train, y_train), (x_test, y_test)
def make_dataset(x, y=None):
"""Create a slice dataset from a pandas DataFrame and labels"""
# TODO(markdaooust): simplify this after the 1.4 cut.
# Convert the DataFrame to a dict
x = dict(x)
# Convert the pd.Series to np.arrays
for key in x:
x[key] = np.array(x[key])
items = [x]
if y is not None:
items.append(np.array(y, dtype=np.float32))
# Create a Dataset of slices
return tf.data.Dataset.from_tensor_slices(tuple(items))
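# A minimal usage sketch: load the data, wrap it in a `Dataset`, and pull one
# batch. Assumes network access to archive.ics.uci.edu; the `__main__` guard
# keeps imports of this module side-effect free.
if __name__ == "__main__":
  (x_train, y_train), _ = load_data(seed=0)
  demo_dataset = make_dataset(x_train, y_train).batch(8)
  features, labels = demo_dataset.make_one_shot_iterator().get_next()
  with tf.Session() as sess:
    batch_features, batch_labels = sess.run((features, labels))
  print("feature keys:", sorted(batch_features.keys()))
  print("label batch shape:", batch_labels.shape)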
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Regression using the DNNRegressor Estimator."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import tensorflow as tf
import automobile_data
parser = argparse.ArgumentParser()
parser.add_argument('--batch_size', default=100, type=int, help='batch size')
parser.add_argument('--train_steps', default=1000, type=int,
help='number of training steps')
parser.add_argument('--price_norm_factor', default=1000., type=float,
help='price normalization factor')
def from_dataset(ds):
  """Wrap `ds` in a no-argument `input_fn`, as the `Estimator` API expects."""
  return lambda: ds.make_one_shot_iterator().get_next()
def my_dnn_regression_fn(features, labels, mode, params):
"""A model function implementing DNN regression for a custom Estimator."""
# Extract the input into a dense layer, according to the feature_columns.
top = tf.feature_column.input_layer(features, params["feature_columns"])
# Iterate over the "hidden_units" list of layer sizes, default is [20].
for units in params.get("hidden_units", [20]):
# Add a hidden layer, densely connected on top of the previous layer.
top = tf.layers.dense(inputs=top, units=units, activation=tf.nn.relu)
# Connect a linear output layer on top.
output_layer = tf.layers.dense(inputs=top, units=1)
# Reshape the output layer to a 1-dim Tensor to return predictions
predictions = tf.squeeze(output_layer, 1)
if mode == tf.estimator.ModeKeys.PREDICT:
# In `PREDICT` mode we only need to return predictions.
return tf.estimator.EstimatorSpec(
mode=mode, predictions={"price": predictions})
# Calculate loss using mean squared error
average_loss = tf.losses.mean_squared_error(labels, predictions)
# Pre-made estimators use the total_loss instead of the average,
# so report total_loss for compatibility.
batch_size = tf.shape(labels)[0]
total_loss = tf.to_float(batch_size) * average_loss
if mode == tf.estimator.ModeKeys.TRAIN:
optimizer = params.get("optimizer", tf.train.AdamOptimizer)
optimizer = optimizer(params.get("learning_rate", None))
train_op = optimizer.minimize(
loss=average_loss, global_step=tf.train.get_global_step())
return tf.estimator.EstimatorSpec(
mode=mode, loss=total_loss, train_op=train_op)
# In evaluation mode we will calculate evaluation metrics.
assert mode == tf.estimator.ModeKeys.EVAL
  # Calculate root mean squared error
  rmse = tf.metrics.root_mean_squared_error(labels, predictions)
# Add the rmse to the collection of evaluation metrics.
eval_metrics = {"rmse": rmse}
return tf.estimator.EstimatorSpec(
mode=mode,
# Report sum of error for compatibility with pre-made estimators
loss=total_loss,
eval_metric_ops=eval_metrics)
def main(argv):
"""Builds, trains, and evaluates the model."""
args = parser.parse_args(argv[1:])
  (train_x, train_y), (test_x, test_y) = automobile_data.load_data()
train_y /= args.price_norm_factor
test_y /= args.price_norm_factor
# Build the training dataset.
train = (
automobile_data.make_dataset(train_x, train_y)
# Shuffling with a buffer larger than the data set ensures
# that the examples are well mixed.
.shuffle(1000).batch(args.batch_size)
# Repeat forever
.repeat())
# Build the validation dataset.
test = automobile_data.make_dataset(test_x, test_y).batch(args.batch_size)
  # The following `feature_columns` demonstrate two ways to handle categorical
  # input. The first assigns a unique weight to each category. To do this you
  # must specify the category's vocabulary (values outside this specification
  # will receive a weight of zero). Here we specify the vocabulary using a
  # list of options. The vocabulary can also be specified with a vocabulary
  # file (using `categorical_column_with_vocabulary_file`). For features
  # covering a range of positive integers use
  # `categorical_column_with_identity`.
body_style_vocab = ["hardtop", "wagon", "sedan", "hatchback", "convertible"]
body_style = tf.feature_column.categorical_column_with_vocabulary_list(
key="body-style", vocabulary_list=body_style_vocab)
make = tf.feature_column.categorical_column_with_hash_bucket(
key="make", hash_bucket_size=50)
feature_columns = [
tf.feature_column.numeric_column(key="curb-weight"),
tf.feature_column.numeric_column(key="highway-mpg"),
# Since this is a DNN model, convert categorical columns from sparse
# to dense.
# Wrap them in an `indicator_column` to create a
# one-hot vector from the input.
tf.feature_column.indicator_column(body_style),
# Or use an `embedding_column` to create a trainable vector for each
# index.
tf.feature_column.embedding_column(make, dimension=3),
]
# Build a custom Estimator, using the model_fn.
# `params` is passed through to the `model_fn`.
model = tf.estimator.Estimator(
model_fn=my_dnn_regression_fn,
params={
"feature_columns": feature_columns,
"learning_rate": 0.001,
"optimizer": tf.train.AdamOptimizer,
"hidden_units": [20, 20]
})
# Train the model.
model.train(input_fn=from_dataset(train), steps=args.train_steps)
# Evaluate how the model performs on data it has not yet seen.
eval_result = model.evaluate(input_fn=from_dataset(test))
# Print the Root Mean Square Error (RMSE).
print("\n" + 80 * "*")
print("\nRMS error for the test set: ${:.0f}"
.format(args.price_norm_factor * eval_result["rmse"]))
print()
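# A prediction sketch: the PREDICT branch of `my_dnn_regression_fn` returns a
# {"price": ...} dict, so a trained `model` can be queried as below. The input
# values here are hypothetical examples, chosen only for illustration.
def predict_example_prices(model, price_norm_factor=1000.):
  """Run a trained custom Estimator on two hand-written examples."""
  input_dict = {
      "curb-weight": [2000, 3000],
      "highway-mpg": [30, 40],
      "body-style": ["hatchback", "sedan"],
      "make": ["honda", "audi"],
  }
  predict = automobile_data.make_dataset(input_dict).batch(1)
  for p in model.predict(input_fn=from_dataset(predict)):
    print("Predicted price: ${:.0f}".format(price_norm_factor * p["price"]))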
if __name__ == "__main__":
# The Estimator periodically generates "INFO" logs; make these logs visible.
tf.logging.set_verbosity(tf.logging.INFO)
tf.app.run(main=main)
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Regression using the DNNRegressor Estimator."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import tensorflow as tf
import automobile_data
parser = argparse.ArgumentParser()
parser.add_argument('--batch_size', default=100, type=int, help='batch size')
parser.add_argument('--train_steps', default=5000, type=int,
help='number of training steps')
parser.add_argument('--price_norm_factor', default=1000., type=float,
help='price normalization factor')
def from_dataset(ds):
return lambda: ds.make_one_shot_iterator().get_next()
def main(argv):
"""Builds, trains, and evaluates the model."""
args = parser.parse_args(argv[1:])
  (train_x, train_y), (test_x, test_y) = automobile_data.load_data()
train_y /= args.price_norm_factor
test_y /= args.price_norm_factor
# Build the training dataset.
train = (
automobile_data.make_dataset(train_x, train_y)
# Shuffling with a buffer larger than the data set ensures
# that the examples are well mixed.
.shuffle(1000).batch(args.batch_size)
# Repeat forever
.repeat())
# Build the validation dataset.
test = automobile_data.make_dataset(test_x, test_y).batch(args.batch_size)
# Use the same categorical columns as in `linear_regression_categorical`
body_style_vocab = ["hardtop", "wagon", "sedan", "hatchback", "convertible"]
body_style_column = tf.feature_column.categorical_column_with_vocabulary_list(
key="body-style", vocabulary_list=body_style_vocab)
make_column = tf.feature_column.categorical_column_with_hash_bucket(
key="make", hash_bucket_size=50)
feature_columns = [
tf.feature_column.numeric_column(key="curb-weight"),
tf.feature_column.numeric_column(key="highway-mpg"),
# Since this is a DNN model, categorical columns must be converted from
# sparse to dense.
# Wrap them in an `indicator_column` to create a
# one-hot vector from the input.
tf.feature_column.indicator_column(body_style_column),
# Or use an `embedding_column` to create a trainable vector for each
# index.
tf.feature_column.embedding_column(make_column, dimension=3),
]
  # Build a DNNRegressor with two 20-unit hidden layers, using the feature
  # columns defined above as input.
model = tf.estimator.DNNRegressor(
hidden_units=[20, 20], feature_columns=feature_columns)
# Train the model.
# By default, the Estimators log output every 100 steps.
model.train(input_fn=from_dataset(train), steps=args.train_steps)
# Evaluate how the model performs on data it has not yet seen.
eval_result = model.evaluate(input_fn=from_dataset(test))
# The evaluation returns a Python dictionary. The "average_loss" key holds the
# Mean Squared Error (MSE).
average_loss = eval_result["average_loss"]
# Convert MSE to Root Mean Square Error (RMSE).
print("\n" + 80 * "*")
print("\nRMS error for the test set: ${:.0f}"
.format(args.price_norm_factor * average_loss**0.5))
print()
if __name__ == "__main__":
# The Estimator periodically generates "INFO" logs; make these logs visible.
tf.logging.set_verbosity(tf.logging.INFO)
tf.app.run(main=main)
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Linear regression using the LinearRegressor Estimator."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import numpy as np
import tensorflow as tf
import automobile_data
parser = argparse.ArgumentParser()
parser.add_argument('--batch_size', default=100, type=int, help='batch size')
parser.add_argument('--train_steps', default=1000, type=int,
help='number of training steps')
parser.add_argument('--price_norm_factor', default=1000., type=float,
help='price normalization factor')
def from_dataset(ds):
return lambda: ds.make_one_shot_iterator().get_next()
def main(argv):
"""Builds, trains, and evaluates the model."""
args = parser.parse_args(argv[1:])
  (train_x, train_y), (test_x, test_y) = automobile_data.load_data()
train_y /= args.price_norm_factor
test_y /= args.price_norm_factor
# Build the training dataset.
train = (
automobile_data.make_dataset(train_x, train_y)
# Shuffling with a buffer larger than the data set ensures
# that the examples are well mixed.
.shuffle(1000).batch(args.batch_size)
# Repeat forever
.repeat())
# Build the validation dataset.
test = automobile_data.make_dataset(test_x, test_y).batch(args.batch_size)
feature_columns = [
# "curb-weight" and "highway-mpg" are numeric columns.
tf.feature_column.numeric_column(key="curb-weight"),
tf.feature_column.numeric_column(key="highway-mpg"),
]
# Build the Estimator.
model = tf.estimator.LinearRegressor(feature_columns=feature_columns)
# Train the model.
# By default, the Estimators log output every 100 steps.
model.train(input_fn=from_dataset(train), steps=args.train_steps)
# Evaluate how the model performs on data it has not yet seen.
eval_result = model.evaluate(input_fn=from_dataset(test))
# The evaluation returns a Python dictionary. The "average_loss" key holds the
# Mean Squared Error (MSE).
average_loss = eval_result["average_loss"]
# Convert MSE to Root Mean Square Error (RMSE).
print("\n" + 80 * "*")
print("\nRMS error for the test set: ${:.0f}"
.format(args.price_norm_factor * average_loss**0.5))
# Run the model in prediction mode.
input_dict = {
"curb-weight": np.array([2000, 3000]),
"highway-mpg": np.array([30, 40])
}
predict = automobile_data.make_dataset(input_dict).batch(1)
predict_results = model.predict(input_fn=from_dataset(predict))
# Print the prediction results.
print("\nPrediction results:")
for i, prediction in enumerate(predict_results):
msg = ("Curb weight: {: 4d}lbs, "
"Highway: {: 0d}mpg, "
"Prediction: ${: 9.2f}")
msg = msg.format(input_dict["curb-weight"][i], input_dict["highway-mpg"][i],
args.price_norm_factor * prediction["predictions"][0])
print(" " + msg)
print()
if __name__ == "__main__":
# The Estimator periodically generates "INFO" logs; make these logs visible.
tf.logging.set_verbosity(tf.logging.INFO)
tf.app.run(main=main)
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Linear regression with categorical features."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import tensorflow as tf
import automobile_data
parser = argparse.ArgumentParser()
parser.add_argument('--batch_size', default=100, type=int, help='batch size')
parser.add_argument('--train_steps', default=1000, type=int,
help='number of training steps')
parser.add_argument('--price_norm_factor', default=1000., type=float,
help='price normalization factor')
def from_dataset(ds):
return lambda: ds.make_one_shot_iterator().get_next()
def main(argv):
"""Builds, trains, and evaluates the model."""
args = parser.parse_args(argv[1:])
  (train_x, train_y), (test_x, test_y) = automobile_data.load_data()
train_y /= args.price_norm_factor
test_y /= args.price_norm_factor
# Build the training dataset.
train = (
automobile_data.make_dataset(train_x, train_y)
# Shuffling with a buffer larger than the data set ensures
# that the examples are well mixed.
.shuffle(1000).batch(args.batch_size)
# Repeat forever
.repeat())
# Build the validation dataset.
test = automobile_data.make_dataset(test_x, test_y).batch(args.batch_size)
# The following code demonstrates two of the ways that `feature_columns` can
# be used to build a model with categorical inputs.
# The first way assigns a unique weight to each category. To do this, you must
# specify the category's vocabulary (values outside this specification will
# receive a weight of zero).
  # Alternatively, you can define the vocabulary in a file (by calling
  # `categorical_column_with_vocabulary_file`) or as a range of positive
  # integers (by calling `categorical_column_with_identity`); both variants
  # are sketched below, after `main`.
body_style_vocab = ["hardtop", "wagon", "sedan", "hatchback", "convertible"]
body_style_column = tf.feature_column.categorical_column_with_vocabulary_list(
key="body-style", vocabulary_list=body_style_vocab)
  # The second way, appropriate for an unspecified vocabulary, is to create a
  # hashed column. It creates a fixed-length list of weights and automatically
  # assigns each input category to a weight. Due to the pseudo-randomness of
  # the process, some weights may be shared between categories, while others
  # will remain unused.
make_column = tf.feature_column.categorical_column_with_hash_bucket(
key="make", hash_bucket_size=50)
feature_columns = [
# This model uses the same two numeric features as `linear_regressor.py`
tf.feature_column.numeric_column(key="curb-weight"),
tf.feature_column.numeric_column(key="highway-mpg"),
  # This model adds two categorical columns that will adjust the price based
  # on "make" and "body-style".
body_style_column,
make_column,
]
# Build the Estimator.
model = tf.estimator.LinearRegressor(feature_columns=feature_columns)
# Train the model.
# By default, the Estimators log output every 100 steps.
model.train(input_fn=from_dataset(train), steps=args.train_steps)
# Evaluate how the model performs on data it has not yet seen.
eval_result = model.evaluate(input_fn=from_dataset(test))
# The evaluation returns a Python dictionary. The "average_loss" key holds the
# Mean Squared Error (MSE).
average_loss = eval_result["average_loss"]
# Convert MSE to Root Mean Square Error (RMSE).
print("\n" + 80 * "*")
print("\nRMS error for the test set: ${:.0f}"
.format(args.price_norm_factor * average_loss**0.5))
print()
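# Sketches of the two alternatives mentioned above. Both inputs are
# hypothetical: the vocabulary file does not ship with this example, and
# "door-count" stands in for any feature already encoded as integers in
# [0, num_buckets). Neither column is used by `main` above.
def alternative_categorical_columns():
  """Build the vocabulary-file and identity variants of categorical columns."""
  body_style_from_file = tf.feature_column.categorical_column_with_vocabulary_file(
      key="body-style",
      vocabulary_file="/tmp/body_style_vocab.txt",  # hypothetical path
      vocabulary_size=5)
  door_count = tf.feature_column.categorical_column_with_identity(
      key="door-count",  # hypothetical integer feature
      num_buckets=6)
  return [body_style_from_file, door_count]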
if __name__ == "__main__":
# The Estimator periodically generates "INFO" logs; make these logs visible.
tf.logging.set_verbosity(tf.logging.INFO)
tf.app.run(main=main)
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A simple smoke test that runs these examples for 1 training iteraton."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import pandas as pd
import tensorflow as tf
from six.moves import StringIO
import automobile_data
import dnn_regression
import linear_regression
import linear_regression_categorical
import custom_regression
# pylint: disable=line-too-long
FOUR_LINES = "\n".join([
"1,?,alfa-romero,gas,std,two,hatchback,rwd,front,94.50,171.20,65.50,52.40,2823,ohcv,six,152,mpfi,2.68,3.47,9.00,154,5000,19,26,16500",
"2,164,audi,gas,std,four,sedan,fwd,front,99.80,176.60,66.20,54.30,2337,ohc,four,109,mpfi,3.19,3.40,10.00,102,5500,24,30,13950",
"2,164,audi,gas,std,four,sedan,4wd,front,99.40,176.60,66.40,54.30,2824,ohc,five,136,mpfi,3.19,3.40,8.00,115,5500,18,22,17450",
"2,?,audi,gas,std,two,sedan,fwd,front,99.80,177.30,66.30,53.10,2507,ohc,five,136,mpfi,3.19,3.40,8.50,110,5500,19,25,15250",])
# pylint: enable=line-too-long
mock = tf.test.mock
def four_lines_dataframe():
text = StringIO(FOUR_LINES)
return pd.read_csv(text, names=automobile_data.COLUMN_TYPES.keys(),
dtype=automobile_data.COLUMN_TYPES, na_values="?")
def four_lines_dataset(*args, **kwargs):
del args, kwargs
return tf.data.Dataset.from_tensor_slices(FOUR_LINES.split("\n"))
class RegressionTest(tf.test.TestCase):
"""Test the regression examples in this directory."""
@mock.patch.dict(automobile_data.__dict__, {"raw_dataframe": four_lines_dataframe})
def test_linear_regression(self):
linear_regression.main([None, "--train_steps=1"])
@mock.patch.dict(automobile_data.__dict__, {"raw_dataframe": four_lines_dataframe})
def test_linear_regression_categorical(self):
linear_regression_categorical.main([None, "--train_steps=1"])
@mock.patch.dict(automobile_data.__dict__, {"raw_dataframe": four_lines_dataframe})
def test_dnn_regression(self):
dnn_regression.main([None, "--train_steps=1"])
@mock.patch.dict(automobile_data.__dict__, {"raw_dataframe": four_lines_dataframe})
def test_custom_regression(self):
custom_regression.main([None, "--train_steps=1"])
if __name__ == "__main__":
tf.test.main()
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Example of DNNClassifier for Iris plant dataset."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import pandas as pd
import tensorflow as tf
parser = argparse.ArgumentParser()
parser.add_argument('--batch_size', default=100, type=int, help='batch size')
parser.add_argument('--train_steps', default=200, type=int,
help='number of training steps')
TRAIN_URL = "http://download.tensorflow.org/data/iris_training.csv"
TEST_URL = "http://download.tensorflow.org/data/iris_test.csv"
COLUMNS = ['SepalLength', 'SepalWidth', 'PetalLength', 'PetalWidth', 'Species']
SPECIES = ['Setosa', 'Versicolor', 'Virginica']
def load_data(train_fraction=0.8, seed=0, y_name='Species'):
"""Returns the iris dataset as (train_x, train_y), (test_x, test_y)."""
train_path = tf.keras.utils.get_file(TRAIN_URL.split('/')[-1], TRAIN_URL)
train = pd.read_csv(train_path, names=COLUMNS, header=0)
train_x, train_y = train, train.pop(y_name)
test_path = tf.keras.utils.get_file(TEST_URL.split('/')[-1], TEST_URL)
test = pd.read_csv(test_path, names=COLUMNS, header=0)
test_x, test_y = test, test.pop(y_name)
return (train_x, train_y), (test_x, test_y)
def make_dataset(*inputs):
return tf.data.Dataset.from_tensor_slices(inputs)
def from_dataset(ds):
return lambda: ds.make_one_shot_iterator().get_next()
def my_model(features, labels, mode, params):
"""DNN with three hidden layers, and dropout of 0.1 probability."""
# Create three fully connected layers each layer having a dropout
# probability of 0.1.
net = tf.feature_column.input_layer(features, params['feature_columns'])
for units in params.get('hidden_units', [10, 20, 10]):
net = tf.layers.dense(net, units=units, activation=tf.nn.relu)
net = tf.layers.dropout(net, rate=0.1,
training=mode == tf.estimator.ModeKeys.TRAIN)
# Compute logits (1 per class).
logits = tf.layers.dense(net, params['n_classes'], activation=None)
# Compute predictions.
predicted_classes = tf.argmax(logits, 1)
if mode == tf.estimator.ModeKeys.PREDICT:
predictions = {
'class_ids': predicted_classes[:, tf.newaxis],
'probabilities': tf.nn.softmax(logits),
'logits': logits,
}
return tf.estimator.EstimatorSpec(mode, predictions=predictions)
  # Convert the labels to a one-hot tensor of shape (batch_size, 3), with an
  # on-value of 1 in each row's one-hot vector of length 3.
onehot_labels = tf.one_hot(labels, 3, 1, 0)
# Compute loss.
loss = tf.losses.softmax_cross_entropy(
onehot_labels=onehot_labels, logits=logits)
# Compute evaluation metrics.
accuracy = tf.metrics.accuracy(labels=labels,
predictions=predicted_classes,
name='acc_op')
metrics = {'accuracy': accuracy}
tf.summary.scalar('accuracy', accuracy[1])
if mode == tf.estimator.ModeKeys.EVAL:
return tf.estimator.EstimatorSpec(
mode, loss=loss, eval_metric_ops=metrics)
# Create training op.
assert mode == tf.estimator.ModeKeys.TRAIN
optimizer = tf.train.AdagradOptimizer(learning_rate=0.1)
train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())
return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
def main(argv):
args = parser.parse_args(argv[1:])
# Fetch the data
(train_x, train_y), (test_x, test_y) = load_data()
train_x = dict(train_x)
test_x = dict(test_x)
# Feature columns describe the input: all columns are numeric.
feature_columns = [tf.feature_column.numeric_column(col_name)
for col_name in COLUMNS[:-1]]
  # Build a 3-layer DNN with 10, 20, 10 units respectively.
classifier = tf.estimator.Estimator(
model_fn=my_model,
params={
'feature_columns': feature_columns,
'hidden_units': [10, 20, 10],
'n_classes': 3,
})
# Train the Model.
train = (
make_dataset(train_x, train_y)
.repeat()
.shuffle(1000)
.batch(args.batch_size))
classifier.train(input_fn=from_dataset(train), steps=args.train_steps)
# Evaluate the model.
test = make_dataset(test_x, test_y).batch(args.batch_size)
eval_result = classifier.evaluate(input_fn=from_dataset(test))
print('\nTest set accuracy: {accuracy:0.3f}\n'.format(**eval_result))
# Generate predictions from the model
predict_input = make_dataset({
'SepalLength': [6.4, 5.8],
'SepalWidth': [3.2, 3.1],
'PetalLength': [4.5, 5.0],
'PetalWidth': [1.5, 1.7],
}).batch(args.batch_size)
for p in classifier.predict(input_fn=from_dataset(predict_input)):
template = ('Prediction is "{}" ({:.1f}%)')
class_id = p['class_ids'][0]
probability = p['probabilities'][class_id]
print(template.format(SPECIES[class_id], 100 * probability))
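# A side note (an illustrative sketch): the `one_hot` + `softmax_cross_entropy`
# pair in `my_model` can be replaced by the sparse variant of the loss, which
# takes the integer class labels directly and computes the same value.
def sparse_loss(labels, logits):
  """Equivalent to the one-hot softmax cross-entropy used in `my_model`."""
  return tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)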
if __name__ == '__main__':
tf.logging.set_verbosity(tf.logging.INFO)
tf.app.run(main)
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A simple smoke test that runs these examples for 1 training iteraton."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
import pandas as pd
from six.moves import StringIO
import custom_estimator
import premade_estimator
FOUR_LINES = "\n".join([
"1,52.40, 2823,152,2",
"164, 99.80,176.60,66.20,1",
"176,2824, 136,3.19,0",
"2,177.30,66.30, 53.10,1",])
def four_lines_data():
text = StringIO(FOUR_LINES)
df = pd.read_csv(text, names=premade_estimator.COLUMNS)
xy = (df, df.pop("Species"))
return xy, xy
class RegressionTest(tf.test.TestCase):
"""Test the regression examples in this directory."""
@tf.test.mock.patch.dict(premade_estimator.__dict__,
{"load_data": four_lines_data})
def test_premade_estimator(self):
premade_estimator.main([None, "--train_steps=1"])
@tf.test.mock.patch.dict(custom_estimator.__dict__,
{"load_data": four_lines_data})
def test_custom_estimator(self):
custom_estimator.main([None, "--train_steps=1"])
if __name__ == "__main__":
tf.test.main()
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Example of DNNClassifier for Iris plant dataset."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import pandas as pd
import tensorflow as tf
parser = argparse.ArgumentParser()
parser.add_argument('--batch_size', default=100, type=int, help='batch size')
parser.add_argument('--train_steps', default=200, type=int,
help='number of training steps')
TRAIN_URL = "http://download.tensorflow.org/data/iris_training.csv"
TEST_URL = "http://download.tensorflow.org/data/iris_test.csv"
COLUMNS = ['SepalLength', 'SepalWidth', 'PetalLength', 'PetalWidth', 'Species']
SPECIES = ['Setosa', 'Versicolor', 'Virginica']
def load_data(train_fraction=0.8, seed=0, y_name='Species'):
"""Returns the iris dataset as (train_x, train_y), (test_x, test_y)."""
train_path = tf.keras.utils.get_file(TRAIN_URL.split('/')[-1], TRAIN_URL)
train = pd.read_csv(train_path, names=COLUMNS, header=0)
train_x, train_y = train, train.pop(y_name)
test_path = tf.keras.utils.get_file(TEST_URL.split('/')[-1], TEST_URL)
test = pd.read_csv(test_path, names=COLUMNS, header=0)
test_x, test_y = test, test.pop(y_name)
return (train_x, train_y), (test_x, test_y)
def make_dataset(*inputs):
return tf.data.Dataset.from_tensor_slices(inputs)
def from_dataset(ds):
return lambda: ds.make_one_shot_iterator().get_next()
def main(argv):
args = parser.parse_args(argv[1:])
# Fetch the data
(train_x, train_y), (test_x, test_y) = load_data()
train_x = dict(train_x)
test_x = dict(test_x)
# Feature columns describe the input: all columns are numeric.
feature_columns = [tf.feature_column.numeric_column(col_name)
for col_name in COLUMNS[:-1]]
  # Build a 3-layer DNN with 10, 20, 10 units respectively.
classifier = tf.estimator.DNNClassifier(
feature_columns=feature_columns,
hidden_units=[10, 20, 10],
n_classes=3)
# Train the Model.
train = (
make_dataset(train_x, train_y)
.repeat()
.shuffle(1000)
.batch(args.batch_size))
classifier.train(input_fn=from_dataset(train), steps=args.train_steps)
# Evaluate the model.
test = make_dataset(test_x, test_y).batch(args.batch_size)
eval_result = classifier.evaluate(input_fn=from_dataset(test))
print('\nTest set accuracy: {accuracy:0.3f}\n'.format(**eval_result))
# Generate predictions from the model
predict_input = make_dataset({
'SepalLength': [6.4, 5.8],
'SepalWidth': [3.2, 3.1],
'PetalLength': [4.5, 5.0],
'PetalWidth': [1.5, 1.7],
}).batch(args.batch_size)
for p in classifier.predict(input_fn=from_dataset(predict_input)):
template = ('Prediction is "{}" ({:.1f}%)')
class_id = p['class_ids'][0]
probability = p['probabilities'][class_id]
print(template.format(SPECIES[class_id], 100 * probability))
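# An alternative input pipeline (a sketch, assuming the `tf.estimator.inputs`
# API available in this TensorFlow version): feed the pandas DataFrame
# directly with `pandas_input_fn` instead of hand-building a `Dataset`.
# Drop-in usage would be:
#   classifier.train(input_fn=pandas_train_input_fn(train_x, train_y),
#                    steps=args.train_steps)
def pandas_train_input_fn(train_x, train_y, batch_size=100):
  """Build an input_fn that shuffles and repeats the training DataFrame."""
  return tf.estimator.inputs.pandas_input_fn(
      x=pd.DataFrame(train_x), y=train_y, batch_size=batch_size,
      shuffle=True, num_epochs=None)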
if __name__ == '__main__':
tf.logging.set_verbosity(tf.logging.INFO)
tf.app.run(main)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# This is the complete code for the following blogpost:
# https://developers.googleblog.com/2017/09/introducing-tensorflow-datasets.html
# (https://goo.gl/Ujm2Ep)
import os
import six.moves.urllib.request as request
import tensorflow as tf
# Check that we have the correct TensorFlow version installed
tf_version = tf.__version__
print("TensorFlow version: {}".format(tf_version))
# Compare version components numerically; a plain string comparison would
# mis-order releases such as "1.10" vs "1.3".
assert tuple(int(v) for v in tf_version.split(".")[:2]) >= (1, 3), (
    "TensorFlow r1.3 or later is needed")
# Windows users: you only need to change PATH; the rest is platform-independent
PATH = "/tmp/tf_dataset_and_estimator_apis"
# Fetch and store Training and Test dataset files
PATH_DATASET = PATH + os.sep + "dataset"
FILE_TRAIN = PATH_DATASET + os.sep + "iris_training.csv"
FILE_TEST = PATH_DATASET + os.sep + "iris_test.csv"
URL_TRAIN = "http://download.tensorflow.org/data/iris_training.csv"
URL_TEST = "http://download.tensorflow.org/data/iris_test.csv"
def downloadDataset(url, file):
if not os.path.exists(PATH_DATASET):
os.makedirs(PATH_DATASET)
if not os.path.exists(file):
data = request.urlopen(url).read()
    with open(file, "wb") as f:
      f.write(data)
downloadDataset(URL_TRAIN, FILE_TRAIN)
downloadDataset(URL_TEST, FILE_TEST)
tf.logging.set_verbosity(tf.logging.INFO)
# The CSV features in our training & test data
feature_names = [
'SepalLength',
'SepalWidth',
'PetalLength',
'PetalWidth']
# Create an input function reading a file using the Dataset API
# Then provide the results to the Estimator API
def my_input_fn(file_path, perform_shuffle=False, repeat_count=1):
def decode_csv(line):
parsed_line = tf.decode_csv(line, [[0.], [0.], [0.], [0.], [0]])
label = parsed_line[-1:] # Last element is the label
del parsed_line[-1] # Delete last element
    features = parsed_line  # Everything except the label is a feature
d = dict(zip(feature_names, features)), label
return d
dataset = (tf.data.TextLineDataset(file_path) # Read text file
.skip(1) # Skip header row
.map(decode_csv)) # Transform each elem by applying decode_csv fn
if perform_shuffle:
# Randomizes input using a window of 256 elements (read into memory)
dataset = dataset.shuffle(buffer_size=256)
  dataset = dataset.repeat(repeat_count)  # Repeat the dataset this many times
dataset = dataset.batch(32) # Batch size to use
iterator = dataset.make_one_shot_iterator()
batch_features, batch_labels = iterator.get_next()
return batch_features, batch_labels
next_batch = my_input_fn(FILE_TRAIN, True)  # Tensors for one batch of 32 shuffled examples
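# A quick sanity check (an illustrative sketch): materialize one batch from
# the input function and print its shapes; safe to delete.
with tf.Session() as sess:
  features_sample, labels_sample = sess.run(next_batch)
print("feature shapes:", {name: t.shape for name, t in features_sample.items()})
print("label batch shape:", labels_sample.shape)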
# Create the feature_columns, which specifies the input to our model
# All our input features are numeric, so use numeric_column for each one
feature_columns = [tf.feature_column.numeric_column(k) for k in feature_names]
# Create a deep neural network classifier
# Use the DNNClassifier pre-made estimator
classifier = tf.estimator.DNNClassifier(
feature_columns=feature_columns, # The input features to our model
hidden_units=[10, 10], # Two layers, each with 10 neurons
n_classes=3,
model_dir=PATH) # Path to where checkpoints etc are stored
# Train our model using the previously defined my_input_fn
# The input for training is the file of training examples
# Stop training after 8 passes over the training data (epochs)
classifier.train(
input_fn=lambda: my_input_fn(FILE_TRAIN, True, 8))
# Evaluate our model using the examples contained in FILE_TEST
# The return value contains evaluation metrics such as loss and average_loss
evaluate_result = classifier.evaluate(
input_fn=lambda: my_input_fn(FILE_TEST, False, 4))
print("Evaluation results")
for key in evaluate_result:
print(" {}, was: {}".format(key, evaluate_result[key]))
# Predict the type of some Iris flowers.
# Let's predict the examples in FILE_TEST, repeat only once.
predict_results = classifier.predict(
input_fn=lambda: my_input_fn(FILE_TEST, False, 1))
print("Predictions on test file")
for prediction in predict_results:
  # Prints the predicted class, i.e. 0, 1, or 2 if the prediction is
  # Iris Setosa, Versicolor, or Virginica, respectively.
print(prediction["class_ids"][0])
# Let's create a dataset for prediction
# We've taken the first 3 examples from FILE_TEST
prediction_input = [[5.9, 3.0, 4.2, 1.5], # -> 1, Iris Versicolor
[6.9, 3.1, 5.4, 2.1], # -> 2, Iris Virginica
                    [5.1, 3.3, 1.7, 0.5]]  # -> 0, Iris Setosa
def new_input_fn():
def decode(x):
x = tf.split(x, 4) # Need to split into our 4 features
return dict(zip(feature_names, x)) # To build a dict of them
dataset = tf.data.Dataset.from_tensor_slices(prediction_input)
dataset = dataset.map(decode)
iterator = dataset.make_one_shot_iterator()
next_feature_batch = iterator.get_next()
return next_feature_batch, None # In prediction, we have no labels
# Predict all our prediction_input
predict_results = classifier.predict(input_fn=new_input_fn)
# Print results
print("Predictions:")
for idx, prediction in enumerate(predict_results):
  class_id = prediction["class_ids"][0]  # Get the predicted class (index)
  if class_id == 0:
    print(" I think: {}, is Iris Setosa".format(prediction_input[idx]))
  elif class_id == 1:
    print(" I think: {}, is Iris Versicolor".format(prediction_input[idx]))
  else:
    print(" I think: {}, is Iris Virginica".format(prediction_input[idx]))