Unverified commit de9f3584, authored by Mark Daoust, committed by GitHub

Merge pull request #4084 from XinyueZ/optimized/cookbook/regression/make_dataset

Fixes #4083 and adds two optimizations.
parents 579ef7d6 4cfb259f
@@ -109,19 +109,19 @@ def load_data(y_name="price", train_fraction=0.7, seed=None):
   return (x_train, y_train), (x_test, y_test)
 
-def make_dataset(x, y=None):
-  """Create a slice Dataset from a pandas DataFrame and labels"""
-  # TODO(markdaooust): simplify this after the 1.4 cut.
-  # Convert the DataFrame to a dict
-  x = dict(x)
-  # Convert the pd.Series to np.arrays
-  for key in x:
-    x[key] = np.array(x[key])
-  items = [x]
-  if y is not None:
-    items.append(np.array(y, dtype=np.float32))
-  # Create a Dataset of slices
-  return tf.data.Dataset.from_tensor_slices(tuple(items))
+def make_dataset(batch_sz, x, y=None, shuffle=False, shuffle_buffer_size=1000):
+  """Create a slice Dataset from a pandas DataFrame and labels"""
+  def input_fn():
+    if y is not None:
+      dataset = tf.data.Dataset.from_tensor_slices((dict(x), y))
+    else:
+      dataset = tf.data.Dataset.from_tensor_slices(dict(x))
+    if shuffle:
+      dataset = dataset.shuffle(shuffle_buffer_size).batch(batch_sz).repeat()
+    else:
+      dataset = dataset.batch(batch_sz)
+    return dataset.make_one_shot_iterator().get_next()
+  return input_fn
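The substance of this change: `make_dataset` no longer returns a finished `Dataset`; it returns an `input_fn` closure that builds, shuffles, batches, and repeats the `Dataset` on demand. Estimators invoke `input_fn` inside their own graph, so constructing the iterator lazily keeps those ops in the right place and removes the need for the `from_dataset` wrapper deleted below. A minimal sketch of the new call pattern (TF 1.x; the batch size and feature columns here are illustrative, `load_data` is from the same module):

```python
import tensorflow as tf

import automobile_data

(train_x, train_y), (test_x, test_y) = automobile_data.load_data()

# Training input: shuffle with a buffer larger than the dataset so the
# examples are well mixed, then batch and repeat forever.
train_input_fn = automobile_data.make_dataset(64, train_x, train_y, True, 1000)

# Evaluation input: a single batched pass, no shuffling or repeating.
test_input_fn = automobile_data.make_dataset(64, test_x, test_y)

model = tf.estimator.LinearRegressor(feature_columns=[
    tf.feature_column.numeric_column("curb-weight"),
    tf.feature_column.numeric_column("highway-mpg"),
])
model.train(input_fn=train_input_fn, steps=1000)
print(model.evaluate(input_fn=test_input_fn))
```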
@@ -31,11 +31,6 @@ parser.add_argument('--train_steps', default=1000, type=int,
 parser.add_argument('--price_norm_factor', default=1000., type=float,
                     help='price normalization factor')
 
-
-def from_dataset(ds):
-  return lambda: ds.make_one_shot_iterator().get_next()
-
-
 def my_dnn_regression_fn(features, labels, mode, params):
   """A model function implementing DNN regression for a custom Estimator."""
@@ -81,6 +76,10 @@ def my_dnn_regression_fn(features, labels, mode, params):
   # Calculate root mean squared error
+  print(labels)
+  print(predictions)
+  # Fixed for #4083
+  predictions = tf.cast(predictions, tf.float64)
   rmse = tf.metrics.root_mean_squared_error(labels, predictions)
 
   # Add the rmse to the collection of evaluation metrics.
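The `tf.cast` is the actual fix for #4083: labels loaded through pandas arrive as float64, while the DNN's output layer produces float32, and `tf.metrics.root_mean_squared_error` rejects operands with mismatched dtypes in TF 1.x. Casting the predictions up to float64 aligns the two. A self-contained sketch of the mismatch and the fix, using synthetic tensors:

```python
import numpy as np
import tensorflow as tf

# pandas-loaded labels default to float64; the network emits float32.
labels = tf.constant(np.array([11.5, 15.7]), dtype=tf.float64)
predictions = tf.constant([11.2, 16.1], dtype=tf.float32)

# Without this cast the metric raises a dtype-mismatch error.
predictions = tf.cast(predictions, tf.float64)
rmse, update_op = tf.metrics.root_mean_squared_error(labels, predictions)

with tf.Session() as sess:
    # The streaming metric keeps its state in local variables.
    sess.run(tf.local_variables_initializer())
    sess.run(update_op)
    print(sess.run(rmse))  # ~0.35
```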
@@ -102,17 +101,11 @@ def main(argv):
   train_y /= args.price_norm_factor
   test_y /= args.price_norm_factor
 
-  # Build the training dataset.
-  train = (
-      automobile_data.make_dataset(train_x, train_y)
-      # Shuffling with a buffer larger than the data set ensures
-      # that the examples are well mixed.
-      .shuffle(1000).batch(args.batch_size)
-      # Repeat forever
-      .repeat())
+  # Provide the training input dataset.
+  train_input_fn = automobile_data.make_dataset(args.batch_size, train_x, train_y, True, 1000)
 
   # Build the validation dataset.
-  test = automobile_data.make_dataset(test_x, test_y).batch(args.batch_size)
+  test_input_fn = automobile_data.make_dataset(args.batch_size, test_x, test_y)
 
   # The first way assigns a unique weight to each category. To do this you must
   # specify the category's vocabulary (values outside this specification will
@@ -151,10 +144,10 @@ def main(argv):
       })
 
   # Train the model.
-  model.train(input_fn=from_dataset(train), steps=args.train_steps)
+  model.train(input_fn=train_input_fn, steps=args.train_steps)
 
   # Evaluate how the model performs on data it has not yet seen.
-  eval_result = model.evaluate(input_fn=from_dataset(test))
+  eval_result = model.evaluate(input_fn=test_input_fn)
 
   # Print the Root Mean Square Error (RMSE).
   print("\n" + 80 * "*")
@@ -32,10 +32,6 @@ parser.add_argument('--price_norm_factor', default=1000., type=float,
                     help='price normalization factor')
 
-
-def from_dataset(ds):
-  return lambda: ds.make_one_shot_iterator().get_next()
-
 def main(argv):
   """Builds, trains, and evaluates the model."""
   args = parser.parse_args(argv[1:])
@@ -45,17 +41,11 @@ def main(argv):
   train_y /= args.price_norm_factor
   test_y /= args.price_norm_factor
 
-  # Build the training dataset.
-  train = (
-      automobile_data.make_dataset(train_x, train_y)
-      # Shuffling with a buffer larger than the data set ensures
-      # that the examples are well mixed.
-      .shuffle(1000).batch(args.batch_size)
-      # Repeat forever
-      .repeat())
+  # Provide the training input dataset.
+  train_input_fn = automobile_data.make_dataset(args.batch_size, train_x, train_y, True, 1000)
 
-  # Build the validation dataset.
-  test = automobile_data.make_dataset(test_x, test_y).batch(args.batch_size)
+  # Provide the validation input dataset.
+  test_input_fn = automobile_data.make_dataset(args.batch_size, test_x, test_y)
 
   # Use the same categorical columns as in `linear_regression_categorical`
   body_style_vocab = ["hardtop", "wagon", "sedan", "hatchback", "convertible"]
@@ -84,10 +74,10 @@ def main(argv):
 
   # Train the model.
   # By default, the Estimators log output every 100 steps.
-  model.train(input_fn=from_dataset(train), steps=args.train_steps)
+  model.train(input_fn=train_input_fn, steps=args.train_steps)
 
   # Evaluate how the model performs on data it has not yet seen.
-  eval_result = model.evaluate(input_fn=from_dataset(test))
+  eval_result = model.evaluate(input_fn=test_input_fn)
 
   # The evaluation returns a Python dictionary. The "average_loss" key holds the
   # Mean Squared Error (MSE).
@@ -33,10 +33,6 @@ parser.add_argument('--price_norm_factor', default=1000., type=float,
                     help='price normalization factor')
 
-
-def from_dataset(ds):
-  return lambda: ds.make_one_shot_iterator().get_next()
-
 def main(argv):
   """Builds, trains, and evaluates the model."""
   args = parser.parse_args(argv[1:])
@@ -46,17 +42,11 @@ def main(argv):
   train_y /= args.price_norm_factor
   test_y /= args.price_norm_factor
 
-  # Build the training dataset.
-  train = (
-      automobile_data.make_dataset(train_x, train_y)
-      # Shuffling with a buffer larger than the data set ensures
-      # that the examples are well mixed.
-      .shuffle(1000).batch(args.batch_size)
-      # Repeat forever
-      .repeat())
+  # Provide the training input dataset.
+  train_input_fn = automobile_data.make_dataset(args.batch_size, train_x, train_y, True, 1000)
 
-  # Build the validation dataset.
-  test = automobile_data.make_dataset(test_x, test_y).batch(args.batch_size)
+  # Provide the validation input dataset.
+  test_input_fn = automobile_data.make_dataset(args.batch_size, test_x, test_y)
 
   feature_columns = [
       # "curb-weight" and "highway-mpg" are numeric columns.
@@ -69,10 +59,10 @@ def main(argv):
 
   # Train the model.
   # By default, the Estimators log output every 100 steps.
-  model.train(input_fn=from_dataset(train), steps=args.train_steps)
+  model.train(input_fn=train_input_fn, steps=args.train_steps)
 
   # Evaluate how the model performs on data it has not yet seen.
-  eval_result = model.evaluate(input_fn=from_dataset(test))
+  eval_result = model.evaluate(input_fn=test_input_fn)
 
   # The evaluation returns a Python dictionary. The "average_loss" key holds the
   # Mean Squared Error (MSE).
@@ -88,8 +78,10 @@ def main(argv):
       "curb-weight": np.array([2000, 3000]),
       "highway-mpg": np.array([30, 40])
   }
-  predict = automobile_data.make_dataset(input_dict).batch(1)
-  predict_results = model.predict(input_fn=from_dataset(predict))
+
+  # Provide the predict input dataset.
+  predict_input_fn = automobile_data.make_dataset(1, input_dict)
+  predict_results = model.predict(input_fn=predict_input_fn)
 
   # Print the prediction results.
   print("\nPrediction results:")
@@ -32,10 +32,6 @@ parser.add_argument('--price_norm_factor', default=1000., type=float,
                     help='price normalization factor')
 
-
-def from_dataset(ds):
-  return lambda: ds.make_one_shot_iterator().get_next()
-
 def main(argv):
   """Builds, trains, and evaluates the model."""
   args = parser.parse_args(argv[1:])
@@ -45,17 +41,11 @@ def main(argv):
   train_y /= args.price_norm_factor
   test_y /= args.price_norm_factor
 
-  # Build the training dataset.
-  train = (
-      automobile_data.make_dataset(train_x, train_y)
-      # Shuffling with a buffer larger than the data set ensures
-      # that the examples are well mixed.
-      .shuffle(1000).batch(args.batch_size)
-      # Repeat forever
-      .repeat())
+  # Provide the training input dataset.
+  train_input_fn = automobile_data.make_dataset(args.batch_size, train_x, train_y, True, 1000)
 
-  # Build the validation dataset.
-  test = automobile_data.make_dataset(test_x, test_y).batch(args.batch_size)
+  # Provide the validation input dataset.
+  test_input_fn = automobile_data.make_dataset(args.batch_size, test_x, test_y)
 
   # The following code demonstrates two of the ways that `feature_columns` can
   # be used to build a model with categorical inputs.
@@ -93,10 +83,10 @@ def main(argv):
 
   # Train the model.
   # By default, the Estimators log output every 100 steps.
-  model.train(input_fn=from_dataset(train), steps=args.train_steps)
+  model.train(input_fn=train_input_fn, steps=args.train_steps)
 
   # Evaluate how the model performs on data it has not yet seen.
-  eval_result = model.evaluate(input_fn=from_dataset(test))
+  eval_result = model.evaluate(input_fn=test_input_fn)
 
   # The evaluation returns a Python dictionary. The "average_loss" key holds the
   # Mean Squared Error (MSE).