ModelZoo / ResNet50_tensorflow · Commits

Commit b3f04bca
Authored Nov 01, 2017 by Mark Daoust; committed by Neal Wu on Nov 01, 2017

Initialize examples directory. (#2546)

Parent: ddebf55c
Showing 12 changed files with 2219 additions and 0 deletions (+2219 -0)
samples/cookbook/regression/__init__.py                         +20  -0
samples/cookbook/regression/automobile_data.py                 +132  -0
samples/cookbook/regression/custom_regression.py               +170  -0
samples/cookbook/regression/dnn_regression.py                  +107  -0
samples/cookbook/regression/linear_regression.py               +110  -0
samples/cookbook/regression/linear_regression_categorical.py   +116  -0
samples/cookbook/regression/regression_test.py                  +73  -0
samples/core/get_started/custom_estimator.py                   +157  -0
samples/core/get_started/estimator_test.py                      +58  -0
samples/core/get_started/premade_estimator.py                  +105  -0
samples/outreach/blogs/blog_estimators_dataset.py              +153  -0
samples/outreach/blogs/housing_prices.ipynb                   +1018  -0
samples/cookbook/regression/__init__.py  (new file, mode 100644)
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A collection of regression examples using `Estimators`."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
samples/cookbook/regression/automobile_data.py  (new file, mode 100644)
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A dataset loader for imports85.data."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections

import numpy as np
import tensorflow as tf

try:
  import pandas as pd  # pylint: disable=g-import-not-at-top
except ImportError:
  pass

URL = "https://archive.ics.uci.edu/ml/machine-learning-databases/autos/imports-85.data"

# Order is important for the csv-readers, so we use an OrderedDict here.
COLUMN_TYPES = collections.OrderedDict([
    ("symboling", int),
    ("normalized-losses", float),
    ("make", str),
    ("fuel-type", str),
    ("aspiration", str),
    ("num-of-doors", str),
    ("body-style", str),
    ("drive-wheels", str),
    ("engine-location", str),
    ("wheel-base", float),
    ("length", float),
    ("width", float),
    ("height", float),
    ("curb-weight", float),
    ("engine-type", str),
    ("num-of-cylinders", str),
    ("engine-size", float),
    ("fuel-system", str),
    ("bore", float),
    ("stroke", float),
    ("compression-ratio", float),
    ("horsepower", float),
    ("peak-rpm", float),
    ("city-mpg", float),
    ("highway-mpg", float),
    ("price", float)
])


def raw_dataframe():
  """Load the imports85 data as a pd.DataFrame."""
  # Download and cache the data.
  path = tf.keras.utils.get_file(URL.split("/")[-1], URL)

  # Load it into a pandas DataFrame.
  df = pd.read_csv(path, names=COLUMN_TYPES.keys(),
                   dtype=COLUMN_TYPES, na_values="?")

  return df


def load_data(y_name="price", train_fraction=0.7, seed=None):
  """Get the imports85 data set.

  A description of the data is available at:
    https://archive.ics.uci.edu/ml/datasets/automobile

  The data itself can be found at:
    https://archive.ics.uci.edu/ml/machine-learning-databases/autos/imports-85.data

  Args:
    y_name: the column to return as the label.
    train_fraction: the fraction of the dataset to use for training.
    seed: The random seed to use when shuffling the data. `None` generates a
      unique shuffle every run.

  Returns:
    a pair of pairs where the first pair is the training data, and the second
    is the test data:
    `(x_train, y_train), (x_test, y_test) = load_data(...)`
    `x` contains a pandas DataFrame of features, while `y` contains the label
    array.
  """
  # Load the raw data columns.
  data = raw_dataframe()

  # Delete rows with unknowns.
  data = data.dropna()

  # Shuffle the data.
  np.random.seed(seed)

  # Split the data into train/test subsets.
  x_train = data.sample(frac=train_fraction, random_state=seed)
  x_test = data.drop(x_train.index)

  # Extract the label from the features DataFrame.
  y_train = x_train.pop(y_name)
  y_test = x_test.pop(y_name)

  return (x_train, y_train), (x_test, y_test)


def make_dataset(x, y=None):
  """Create a slice Dataset from a pandas DataFrame and labels."""
  # TODO(markdaoust): simplify this after the 1.4 cut.
  # Convert the DataFrame to a dict.
  x = dict(x)

  # Convert the pd.Series to np.arrays.
  for key in x:
    x[key] = np.array(x[key])

  items = [x]
  if y is not None:
    items.append(np.array(y, dtype=np.float32))

  # Create a Dataset of slices.
  return tf.data.Dataset.from_tensor_slices(tuple(items))
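
Taken together, these helpers cover the whole input path for the scripts that follow. A minimal sketch (an editor's illustration, not part of the commit) of the intended use, assuming it runs alongside automobile_data.py:

import tensorflow as tf

import automobile_data

# Editor's sketch: load a reproducible 70/30 split and pull one 32-row batch.
(train_x, train_y), (test_x, test_y) = automobile_data.load_data(seed=0)
batch = (automobile_data.make_dataset(train_x, train_y)
         .shuffle(1000).batch(32)
         .make_one_shot_iterator().get_next())
with tf.Session() as sess:
  features, labels = sess.run(batch)  # dict of feature arrays, label array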
samples/cookbook/regression/custom_regression.py  (new file, mode 100644)
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Regression using the DNNRegressor Estimator."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse

import tensorflow as tf

import automobile_data

parser = argparse.ArgumentParser()
parser.add_argument('--batch_size', default=100, type=int, help='batch size')
parser.add_argument('--train_steps', default=1000, type=int,
                    help='number of training steps')
parser.add_argument('--price_norm_factor', default=1000., type=float,
                    help='price normalization factor')


def from_dataset(ds):
  return lambda: ds.make_one_shot_iterator().get_next()


def my_dnn_regression_fn(features, labels, mode, params):
  """A model function implementing DNN regression for a custom Estimator."""

  # Extract the input into a dense layer, according to the feature_columns.
  top = tf.feature_column.input_layer(features, params["feature_columns"])

  # Iterate over the "hidden_units" list of layer sizes, default is [20].
  for units in params.get("hidden_units", [20]):
    # Add a hidden layer, densely connected on top of the previous layer.
    top = tf.layers.dense(inputs=top, units=units, activation=tf.nn.relu)

  # Connect a linear output layer on top.
  output_layer = tf.layers.dense(inputs=top, units=1)

  # Reshape the output layer to a 1-dim Tensor to return predictions.
  predictions = tf.squeeze(output_layer, 1)

  if mode == tf.estimator.ModeKeys.PREDICT:
    # In `PREDICT` mode we only need to return predictions.
    return tf.estimator.EstimatorSpec(
        mode=mode, predictions={"price": predictions})

  # Calculate loss using mean squared error.
  average_loss = tf.losses.mean_squared_error(labels, predictions)

  # Pre-made estimators use the total_loss instead of the average,
  # so report total_loss for compatibility.
  batch_size = tf.shape(labels)[0]
  total_loss = tf.to_float(batch_size) * average_loss

  if mode == tf.estimator.ModeKeys.TRAIN:
    optimizer = params.get("optimizer", tf.train.AdamOptimizer)
    optimizer = optimizer(params.get("learning_rate", None))
    train_op = optimizer.minimize(
        loss=average_loss, global_step=tf.train.get_global_step())

    return tf.estimator.EstimatorSpec(
        mode=mode, loss=total_loss, train_op=train_op)

  # In evaluation mode we will calculate evaluation metrics.
  assert mode == tf.estimator.ModeKeys.EVAL

  # Calculate root mean squared error.
  rmse = tf.metrics.root_mean_squared_error(labels, predictions)

  # Add the rmse to the collection of evaluation metrics.
  eval_metrics = {"rmse": rmse}

  return tf.estimator.EstimatorSpec(
      mode=mode,
      # Report sum of error for compatibility with pre-made estimators.
      loss=total_loss,
      eval_metric_ops=eval_metrics)


def main(argv):
  """Builds, trains, and evaluates the model."""
  args = parser.parse_args(argv[1:])

  (train_x, train_y), (test_x, test_y) = automobile_data.load_data()

  train_y /= args.price_norm_factor
  test_y /= args.price_norm_factor

  # Build the training dataset.
  train = (
      automobile_data.make_dataset(train_x, train_y)
      # Shuffling with a buffer larger than the data set ensures
      # that the examples are well mixed.
      .shuffle(1000).batch(args.batch_size)
      # Repeat forever.
      .repeat())

  # Build the validation dataset.
  test = automobile_data.make_dataset(test_x, test_y).batch(args.batch_size)

  # The first way assigns a unique weight to each category. To do this, you
  # must specify the category's vocabulary (values outside this specification
  # will receive a weight of zero). Here we specify the vocabulary using a
  # list of options. The vocabulary can also be specified with a vocabulary
  # file (using `categorical_column_with_vocabulary_file`). For features
  # covering a range of positive integers use
  # `categorical_column_with_identity`.
  body_style_vocab = ["hardtop", "wagon", "sedan", "hatchback", "convertible"]
  body_style = tf.feature_column.categorical_column_with_vocabulary_list(
      key="body-style", vocabulary_list=body_style_vocab)
  make = tf.feature_column.categorical_column_with_hash_bucket(
      key="make", hash_bucket_size=50)

  feature_columns = [
      tf.feature_column.numeric_column(key="curb-weight"),
      tf.feature_column.numeric_column(key="highway-mpg"),
      # Since this is a DNN model, convert categorical columns from sparse
      # to dense. Wrap them in an `indicator_column` to create a
      # one-hot vector from the input.
      tf.feature_column.indicator_column(body_style),
      # Or use an `embedding_column` to create a trainable vector for each
      # index.
      tf.feature_column.embedding_column(make, dimension=3),
  ]

  # Build a custom Estimator, using the model_fn.
  # `params` is passed through to the `model_fn`.
  model = tf.estimator.Estimator(
      model_fn=my_dnn_regression_fn,
      params={
          "feature_columns": feature_columns,
          "learning_rate": 0.001,
          "optimizer": tf.train.AdamOptimizer,
          "hidden_units": [20, 20]
      })

  # Train the model.
  model.train(input_fn=from_dataset(train), steps=args.train_steps)

  # Evaluate how the model performs on data it has not yet seen.
  eval_result = model.evaluate(input_fn=from_dataset(test))

  # Print the Root Mean Square Error (RMSE).
  print("\n" + 80 * "*")
  print("\nRMS error for the test set: ${:.0f}"
        .format(args.price_norm_factor * eval_result["rmse"]))

  print()


if __name__ == "__main__":
  # The Estimator periodically generates "INFO" logs; make these logs visible.
  tf.logging.set_verbosity(tf.logging.INFO)
  tf.app.run(main=main)
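
A note on the from_dataset helper that all of these scripts share: Estimator.train, evaluate, and predict expect input_fn to be a zero-argument callable, so the helper closes over a Dataset and returns a lambda that builds the iterator when the Estimator invokes it. A sketch (not part of the commit) of the equivalent with the pipeline inlined, using the names from main() above:

def train_input_fn():
  # Editor's sketch: roughly what from_dataset(train) expands to.
  ds = (automobile_data.make_dataset(train_x, train_y)
        .shuffle(1000).batch(100).repeat())
  return ds.make_one_shot_iterator().get_next()

model.train(input_fn=train_input_fn, steps=1000)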
samples/cookbook/regression/dnn_regression.py  (new file, mode 100644)
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Regression using the DNNRegressor Estimator."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse

import tensorflow as tf

import automobile_data

parser = argparse.ArgumentParser()
parser.add_argument('--batch_size', default=100, type=int, help='batch size')
parser.add_argument('--train_steps', default=5000, type=int,
                    help='number of training steps')
parser.add_argument('--price_norm_factor', default=1000., type=float,
                    help='price normalization factor')


def from_dataset(ds):
  return lambda: ds.make_one_shot_iterator().get_next()


def main(argv):
  """Builds, trains, and evaluates the model."""
  args = parser.parse_args(argv[1:])

  (train_x, train_y), (test_x, test_y) = automobile_data.load_data()

  train_y /= args.price_norm_factor
  test_y /= args.price_norm_factor

  # Build the training dataset.
  train = (
      automobile_data.make_dataset(train_x, train_y)
      # Shuffling with a buffer larger than the data set ensures
      # that the examples are well mixed.
      .shuffle(1000).batch(args.batch_size)
      # Repeat forever.
      .repeat())

  # Build the validation dataset.
  test = automobile_data.make_dataset(test_x, test_y).batch(args.batch_size)

  # Use the same categorical columns as in `linear_regression_categorical`.
  body_style_vocab = ["hardtop", "wagon", "sedan", "hatchback", "convertible"]
  body_style_column = tf.feature_column.categorical_column_with_vocabulary_list(
      key="body-style", vocabulary_list=body_style_vocab)
  make_column = tf.feature_column.categorical_column_with_hash_bucket(
      key="make", hash_bucket_size=50)

  feature_columns = [
      tf.feature_column.numeric_column(key="curb-weight"),
      tf.feature_column.numeric_column(key="highway-mpg"),
      # Since this is a DNN model, categorical columns must be converted
      # from sparse to dense. Wrap them in an `indicator_column` to create
      # a one-hot vector from the input.
      tf.feature_column.indicator_column(body_style_column),
      # Or use an `embedding_column` to create a trainable vector for each
      # index.
      tf.feature_column.embedding_column(make_column, dimension=3),
  ]

  # Build a DNNRegressor with two 20-unit hidden layers, using the feature
  # columns defined above as input.
  model = tf.estimator.DNNRegressor(
      hidden_units=[20, 20], feature_columns=feature_columns)

  # Train the model.
  # By default, Estimators log output every 100 steps.
  model.train(input_fn=from_dataset(train), steps=args.train_steps)

  # Evaluate how the model performs on data it has not yet seen.
  eval_result = model.evaluate(input_fn=from_dataset(test))

  # The evaluation returns a Python dictionary. The "average_loss" key holds
  # the Mean Squared Error (MSE).
  average_loss = eval_result["average_loss"]

  # Convert MSE to Root Mean Square Error (RMSE).
  print("\n" + 80 * "*")
  print("\nRMS error for the test set: ${:.0f}"
        .format(args.price_norm_factor * average_loss ** 0.5))

  print()


if __name__ == "__main__":
  # The Estimator periodically generates "INFO" logs; make these logs visible.
  tf.logging.set_verbosity(tf.logging.INFO)
  tf.app.run(main=main)
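
Note the unit conversion in the final print: the labels were divided by price_norm_factor before training, and average_loss is an MSE in those scaled units, so the dollar RMSE is price_norm_factor * sqrt(average_loss). A worked example with assumed numbers:

# Assumed evaluation output, for illustration only (not actual results).
average_loss = 6.25  # MSE on prices scaled by --price_norm_factor=1000
rmse_dollars = 1000. * average_loss ** 0.5  # sqrt(6.25) = 2.5 -> $2500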
samples/cookbook/regression/linear_regression.py  (new file, mode 100644)
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Linear regression using the LinearRegressor Estimator."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse

import numpy as np
import tensorflow as tf

import automobile_data

parser = argparse.ArgumentParser()
parser.add_argument('--batch_size', default=100, type=int, help='batch size')
parser.add_argument('--train_steps', default=1000, type=int,
                    help='number of training steps')
parser.add_argument('--price_norm_factor', default=1000., type=float,
                    help='price normalization factor')


def from_dataset(ds):
  return lambda: ds.make_one_shot_iterator().get_next()


def main(argv):
  """Builds, trains, and evaluates the model."""
  args = parser.parse_args(argv[1:])

  (train_x, train_y), (test_x, test_y) = automobile_data.load_data()

  train_y /= args.price_norm_factor
  test_y /= args.price_norm_factor

  # Build the training dataset.
  train = (
      automobile_data.make_dataset(train_x, train_y)
      # Shuffling with a buffer larger than the data set ensures
      # that the examples are well mixed.
      .shuffle(1000).batch(args.batch_size)
      # Repeat forever.
      .repeat())

  # Build the validation dataset.
  test = automobile_data.make_dataset(test_x, test_y).batch(args.batch_size)

  feature_columns = [
      # "curb-weight" and "highway-mpg" are numeric columns.
      tf.feature_column.numeric_column(key="curb-weight"),
      tf.feature_column.numeric_column(key="highway-mpg"),
  ]

  # Build the Estimator.
  model = tf.estimator.LinearRegressor(feature_columns=feature_columns)

  # Train the model.
  # By default, the Estimators log output every 100 steps.
  model.train(input_fn=from_dataset(train), steps=args.train_steps)

  # Evaluate how the model performs on data it has not yet seen.
  eval_result = model.evaluate(input_fn=from_dataset(test))

  # The evaluation returns a Python dictionary. The "average_loss" key holds
  # the Mean Squared Error (MSE).
  average_loss = eval_result["average_loss"]

  # Convert MSE to Root Mean Square Error (RMSE).
  print("\n" + 80 * "*")
  print("\nRMS error for the test set: ${:.0f}"
        .format(args.price_norm_factor * average_loss ** 0.5))

  # Run the model in prediction mode.
  input_dict = {
      "curb-weight": np.array([2000, 3000]),
      "highway-mpg": np.array([30, 40])
  }
  predict = automobile_data.make_dataset(input_dict).batch(1)
  predict_results = model.predict(input_fn=from_dataset(predict))

  # Print the prediction results.
  print("\nPrediction results:")
  for i, prediction in enumerate(predict_results):
    msg = ("Curb weight: {: 4d}lbs, "
           "Highway: {: 0d}mpg, "
           "Prediction: ${: 9.2f}")
    msg = msg.format(input_dict["curb-weight"][i],
                     input_dict["highway-mpg"][i],
                     args.price_norm_factor * prediction["predictions"][0])

    print("    " + msg)
  print()


if __name__ == "__main__":
  # The Estimator periodically generates "INFO" logs; make these logs visible.
  tf.logging.set_verbosity(tf.logging.INFO)
  tf.app.run(main=main)
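
For reference, a worked rendering of the prediction message template with assumed inputs (curb weight 2000 lbs, 30 mpg, and a scaled prediction of 13.675):

# Assumed values, for illustration only.
msg = ("Curb weight: {: 4d}lbs, "
       "Highway: {: 0d}mpg, "
       "Prediction: ${: 9.2f}")
print(msg.format(2000, 30, 1000. * 13.675))
# -> Curb weight:  2000lbs, Highway:  30mpg, Prediction: $ 13675.00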
samples/cookbook/regression/linear_regression_categorical.py  (new file, mode 100644)
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Linear regression with categorical features."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse

import tensorflow as tf

import automobile_data

parser = argparse.ArgumentParser()
parser.add_argument('--batch_size', default=100, type=int, help='batch size')
parser.add_argument('--train_steps', default=1000, type=int,
                    help='number of training steps')
parser.add_argument('--price_norm_factor', default=1000., type=float,
                    help='price normalization factor')


def from_dataset(ds):
  return lambda: ds.make_one_shot_iterator().get_next()


def main(argv):
  """Builds, trains, and evaluates the model."""
  args = parser.parse_args(argv[1:])

  (train_x, train_y), (test_x, test_y) = automobile_data.load_data()

  train_y /= args.price_norm_factor
  test_y /= args.price_norm_factor

  # Build the training dataset.
  train = (
      automobile_data.make_dataset(train_x, train_y)
      # Shuffling with a buffer larger than the data set ensures
      # that the examples are well mixed.
      .shuffle(1000).batch(args.batch_size)
      # Repeat forever.
      .repeat())

  # Build the validation dataset.
  test = automobile_data.make_dataset(test_x, test_y).batch(args.batch_size)

  # The following code demonstrates two of the ways that `feature_columns`
  # can be used to build a model with categorical inputs.

  # The first way assigns a unique weight to each category. To do this, you
  # must specify the category's vocabulary (values outside this specification
  # will receive a weight of zero).
  # Alternatively, you can define the vocabulary in a file (by calling
  # `categorical_column_with_vocabulary_file`) or as a range of positive
  # integers (by calling `categorical_column_with_identity`).
  body_style_vocab = ["hardtop", "wagon", "sedan", "hatchback", "convertible"]
  body_style_column = tf.feature_column.categorical_column_with_vocabulary_list(
      key="body-style", vocabulary_list=body_style_vocab)

  # The second way, appropriate for an unspecified vocabulary, is to create a
  # hashed column. It will create a fixed-length list of weights, and
  # automatically assign each input category to a weight. Due to the
  # pseudo-randomness of the process, some weights may be shared between
  # categories, while others will remain unused.
  make_column = tf.feature_column.categorical_column_with_hash_bucket(
      key="make", hash_bucket_size=50)

  feature_columns = [
      # This model uses the same two numeric features as `linear_regression.py`.
      tf.feature_column.numeric_column(key="curb-weight"),
      tf.feature_column.numeric_column(key="highway-mpg"),
      # This model adds two categorical columns that will adjust the price
      # based on "make" and "body-style".
      body_style_column,
      make_column,
  ]

  # Build the Estimator.
  model = tf.estimator.LinearRegressor(feature_columns=feature_columns)

  # Train the model.
  # By default, the Estimators log output every 100 steps.
  model.train(input_fn=from_dataset(train), steps=args.train_steps)

  # Evaluate how the model performs on data it has not yet seen.
  eval_result = model.evaluate(input_fn=from_dataset(test))

  # The evaluation returns a Python dictionary. The "average_loss" key holds
  # the Mean Squared Error (MSE).
  average_loss = eval_result["average_loss"]

  # Convert MSE to Root Mean Square Error (RMSE).
  print("\n" + 80 * "*")
  print("\nRMS error for the test set: ${:.0f}"
        .format(args.price_norm_factor * average_loss ** 0.5))

  print()


if __name__ == "__main__":
  # The Estimator periodically generates "INFO" logs; make these logs visible.
  tf.logging.set_verbosity(tf.logging.INFO)
  tf.app.run(main=main)
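
An editor's sketch of the two alternatives named in the comments above; the vocabulary file path and the integer feature key are hypothetical, not part of this dataset:

# Vocabulary loaded from a file, one category per line (hypothetical path).
body_style_from_file = tf.feature_column.categorical_column_with_vocabulary_file(
    key="body-style", vocabulary_file="/tmp/body_style_vocab.txt",
    vocabulary_size=5)
# For an integer feature already encoded as 0..num_buckets-1 (hypothetical key).
some_id_column = tf.feature_column.categorical_column_with_identity(
    key="some-id", num_buckets=10)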
samples/cookbook/regression/regression_test.py  (new file, mode 100644)
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A simple smoke test that runs these examples for 1 training iteraton."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import pandas as pd
import tensorflow as tf

from six.moves import StringIO

import automobile_data

import dnn_regression
import linear_regression
import linear_regression_categorical
import custom_regression

# pylint: disable=line-too-long
FOUR_LINES = "\n".join([
    "1,?,alfa-romero,gas,std,two,hatchback,rwd,front,94.50,171.20,65.50,52.40,2823,ohcv,six,152,mpfi,2.68,3.47,9.00,154,5000,19,26,16500",
    "2,164,audi,gas,std,four,sedan,fwd,front,99.80,176.60,66.20,54.30,2337,ohc,four,109,mpfi,3.19,3.40,10.00,102,5500,24,30,13950",
    "2,164,audi,gas,std,four,sedan,4wd,front,99.40,176.60,66.40,54.30,2824,ohc,five,136,mpfi,3.19,3.40,8.00,115,5500,18,22,17450",
    "2,?,audi,gas,std,two,sedan,fwd,front,99.80,177.30,66.30,53.10,2507,ohc,five,136,mpfi,3.19,3.40,8.50,110,5500,19,25,15250",
])
# pylint: enable=line-too-long

mock = tf.test.mock


def four_lines_dataframe():
  text = StringIO(FOUR_LINES)
  return pd.read_csv(text, names=automobile_data.COLUMN_TYPES.keys(),
                     dtype=automobile_data.COLUMN_TYPES, na_values="?")


def four_lines_dataset(*args, **kwargs):
  del args, kwargs
  return tf.data.Dataset.from_tensor_slices(FOUR_LINES.split("\n"))


class RegressionTest(tf.test.TestCase):
  """Test the regression examples in this directory."""

  @mock.patch.dict(automobile_data.__dict__,
                   {"raw_dataframe": four_lines_dataframe})
  def test_linear_regression(self):
    linear_regression.main([None, "--train_steps=1"])

  @mock.patch.dict(automobile_data.__dict__,
                   {"raw_dataframe": four_lines_dataframe})
  def test_linear_regression_categorical(self):
    linear_regression_categorical.main([None, "--train_steps=1"])

  @mock.patch.dict(automobile_data.__dict__,
                   {"raw_dataframe": four_lines_dataframe})
  def test_dnn_regression(self):
    dnn_regression.main([None, "--train_steps=1"])

  @mock.patch.dict(automobile_data.__dict__,
                   {"raw_dataframe": four_lines_dataframe})
  def test_custom_regression(self):
    custom_regression.main([None, "--train_steps=1"])


if __name__ == "__main__":
  tf.test.main()
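
Each test swaps raw_dataframe for the in-memory four_lines_dataframe via mock.patch.dict, so the smoke tests never download the real dataset. The decorator is roughly equivalent to this manual swap (an editor's sketch, not part of the commit):

# Editor's sketch of what mock.patch.dict does around one test.
original = automobile_data.raw_dataframe
automobile_data.raw_dataframe = four_lines_dataframe
try:
  linear_regression.main([None, "--train_steps=1"])
finally:
  automobile_data.raw_dataframe = original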
samples/core/get_started/custom_estimator.py  (new file, mode 100644)
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Example of DNNClassifier for Iris plant dataset."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse

import pandas as pd
import tensorflow as tf

parser = argparse.ArgumentParser()
parser.add_argument('--batch_size', default=100, type=int, help='batch size')
parser.add_argument('--train_steps', default=200, type=int,
                    help='number of training steps')

TRAIN_URL = "http://download.tensorflow.org/data/iris_training.csv"
TEST_URL = "http://download.tensorflow.org/data/iris_test.csv"

COLUMNS = ['SepalLength', 'SepalWidth', 'PetalLength', 'PetalWidth',
           'Species']
SPECIES = ['Setosa', 'Versicolor', 'Virginica']


def load_data(train_fraction=0.8, seed=0, y_name='Species'):
  """Returns the iris dataset as (train_x, train_y), (test_x, test_y)."""
  train_path = tf.keras.utils.get_file(TRAIN_URL.split('/')[-1], TRAIN_URL)
  train = pd.read_csv(train_path, names=COLUMNS, header=0)
  train_x, train_y = train, train.pop(y_name)

  test_path = tf.keras.utils.get_file(TEST_URL.split('/')[-1], TEST_URL)
  test = pd.read_csv(test_path, names=COLUMNS, header=0)
  test_x, test_y = test, test.pop(y_name)

  return (train_x, train_y), (test_x, test_y)


def make_dataset(*inputs):
  return tf.data.Dataset.from_tensor_slices(inputs)


def from_dataset(ds):
  return lambda: ds.make_one_shot_iterator().get_next()


def my_model(features, labels, mode, params):
  """DNN with three hidden layers, and dropout of 0.1 probability."""
  # Create three fully connected layers, each with a dropout probability
  # of 0.1.
  net = tf.feature_column.input_layer(features, params['feature_columns'])
  for units in params.get('hidden_units', [10, 20, 10]):
    net = tf.layers.dense(net, units=units, activation=tf.nn.relu)
    net = tf.layers.dropout(net, rate=0.1,
                            training=mode == tf.estimator.ModeKeys.TRAIN)

  # Compute logits (1 per class).
  logits = tf.layers.dense(net, params['n_classes'], activation=None)

  # Compute predictions.
  predicted_classes = tf.argmax(logits, 1)
  if mode == tf.estimator.ModeKeys.PREDICT:
    predictions = {
        'class_ids': predicted_classes[:, tf.newaxis],
        'probabilities': tf.nn.softmax(logits),
        'logits': logits,
    }
    return tf.estimator.EstimatorSpec(mode, predictions=predictions)

  # Convert the labels to a one-hot tensor of shape (length of features, 3)
  # with an on-value of 1 for each one-hot vector of length 3.
  onehot_labels = tf.one_hot(labels, 3, 1, 0)
  # Compute loss.
  loss = tf.losses.softmax_cross_entropy(onehot_labels=onehot_labels,
                                         logits=logits)

  # Compute evaluation metrics.
  accuracy = tf.metrics.accuracy(labels=labels,
                                 predictions=predicted_classes,
                                 name='acc_op')
  metrics = {'accuracy': accuracy}
  tf.summary.scalar('accuracy', accuracy[1])

  if mode == tf.estimator.ModeKeys.EVAL:
    return tf.estimator.EstimatorSpec(
        mode, loss=loss, eval_metric_ops=metrics)

  # Create the training op.
  assert mode == tf.estimator.ModeKeys.TRAIN
  optimizer = tf.train.AdagradOptimizer(learning_rate=0.1)
  train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())
  return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)


def main(argv):
  args = parser.parse_args(argv[1:])

  # Fetch the data.
  (train_x, train_y), (test_x, test_y) = load_data()
  train_x = dict(train_x)
  test_x = dict(test_x)

  # Feature columns describe the input: all columns are numeric.
  feature_columns = [tf.feature_column.numeric_column(col_name)
                     for col_name in COLUMNS[:-1]]

  # Build a 3-layer DNN with 10, 20, and 10 units respectively.
  classifier = tf.estimator.Estimator(
      model_fn=my_model,
      params={
          'feature_columns': feature_columns,
          'hidden_units': [10, 20, 10],
          'n_classes': 3,
      })

  # Train the Model.
  train = (make_dataset(train_x, train_y)
           .repeat()
           .shuffle(1000)
           .batch(args.batch_size))
  classifier.train(input_fn=from_dataset(train), steps=args.train_steps)

  # Evaluate the model.
  test = make_dataset(test_x, test_y).batch(args.batch_size)
  eval_result = classifier.evaluate(input_fn=from_dataset(test))
  print('\nTest set accuracy: {accuracy:0.3f}\n'.format(**eval_result))

  # Generate predictions from the model.
  predict_input = make_dataset({
      'SepalLength': [6.4, 5.8],
      'SepalWidth': [3.2, 3.1],
      'PetalLength': [4.5, 5.0],
      'PetalWidth': [1.5, 1.7],
  }).batch(args.batch_size)

  for p in classifier.predict(input_fn=from_dataset(predict_input)):
    template = 'Prediction is "{}" ({:.1f}%)'
    class_id = p['class_ids'][0]
    probability = p['probabilities'][class_id]
    print(template.format(SPECIES[class_id], 100 * probability))


if __name__ == '__main__':
  tf.logging.set_verbosity(tf.logging.INFO)
  tf.app.run(main)
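
The loss above is computed by one-hot encoding the integer labels and applying softmax_cross_entropy. An equivalent alternative (an editor's note, not the commit's code) skips the explicit one-hot step:

# Editor's sketch: sparse_softmax_cross_entropy consumes integer labels
# directly, making the tf.one_hot call above unnecessary.
loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)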
samples/core/get_started/estimator_test.py  (new file, mode 100644)
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A simple smoke test that runs these examples for 1 training iteraton."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf
import pandas as pd

from six.moves import StringIO

import custom_estimator
import premade_estimator

FOUR_LINES = "\n".join([
    "1,52.40, 2823,152,2",
    "164, 99.80,176.60,66.20,1",
    "176,2824, 136,3.19,0",
    "2,177.30,66.30, 53.10,1",
])


def four_lines_data():
  text = StringIO(FOUR_LINES)
  df = pd.read_csv(text, names=premade_estimator.COLUMNS)
  xy = (df, df.pop("Species"))
  return xy, xy


class RegressionTest(tf.test.TestCase):
  """Test the estimator examples in this directory."""

  @tf.test.mock.patch.dict(premade_estimator.__dict__,
                           {"load_data": four_lines_data})
  def test_premade_estimator(self):
    premade_estimator.main([None, "--train_steps=1"])

  @tf.test.mock.patch.dict(custom_estimator.__dict__,
                           {"load_data": four_lines_data})
  def test_custom_estimator(self):
    custom_estimator.main([None, "--train_steps=1"])


if __name__ == "__main__":
  tf.test.main()
samples/core/get_started/premade_estimator.py  (new file, mode 100644)
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Example of DNNClassifier for Iris plant dataset."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse

import pandas as pd
import tensorflow as tf

parser = argparse.ArgumentParser()
parser.add_argument('--batch_size', default=100, type=int, help='batch size')
parser.add_argument('--train_steps', default=200, type=int,
                    help='number of training steps')

TRAIN_URL = "http://download.tensorflow.org/data/iris_training.csv"
TEST_URL = "http://download.tensorflow.org/data/iris_test.csv"

COLUMNS = ['SepalLength', 'SepalWidth', 'PetalLength', 'PetalWidth',
           'Species']
SPECIES = ['Setosa', 'Versicolor', 'Virginica']


def load_data(train_fraction=0.8, seed=0, y_name='Species'):
  """Returns the iris dataset as (train_x, train_y), (test_x, test_y)."""
  train_path = tf.keras.utils.get_file(TRAIN_URL.split('/')[-1], TRAIN_URL)
  train = pd.read_csv(train_path, names=COLUMNS, header=0)
  train_x, train_y = train, train.pop(y_name)

  test_path = tf.keras.utils.get_file(TEST_URL.split('/')[-1], TEST_URL)
  test = pd.read_csv(test_path, names=COLUMNS, header=0)
  test_x, test_y = test, test.pop(y_name)

  return (train_x, train_y), (test_x, test_y)


def make_dataset(*inputs):
  return tf.data.Dataset.from_tensor_slices(inputs)


def from_dataset(ds):
  return lambda: ds.make_one_shot_iterator().get_next()


def main(argv):
  args = parser.parse_args(argv[1:])

  # Fetch the data.
  (train_x, train_y), (test_x, test_y) = load_data()
  train_x = dict(train_x)
  test_x = dict(test_x)

  # Feature columns describe the input: all columns are numeric.
  feature_columns = [tf.feature_column.numeric_column(col_name)
                     for col_name in COLUMNS[:-1]]

  # Build a 3-layer DNN with 10, 20, and 10 units respectively.
  classifier = tf.estimator.DNNClassifier(
      feature_columns=feature_columns,
      hidden_units=[10, 20, 10],
      n_classes=3)

  # Train the Model.
  train = (make_dataset(train_x, train_y)
           .repeat()
           .shuffle(1000)
           .batch(args.batch_size))
  classifier.train(input_fn=from_dataset(train), steps=args.train_steps)

  # Evaluate the model.
  test = make_dataset(test_x, test_y).batch(args.batch_size)
  eval_result = classifier.evaluate(input_fn=from_dataset(test))
  print('\nTest set accuracy: {accuracy:0.3f}\n'.format(**eval_result))

  # Generate predictions from the model.
  predict_input = make_dataset({
      'SepalLength': [6.4, 5.8],
      'SepalWidth': [3.2, 3.1],
      'PetalLength': [4.5, 5.0],
      'PetalWidth': [1.5, 1.7],
  }).batch(args.batch_size)

  for p in classifier.predict(input_fn=from_dataset(predict_input)):
    template = 'Prediction is "{}" ({:.1f}%)'
    class_id = p['class_ids'][0]
    probability = p['probabilities'][class_id]
    print(template.format(SPECIES[class_id], 100 * probability))


if __name__ == '__main__':
  tf.logging.set_verbosity(tf.logging.INFO)
  tf.app.run(main)
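
Because my_model in custom_estimator.py emits the same 'class_ids' and 'probabilities' keys that this script reads, the custom Estimator should be a drop-in replacement for the canned DNNClassifier built above; an editor's sketch (not part of the commit):

import custom_estimator

# Editor's sketch: swap the canned DNNClassifier for the custom model_fn.
classifier = tf.estimator.Estimator(
    model_fn=custom_estimator.my_model,
    params={
        'feature_columns': feature_columns,
        'hidden_units': [10, 20, 10],
        'n_classes': 3,
    })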
samples/outreach/blogs/blog_estimators_dataset.py  (new file, mode 100644)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# This is the complete code for the following blogpost:
# https://developers.googleblog.com/2017/09/introducing-tensorflow-datasets.html
# (https://goo.gl/Ujm2Ep)
import os

import six.moves.urllib.request as request
import tensorflow as tf

# Check that we have the correct TensorFlow version installed.
tf_version = tf.__version__
print("TensorFlow version: {}".format(tf_version))
assert "1.3" <= tf_version, "TensorFlow r1.3 or later is needed"

# Windows users: You only need to change PATH, the rest is platform
# independent.
PATH = "/tmp/tf_dataset_and_estimator_apis"

# Fetch and store the Training and Test dataset files.
PATH_DATASET = PATH + os.sep + "dataset"
FILE_TRAIN = PATH_DATASET + os.sep + "iris_training.csv"
FILE_TEST = PATH_DATASET + os.sep + "iris_test.csv"
URL_TRAIN = "http://download.tensorflow.org/data/iris_training.csv"
URL_TEST = "http://download.tensorflow.org/data/iris_test.csv"


def downloadDataset(url, file):
  if not os.path.exists(PATH_DATASET):
    os.makedirs(PATH_DATASET)
  if not os.path.exists(file):
    data = request.urlopen(url).read()
    with open(file, "wb") as f:
      f.write(data)

downloadDataset(URL_TRAIN, FILE_TRAIN)
downloadDataset(URL_TEST, FILE_TEST)

tf.logging.set_verbosity(tf.logging.INFO)

# The CSV features in our training & test data.
feature_names = [
    'SepalLength',
    'SepalWidth',
    'PetalLength',
    'PetalWidth']


# Create an input function reading a file using the Dataset API,
# then provide the results to the Estimator API.
def my_input_fn(file_path, perform_shuffle=False, repeat_count=1):
  def decode_csv(line):
    parsed_line = tf.decode_csv(line, [[0.], [0.], [0.], [0.], [0]])
    label = parsed_line[-1:]  # Last element is the label
    del parsed_line[-1]  # Delete last element
    features = parsed_line  # Everything but last element is the features
    d = dict(zip(feature_names, features)), label
    return d

  dataset = (tf.data.TextLineDataset(file_path)  # Read text file
             .skip(1)  # Skip header row
             .map(decode_csv))  # Transform each elem by applying decode_csv
  if perform_shuffle:
    # Randomizes input using a window of 256 elements (read into memory).
    dataset = dataset.shuffle(buffer_size=256)
  dataset = dataset.repeat(repeat_count)  # Repeats dataset this # times
  dataset = dataset.batch(32)  # Batch size to use
  iterator = dataset.make_one_shot_iterator()
  batch_features, batch_labels = iterator.get_next()
  return batch_features, batch_labels

next_batch = my_input_fn(FILE_TRAIN, True)  # Will return 32 random elements

# Create the feature_columns, which specifies the input to our model.
# All our input features are numeric, so use numeric_column for each one.
feature_columns = [tf.feature_column.numeric_column(k) for k in feature_names]

# Create a deep neural network classifier.
# Use the DNNClassifier pre-made estimator.
classifier = tf.estimator.DNNClassifier(
    feature_columns=feature_columns,  # The input features to our model
    hidden_units=[10, 10],  # Two layers, each with 10 neurons
    n_classes=3,
    model_dir=PATH)  # Path to where checkpoints etc are stored

# Train our model, using the previously defined my_input_fn.
# Input to training is a file with training examples.
# Stop training after 8 iterations of train data (epochs).
classifier.train(input_fn=lambda: my_input_fn(FILE_TRAIN, True, 8))

# Evaluate our model using the examples contained in FILE_TEST.
# The return value holds evaluation metrics such as loss & average_loss.
evaluate_result = classifier.evaluate(
    input_fn=lambda: my_input_fn(FILE_TEST, False, 4))
print("Evaluation results")
for key in evaluate_result:
  print("   {}, was: {}".format(key, evaluate_result[key]))

# Predict the type of some Iris flowers.
# Let's predict the examples in FILE_TEST, repeat only once.
predict_results = classifier.predict(
    input_fn=lambda: my_input_fn(FILE_TEST, False, 1))
print("Predictions on test file")
for prediction in predict_results:
  # Will print the predicted class, i.e: 0, 1, or 2 if the prediction
  # is Iris Setosa, Versicolor, or Virginica, respectively.
  print(prediction["class_ids"][0])

# Let's create a dataset for prediction.
# We've taken the first 3 examples in FILE_TEST.
prediction_input = [[5.9, 3.0, 4.2, 1.5],  # -> 1, Iris Versicolor
                    [6.9, 3.1, 5.4, 2.1],  # -> 2, Iris Virginica
                    [5.1, 3.3, 1.7, 0.5]]  # -> 0, Iris Setosa


def new_input_fn():
  def decode(x):
    x = tf.split(x, 4)  # Need to split into our 4 features
    return dict(zip(feature_names, x))  # To build a dict of them

  dataset = tf.data.Dataset.from_tensor_slices(prediction_input)
  dataset = dataset.map(decode)
  iterator = dataset.make_one_shot_iterator()
  next_feature_batch = iterator.get_next()
  return next_feature_batch, None  # In prediction, we have no labels

# Predict all our prediction_input.
predict_results = classifier.predict(input_fn=new_input_fn)

# Print results.
print("Predictions:")
for idx, prediction in enumerate(predict_results):
  class_id = prediction["class_ids"][0]  # Get the predicted class (index)
  if class_id == 0:
    print(" I think: {}, is Iris Setosa".format(prediction_input[idx]))
  elif class_id == 1:
    print(" I think: {}, is Iris Versicolor".format(prediction_input[idx]))
  else:
    print(" I think: {}, is Iris Virginica".format(prediction_input[idx]))
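
The decode_csv helper above drives both the training and evaluation pipelines. An editor's trace (assumed input line, not part of the commit) of what one record parses to:

import tensorflow as tf

# record_defaults [[0.], [0.], [0.], [0.], [0]] parse four floats and one int.
with tf.Session() as sess:
  parsed = sess.run(tf.decode_csv(tf.constant("6.4,2.8,5.6,2.2,2"),
                                  [[0.], [0.], [0.], [0.], [0]]))
  print(parsed)  # [6.4, 2.8, 5.6, 2.2, 2]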
samples/outreach/blogs/housing_prices.ipynb  (new file, mode 100644)
This diff is collapsed.