Commit 30fa4ebb authored by Eli Bixby's avatar Eli Bixby
Browse files

Intermediate commit for argparse move

parent 25fe395c
...@@ -34,8 +34,8 @@ data_batch_4 data_batch_5 readme.html test_batch ...@@ -34,8 +34,8 @@ data_batch_4 data_batch_5 readme.html test_batch
```shell ```shell
# This will generate a tf record for the training and test data available at the input_dir. # This will generate a tf record for the training and test data available at the input_dir.
# You can see more details in generate_cifar10_tf_records.py # You can see more details in generate_cifar10_tf_records.py
$ python generate_cifar10_tfrecords.py --input_dir=/prefix/to/downloaded/data/cifar-10-batches-py \ $ python generate_cifar10_tfrecords.py --input-dir=/prefix/to/downloaded/data/cifar-10-batches-py \
--output_dir=/prefix/to/downloaded/data/cifar-10-batches-py --output-dir=/prefix/to/downloaded/data/cifar-10-batches-py
``` ```
After running the command above, you should see the following new files in the output_dir. After running the command above, you should see the following new files in the output_dir.
...@@ -51,30 +51,30 @@ train.tfrecords validation.tfrecords eval.tfrecords ...@@ -51,30 +51,30 @@ train.tfrecords validation.tfrecords eval.tfrecords
``` ```
# Run the model on CPU only. After training, it runs the evaluation. # Run the model on CPU only. After training, it runs the evaluation.
$ python cifar10_main.py --data_dir=/prefix/to/downloaded/data/cifar-10-batches-py \ $ python cifar10_main.py --data-dir=/prefix/to/downloaded/data/cifar-10-batches-py \
--model_dir=/tmp/cifar10 \ --job-dir=/tmp/cifar10 \
--is_cpu_ps=True \ --is-cpu-ps=True \
--num_gpus=0 \ --num-gpus=0 \
--train_steps=1000 --train-steps=1000
# Run the model on 2 GPUs using CPU as parameter server. After training, it runs the evaluation. # Run the model on 2 GPUs using CPU as parameter server. After training, it runs the evaluation.
$ python cifar10_main.py --data_dir=/prefix/to/downloaded/data/cifar-10-batches-py \ $ python cifar10_main.py --data-dir=/prefix/to/downloaded/data/cifar-10-batches-py \
--model_dir=/tmp/cifar10 \ --job-dir=/tmp/cifar10 \
--is_cpu_ps=True \ --is-cpu-ps=True \
--force_gpu_compatible=True \ --force-gpu-compatible=True \
--num_gpus=2 \ --num-gpus=2 \
--train_steps=1000 --train-steps=1000
# Run the model on 2 GPUs using GPU as parameter server. # Run the model on 2 GPUs using GPU as parameter server.
# It will run an experiment, which for a local setting basically means it will stop training # It will run an experiment, which for a local setting basically means it will stop training
# a couple of times to perform evaluation. # a couple of times to perform evaluation.
$ python cifar10_main.py --data_dir=/prefix/to/downloaded/data/cifar-10-batches-bin \ $ python cifar10_main.py --data-dir=/prefix/to/downloaded/data/cifar-10-batches-bin \
--model_dir=/tmp/cifar10 \ --job-dir=/tmp/cifar10 \
--is_cpu_ps=False \ --is-cpu-ps=False \
--force_gpu_compatible=True \ --force-gpu-compatible=True \
--num_gpus=2 \ --num-gpus=2 \
--train_steps=1000 --train-steps=1000
--run_experiment=True
# There are more command line flags to play with; check cifar10_main.py for details. # There are more command line flags to play with; check cifar10_main.py for details.
``` ```
...@@ -105,13 +105,13 @@ gcloud ml-engine jobs submit training cifarmultigpu \ ...@@ -105,13 +105,13 @@ gcloud ml-engine jobs submit training cifarmultigpu \
--region us-central1 \ --region us-central1 \
--module-name cifar10_estimator.cifar10_main \ --module-name cifar10_estimator.cifar10_main \
-- \ -- \
--data_dir=$MY_BUCKET/cifar-10-batches-py \ --data-dir=$MY_BUCKET/cifar-10-batches-py \
--model_dir=$MY_BUCKET/model_dirs/cifarmultigpu \ --job-dir=$MY_BUCKET/model_dirs/cifarmultigpu \
--is_cpu_ps=True \ --is-cpu-ps=True \
--force_gpu_compatible=True \ --force-gpu-compatible=True \
--num_gpus=4 \ --num-gpus=4 \
--train_steps=1000 \ --train-steps=1000 \
--run_experiment=True
``` ```
...@@ -188,15 +188,15 @@ Once you have a `TF_CONFIG` configured properly on each host you're ready to run ...@@ -188,15 +188,15 @@ Once you have a `TF_CONFIG` configured properly on each host you're ready to run
# It will run evaluation a couple of times during training. # It will run evaluation a couple of times during training.
# The num_workers argument is used only to update the learning rate correctly. # The num_workers argument is used only to update the learning rate correctly.
# Make sure the model_dir is the same as defined on the TF_CONFIG. # Make sure the model_dir is the same as defined on the TF_CONFIG.
$ python cifar10_main.py --data_dir=gs://path/cifar-10-batches-py \ $ python cifar10_main.py --data-dir=gs://path/cifar-10-batches-py \
--model_dir=gs://path/model_dir/ \ --job-dir=gs://path/model_dir/ \
--is_cpu_ps=True \ --is-cpu-ps=True \
--force_gpu_compatible=True \ --force-gpu-compatible=True \
--num_gpus=4 \ --num-gpus=4 \
--train_steps=40000 \ --train-steps=40000 \
--sync=True \ --sync=True \
--run_experiment=True \ \
--num_workers=2 --num-workers=2
``` ```
*Output:* *Output:*
...@@ -331,14 +331,13 @@ INFO:tensorflow:Saving dict for global step 1: accuracy = 0.0994, global_step = ...@@ -331,14 +331,13 @@ INFO:tensorflow:Saving dict for global step 1: accuracy = 0.0994, global_step =
# Runs an Experiment in sync mode on 4 GPUs using CPU as parameter server for 40000 steps. # Runs an Experiment in sync mode on 4 GPUs using CPU as parameter server for 40000 steps.
# It will run evaluation a couple of times during training. # It will run evaluation a couple of times during training.
# Make sure the model_dir is the same as defined on the TF_CONFIG. # Make sure the model_dir is the same as defined on the TF_CONFIG.
$ python cifar10_main.py --data_dir=gs://path/cifar-10-batches-py \ $ python cifar10_main.py --data-dir=gs://path/cifar-10-batches-py \
--model_dir=gs://path/model_dir/ \ --job-dir=gs://path/model_dir/ \
--is_cpu_ps=True \ --is-cpu-ps=True \
--force_gpu_compatible=True \ --force-gpu-compatible=True \
--num_gpus=4 \ --num-gpus=4 \
--train_steps=40000 \ --train-steps=40000 \
--sync=True --sync=True
--run_experiment=True
``` ```
*Output:* *Output:*
...@@ -447,7 +446,7 @@ INFO:tensorflow:loss = 27.8453, step = 179 (18.893 sec) ...@@ -447,7 +446,7 @@ INFO:tensorflow:loss = 27.8453, step = 179 (18.893 sec)
```shell ```shell
# Run this on ps: # Run this on ps:
# The ps will not do training so most of the arguments won't affect the execution # The ps will not do training so most of the arguments won't affect the execution
$ python cifar10_main.py --run_experiment=True --model_dir=gs://path/model_dir/ $ python cifar10_main.py --job-dir=gs://path/model_dir/
# There are more command line flags to play with; check cifar10_main.py for details. # There are more command line flags to play with; check cifar10_main.py for details.
``` ```
...@@ -480,7 +479,7 @@ You'll see something similar to this if you "point" TensorBoard to the `model_di ...@@ -480,7 +479,7 @@ You'll see something similar to this if you "point" TensorBoard to the `model_di
# Check TensorBoard during training or after it. # Check TensorBoard during training or after it.
# Just point TensorBoard to the model_dir you chose on the previous step # Just point TensorBoard to the model_dir you chose on the previous step
# by default the model_dir is "sentiment_analysis_output" # by default the model_dir is "sentiment_analysis_output"
$ tensorboard --log_dir="sentiment_analysis_output" $ tensorboard --log-dir="sentiment_analysis_output"
``` ```
## Warnings ## Warnings
......
...@@ -22,19 +22,15 @@ from __future__ import absolute_import ...@@ -22,19 +22,15 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import argparse
import cPickle import cPickle
import os import os
import tensorflow as tf import tensorflow as tf
FLAGS = tf.flags.FLAGS FLAGS = None
tf.flags.DEFINE_string('input_dir', '',
'Directory where CIFAR10 data is located.')
tf.flags.DEFINE_string('output_dir', '',
'Directory where TFRecords will be saved.'
'The TFRecords will have the same name as'
' the CIFAR10 inputs + .tfrecords.')
def _int64_feature(value): def _int64_feature(value):
...@@ -91,4 +87,22 @@ def main(unused_argv): ...@@ -91,4 +87,22 @@ def main(unused_argv):
if __name__ == '__main__': if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument(
'--input_dir',
type=str,
default='',
help='Directory where CIFAR10 data is located.'
)
parser.add_argument(
'--output_dir',
type=str,
default='',
help="""\
    Directory where TFRecords will be saved. The TFRecords will have the same
name as the CIFAR10 inputs + .tfrecords.\
"""
)
FLAGS = parser.parse_args()
tf.app.run(main) tf.app.run(main)
...@@ -23,12 +23,12 @@ from __future__ import absolute_import ...@@ -23,12 +23,12 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import argparse
import tensorflow as tf import tensorflow as tf
FLAGS = tf.flags.FLAGS FLAGS = None
tf.flags.DEFINE_float('batch_norm_decay', 0.997, 'Decay for batch norm.')
tf.flags.DEFINE_float('batch_norm_epsilon', 1e-5, 'Epsilon for batch norm.')
class ResNet(object): class ResNet(object):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment