Commit aadf299d authored by Toby Boyd

Merge branch 'master' into cifar_mkl

parents add2845a b8e7ff1c
@@ -34,6 +34,7 @@ the scored human judgement.
 """
+from __future__ import print_function
 import scipy.stats
 import sys
 from getopt import GetoptError, getopt
@@ -42,8 +43,8 @@ from vecs import Vecs
 try:
   opts, args = getopt(sys.argv[1:], '', ['embeddings=', 'vocab='])
-except GetoptError, e:
-  print >> sys.stderr, e
+except GetoptError as e:
+  print(e, file=sys.stderr)
   sys.exit(2)
 opt_embeddings = None
@@ -56,19 +57,20 @@ for o, a in opts:
     opt_vocab = a
 if not opt_vocab:
-  print >> sys.stderr, 'please specify a vocabulary file with "--vocab"'
+  print('please specify a vocabulary file with "--vocab"', file=sys.stderr)
   sys.exit(2)
 if not opt_embeddings:
-  print >> sys.stderr, 'please specify the embeddings with "--embeddings"'
+  print('please specify the embeddings with "--embeddings"', file=sys.stderr)
   sys.exit(2)
 try:
   vecs = Vecs(opt_vocab, opt_embeddings)
-except IOError, e:
-  print >> sys.stderr, e
+except IOError as e:
+  print(e, file=sys.stderr)
   sys.exit(1)
 def evaluate(lines):
   acts, preds = [], []
@@ -85,6 +87,7 @@ def evaluate(lines):
   rho, _ = scipy.stats.spearmanr(acts, preds)
   return rho
 for filename in args:
   with open(filename, 'r') as lines:
-    print '%0.3f %s' % (evaluate(lines), filename)
+    print('%0.3f %s' % (evaluate(lines), filename))
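Note: the hunks above migrate this script from Python 2-only syntax to the 2/3-compatible form. A minimal standalone sketch of the pattern (not from the commit; the error is hypothetical):

from __future__ import print_function
import sys

try:
    raise IOError('could not open embeddings')  # hypothetical failure
except IOError as e:
    # `except ... as e` parses on both Python 2 and 3, and print() with a
    # file= argument works on 2.7 once print_function is imported.
    print(e, file=sys.stderr)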
@@ -59,13 +59,13 @@ def get_model_fn(num_gpus, variable_strategy, data_format, num_workers):
       features: a list of tensors, one for each tower
       labels: a list of tensors, one for each tower
       mode: ModeKeys.TRAIN or EVAL
-      params: Dictionary of Hyperparameters suitable for tuning
+      params: Hyperparameters suitable for tuning
     Returns:
       A EstimatorSpec object.
     """
     is_training = (mode == tf.estimator.ModeKeys.TRAIN)
-    weight_decay = params['weight_decay']
-    momentum = params['momentum']
+    weight_decay = params.weight_decay
+    momentum = params.momentum
     tower_features = features
     tower_labels = labels
@@ -96,8 +96,8 @@ def get_model_fn(num_gpus, variable_strategy, data_format, num_workers):
       with tf.device(device_setter):
         loss, gradvars, preds = _tower_fn(
             is_training, weight_decay, tower_features[i], tower_labels[i],
-            data_format, params['num_layers'], params['batch_norm_decay'],
-            params['batch_norm_epsilon'])
+            data_format, params.num_layers, params.batch_norm_decay,
+            params.batch_norm_epsilon)
         tower_losses.append(loss)
         tower_gradvars.append(gradvars)
         tower_preds.append(preds)
@@ -132,14 +132,13 @@ def get_model_fn(num_gpus, variable_strategy, data_format, num_workers):
     with tf.device(consolidation_device):
       # Suggested learning rate scheduling from
       # https://github.com/ppwwyyxx/tensorpack/blob/master/examples/ResNet/cifar10-resnet.py#L155
-      # users could apply other scheduling.
       num_batches_per_epoch = cifar10.Cifar10DataSet.num_examples_per_epoch(
-          'train') // (params['train_batch_size'] * num_workers)
+          'train') // (params.train_batch_size * num_workers)
       boundaries = [
           num_batches_per_epoch * x
           for x in np.array([82, 123, 300], dtype=np.int64)
       ]
-      staged_lr = [params['learning_rate'] * x for x in [1, 0.1, 0.01, 0.002]]
+      staged_lr = [params.learning_rate * x for x in [1, 0.1, 0.01, 0.002]]
       learning_rate = tf.train.piecewise_constant(tf.train.get_global_step(),
                                                   boundaries, staged_lr)
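Note: tf.train.piecewise_constant expects one more value than boundary, i.e. the rate used before the first boundary plus one rate after each boundary is crossed, which is why three epoch boundaries pair with four staged rates here. A hedged sketch with illustrative numbers (the real batch count comes from Cifar10DataSet):

import numpy as np
import tensorflow as tf

num_batches_per_epoch = 390  # hypothetical: ~50000 train examples / batch size 128
boundaries = [num_batches_per_epoch * x
              for x in np.array([82, 123, 300], dtype=np.int64)]
staged_lr = [0.1 * x for x in [1, 0.1, 0.01, 0.002]]  # 0.1 as a stand-in base LR

global_step = tf.train.get_or_create_global_step()
# The returned tensor evaluates to the staged rate for the current step.
learning_rate = tf.train.piecewise_constant(global_step, boundaries, staged_lr)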
@@ -150,7 +149,7 @@ def get_model_fn(num_gpus, variable_strategy, data_format, num_workers):
           learning_rate=learning_rate, momentum=momentum)
       chief_hooks = []
-      if params['sync']:
+      if params.sync:
         optimizer = tf.train.SyncReplicasOptimizer(
             optimizer, replicas_to_aggregate=num_workers)
         sync_replicas_hook = optimizer.make_session_run_hook(True)
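Note: with params.sync enabled, tf.train.SyncReplicasOptimizer waits for gradients from replicas_to_aggregate workers before applying a single aggregated update; the True passed to make_session_run_hook is the is_chief flag. A hedged sketch of the wrapping, with an illustrative base optimizer and worker count:

import tensorflow as tf

base_opt = tf.train.MomentumOptimizer(learning_rate=0.1, momentum=0.9)
optimizer = tf.train.SyncReplicasOptimizer(base_opt, replicas_to_aggregate=4)
sync_replicas_hook = optimizer.make_session_run_hook(True)  # True = is_chief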
@@ -274,7 +273,6 @@ def input_fn(data_dir,
   return feature_shards, label_shards
-# create experiment
 def get_experiment_fn(data_dir,
                       num_gpus,
                       variable_strategy,
@@ -343,7 +341,8 @@ def get_experiment_fn(data_dir,
       model_fn=get_model_fn(num_gpus, variable_strategy, data_format,
                             run_config.num_worker_replicas or 1),
       config=run_config,
-      params=vars(hparams))
+      params=hparams
+  )
   # Create experiment.
   experiment = tf.contrib.learn.Experiment(
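Note: this pairs with the params['...'] to params.x changes earlier in the diff: the Estimator now receives the tf.contrib.training.HParams object itself instead of vars(hparams), so model_fn reads hyperparameters by attribute. A minimal sketch of the two access styles, with illustrative values:

import tensorflow as tf

hparams = tf.contrib.training.HParams(weight_decay=2e-4, momentum=0.9)
print(hparams.momentum)           # attribute access, as in params.momentum
print(vars(hparams)['momentum'])  # dict-style access, as params['momentum'] read it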
@@ -355,7 +354,6 @@ def get_experiment_fn(data_dir,
   # Adding hooks to be used by the estimator on training modes
   experiment.extend_train_hooks(hooks)
   return experiment
 return _experiment_fn
@@ -468,13 +466,11 @@ if __name__ == '__main__':
   parser.add_argument(
       '--num-intra-threads',
       type=int,
-      default=1,
+      default=0,
       help="""\
-      Number of threads to use for intra-op parallelism. If set to 0, the
-      system will pick an appropriate number. The default is 1 since in this
-      example CPU only handles the input pipeline and gradient aggregation
-      (when --is-cpu-ps). Ops that could potentially benefit from intra-op
-      parallelism are scheduled to run on GPUs.\
+      Number of threads to use for intra-op parallelism. When training on CPU
+      set to 0 to have the system pick the appropriate number or alternatively
+      set it to the number of physical CPU cores.\
       """)
   parser.add_argument(
       '--num-inter-threads',
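Note: these flags typically feed tf.ConfigProto, where 0 means "let TensorFlow choose"; intra-op threads parallelize work inside a single op, while inter-op threads run independent ops concurrently. A hedged sketch of the usual wiring (the hunk does not show how this example consumes the flags):

import tensorflow as tf

sess_config = tf.ConfigProto(
    intra_op_parallelism_threads=0,  # 0 = system picks an appropriate number
    inter_op_parallelism_threads=0)
session = tf.Session(config=sess_config)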
@@ -511,15 +507,16 @@ if __name__ == '__main__':
   if args.num_gpus < 0:
     raise ValueError(
-        'Invalid GPU count: \"num_gpus\" must be 0 or a positive integer.')
+        'Invalid GPU count: \"--num-gpus\" must be 0 or a positive integer.')
   if args.num_gpus == 0 and args.variable_strategy == 'GPU':
     raise ValueError(
-        'No GPU available for use, must use CPU to average gradients.')
+        'num-gpus=0, CPU must be used as parameter server. Set'
+        '--variable-strategy=CPU.')
   if (args.num_layers - 2) % 6 != 0:
-    raise ValueError('Invalid num_layers parameter.')
+    raise ValueError('Invalid --num-layers parameter.')
   if args.num_gpus != 0 and args.train_batch_size % args.num_gpus != 0:
-    raise ValueError('train_batch_size must be multiple of num_gpus.')
+    raise ValueError('--train-batch-size must be multiple of --num-gpus.')
   if args.num_gpus != 0 and args.eval_batch_size % args.num_gpus != 0:
-    raise ValueError('eval_batch_size must be multiple of num_gpus.')
+    raise ValueError('--eval-batch-size must be multiple of --num-gpus.')
   main(**vars(args))
@@ -2,6 +2,8 @@ import collections
 import six
 import tensorflow as tf
+import tensorflow as tf
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.core.framework import node_def_pb2
 from tensorflow.python.framework import device as pydev
...