Commit aadf299d authored by Toby Boyd

Merge branch 'master' into cifar_mkl

parents add2845a b8e7ff1c
@@ -34,6 +34,7 @@ the scored human judgement.
 """
+from __future__ import print_function
 import scipy.stats
 import sys
 from getopt import GetoptError, getopt
@@ -42,8 +43,8 @@ from vecs import Vecs
 try:
   opts, args = getopt(sys.argv[1:], '', ['embeddings=', 'vocab='])
-except GetoptError, e:
-  print >> sys.stderr, e
+except GetoptError as e:
+  print(e, file=sys.stderr)
   sys.exit(2)
 opt_embeddings = None
@@ -56,19 +57,20 @@ for o, a in opts:
     opt_vocab = a
 if not opt_vocab:
-  print >> sys.stderr, 'please specify a vocabulary file with "--vocab"'
+  print('please specify a vocabulary file with "--vocab"', file=sys.stderr)
   sys.exit(2)
 if not opt_embeddings:
-  print >> sys.stderr, 'please specify the embeddings with "--embeddings"'
+  print('please specify the embeddings with "--embeddings"', file=sys.stderr)
   sys.exit(2)
 try:
   vecs = Vecs(opt_vocab, opt_embeddings)
-except IOError, e:
-  print >> sys.stderr, e
+except IOError as e:
+  print(e, file=sys.stderr)
   sys.exit(1)
 def evaluate(lines):
   acts, preds = [], []
@@ -85,6 +87,7 @@ def evaluate(lines):
   rho, _ = scipy.stats.spearmanr(acts, preds)
   return rho
 for filename in args:
   with open(filename, 'r') as lines:
-    print '%0.3f %s' % (evaluate(lines), filename)
+    print('%0.3f %s' % (evaluate(lines), filename))
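Note: the hunks above migrate this script from Python 2-only syntax to the 2/3-compatible form. A minimal standalone sketch of the pattern (not from the commit; the error is hypothetical):

from __future__ import print_function
import sys

try:
    raise IOError('could not open embeddings')  # hypothetical failure
except IOError as e:
    # `except ... as e` parses on both Python 2 and 3, and print() with a
    # file= argument works on 2.7 once print_function is imported.
    print(e, file=sys.stderr)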
@@ -59,13 +59,13 @@ def get_model_fn(num_gpus, variable_strategy, data_format, num_workers):
       features: a list of tensors, one for each tower
       labels: a list of tensors, one for each tower
       mode: ModeKeys.TRAIN or EVAL
-      params: Dictionary of Hyperparameters suitable for tuning
+      params: Hyperparameters suitable for tuning
     Returns:
       A EstimatorSpec object.
     """
     is_training = (mode == tf.estimator.ModeKeys.TRAIN)
-    weight_decay = params['weight_decay']
-    momentum = params['momentum']
+    weight_decay = params.weight_decay
+    momentum = params.momentum
     tower_features = features
     tower_labels = labels
@@ -96,8 +96,8 @@ def get_model_fn(num_gpus, variable_strategy, data_format, num_workers):
       with tf.device(device_setter):
         loss, gradvars, preds = _tower_fn(
             is_training, weight_decay, tower_features[i], tower_labels[i],
-            data_format, params['num_layers'], params['batch_norm_decay'],
-            params['batch_norm_epsilon'])
+            data_format, params.num_layers, params.batch_norm_decay,
+            params.batch_norm_epsilon)
         tower_losses.append(loss)
         tower_gradvars.append(gradvars)
         tower_preds.append(preds)
@@ -132,14 +132,13 @@ def get_model_fn(num_gpus, variable_strategy, data_format, num_workers):
     with tf.device(consolidation_device):
       # Suggested learning rate scheduling from
       # https://github.com/ppwwyyxx/tensorpack/blob/master/examples/ResNet/cifar10-resnet.py#L155
-      # users could apply other scheduling.
       num_batches_per_epoch = cifar10.Cifar10DataSet.num_examples_per_epoch(
-          'train') // (params['train_batch_size'] * num_workers)
+          'train') // (params.train_batch_size * num_workers)
       boundaries = [
           num_batches_per_epoch * x
           for x in np.array([82, 123, 300], dtype=np.int64)
       ]
-      staged_lr = [params['learning_rate'] * x for x in [1, 0.1, 0.01, 0.002]]
+      staged_lr = [params.learning_rate * x for x in [1, 0.1, 0.01, 0.002]]
       learning_rate = tf.train.piecewise_constant(tf.train.get_global_step(),
                                                   boundaries, staged_lr)
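Note: tf.train.piecewise_constant expects one more value than boundary, i.e. the rate used before the first boundary plus one rate after each boundary is crossed, which is why three epoch boundaries pair with four staged rates here. A hedged sketch with illustrative numbers (the real batch count comes from Cifar10DataSet):

import numpy as np
import tensorflow as tf

num_batches_per_epoch = 390  # hypothetical: ~50000 train examples / batch size 128
boundaries = [num_batches_per_epoch * x
              for x in np.array([82, 123, 300], dtype=np.int64)]
staged_lr = [0.1 * x for x in [1, 0.1, 0.01, 0.002]]  # 0.1 as a stand-in base LR

global_step = tf.train.get_or_create_global_step()
# The returned tensor evaluates to the staged rate for the current step.
learning_rate = tf.train.piecewise_constant(global_step, boundaries, staged_lr)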
@@ -150,7 +149,7 @@ def get_model_fn(num_gpus, variable_strategy, data_format, num_workers):
           learning_rate=learning_rate, momentum=momentum)
       chief_hooks = []
-      if params['sync']:
+      if params.sync:
         optimizer = tf.train.SyncReplicasOptimizer(
             optimizer, replicas_to_aggregate=num_workers)
         sync_replicas_hook = optimizer.make_session_run_hook(True)
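Note: with params.sync enabled, tf.train.SyncReplicasOptimizer waits for gradients from replicas_to_aggregate workers before applying a single aggregated update; the True passed to make_session_run_hook is the is_chief flag. A hedged sketch of the wrapping, with an illustrative base optimizer and worker count:

import tensorflow as tf

base_opt = tf.train.MomentumOptimizer(learning_rate=0.1, momentum=0.9)
optimizer = tf.train.SyncReplicasOptimizer(base_opt, replicas_to_aggregate=4)
sync_replicas_hook = optimizer.make_session_run_hook(True)  # True = is_chief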
@@ -274,7 +273,6 @@ def input_fn(data_dir,
   return feature_shards, label_shards
-# create experiment
 def get_experiment_fn(data_dir,
                       num_gpus,
                       variable_strategy,
@@ -343,7 +341,8 @@ def get_experiment_fn(data_dir,
       model_fn=get_model_fn(num_gpus, variable_strategy, data_format,
                             run_config.num_worker_replicas or 1),
       config=run_config,
-      params=vars(hparams))
+      params=hparams
+  )
   # Create experiment.
   experiment = tf.contrib.learn.Experiment(
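Note: this pairs with the params['...'] to params.x changes earlier in the diff: the Estimator now receives the tf.contrib.training.HParams object itself instead of vars(hparams), so model_fn reads hyperparameters by attribute. A minimal sketch of the two access styles, with illustrative values:

import tensorflow as tf

hparams = tf.contrib.training.HParams(weight_decay=2e-4, momentum=0.9)
print(hparams.momentum)           # attribute access, as in params.momentum
print(vars(hparams)['momentum'])  # dict-style access, as params['momentum'] read it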
@@ -355,7 +354,6 @@ def get_experiment_fn(data_dir,
   # Adding hooks to be used by the estimator on training modes
   experiment.extend_train_hooks(hooks)
   return experiment
 return _experiment_fn
@@ -468,13 +466,11 @@ if __name__ == '__main__':
   parser.add_argument(
       '--num-intra-threads',
       type=int,
-      default=1,
+      default=0,
       help="""\
-      Number of threads to use for intra-op parallelism. If set to 0, the
-      system will pick an appropriate number. The default is 1 since in this
-      example CPU only handles the input pipeline and gradient aggregation
-      (when --is-cpu-ps). Ops that could potentially benefit from intra-op
-      parallelism are scheduled to run on GPUs.\
+      Number of threads to use for intra-op parallelism. When training on CPU
+      set to 0 to have the system pick the appropriate number or alternatively
+      set it to the number of physical CPU cores.\
       """)
   parser.add_argument(
       '--num-inter-threads',
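Note: these flags typically feed tf.ConfigProto, where 0 means "let TensorFlow choose"; intra-op threads parallelize work inside a single op, while inter-op threads run independent ops concurrently. A hedged sketch of the usual wiring (the hunk does not show how this example consumes the flags):

import tensorflow as tf

sess_config = tf.ConfigProto(
    intra_op_parallelism_threads=0,  # 0 = system picks an appropriate number
    inter_op_parallelism_threads=0)
session = tf.Session(config=sess_config)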
@@ -511,15 +507,16 @@ if __name__ == '__main__':
   if args.num_gpus < 0:
     raise ValueError(
-        'Invalid GPU count: \"num_gpus\" must be 0 or a positive integer.')
+        'Invalid GPU count: \"--num-gpus\" must be 0 or a positive integer.')
   if args.num_gpus == 0 and args.variable_strategy == 'GPU':
     raise ValueError(
-        'No GPU available for use, must use CPU to average gradients.')
+        'num-gpus=0, CPU must be used as parameter server. Set'
+        '--variable-strategy=CPU.')
   if (args.num_layers - 2) % 6 != 0:
-    raise ValueError('Invalid num_layers parameter.')
+    raise ValueError('Invalid --num-layers parameter.')
   if args.num_gpus != 0 and args.train_batch_size % args.num_gpus != 0:
-    raise ValueError('train_batch_size must be multiple of num_gpus.')
+    raise ValueError('--train-batch-size must be multiple of --num-gpus.')
   if args.num_gpus != 0 and args.eval_batch_size % args.num_gpus != 0:
-    raise ValueError('eval_batch_size must be multiple of num_gpus.')
+    raise ValueError('--eval-batch-size must be multiple of --num-gpus.')
   main(**vars(args))
@@ -2,6 +2,8 @@ import collections
 import six
 import tensorflow as tf
+import tensorflow as tf
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.core.framework import node_def_pb2
 from tensorflow.python.framework import device as pydev
...