"torchvision/git@developer.sourcefind.cn:OpenDAS/vision.git" did not exist on "87d54c4e583207e7b003d6b59f1e7f49167f68f1"
Unverified Commit 1fb34e76 authored by Hongkun Yu's avatar Hongkun Yu Committed by GitHub
Browse files

Merged commit includes the following changes: (#7252)

258597234  by rxsang<rxsang@google.com>:

    Update all the TPUStrategy examples to use the new v2 APIs, i.e.
    make_dataset_iterator -> experimental_distribute_dataset,
    make_input_fn_iterator -> experimental_distribute_datasets_from_function,
    unwrap -> experimental_local_results,
    experimental_run -> experimental_run_v2

--
258581998  by taylorrobie<taylorrobie@google.com>:

    Update keras v2 optimizers to reuse coefficients which are shared across all updates, which reduces the total number of ops created by between 5% (for simple optimizers such as SGD and Adagrad) and 25% (for complicated optimizers such as Adam and NAdam). Separate copies are made for each device and dtype.

    The effect of this change on run time is fairly minimal since Grappler is expected to consolidate most of these ops; however it does improve graph construction time.

--

PiperOrigin-RevId: 258597234
parent 79b87be6
...@@ -134,29 +134,29 @@ class AdamWeightDecay(tf.keras.optimizers.Adam): ...@@ -134,29 +134,29 @@ class AdamWeightDecay(tf.keras.optimizers.Adam):
(grads, _) = tf.clip_by_global_norm(grads, clip_norm=1.0) (grads, _) = tf.clip_by_global_norm(grads, clip_norm=1.0)
return super(AdamWeightDecay, self).apply_gradients(zip(grads, tvars)) return super(AdamWeightDecay, self).apply_gradients(zip(grads, tvars))
def _resource_apply_dense(self, grad, var): def _get_lr(self, var_device, var_dtype, apply_state):
var_dtype = var.dtype.base_dtype if apply_state is None:
return self._decayed_lr_t[var_dtype], {}
try: apply_state = apply_state or {}
lr_t = self.apply_cache[var.device, var.dtype.base_dtype].lr_t coefficients = apply_state.get((var_device, var_dtype))
except AttributeError: if coefficients is None:
lr_t = self._decayed_lr_t[var_dtype] coefficients = self._fallback_apply_state(var_device, var_dtype)
apply_state[(var_device, var_dtype)] = coefficients
return coefficients['lr_t'], dict(apply_state=apply_state)
def _resource_apply_dense(self, grad, var, apply_state=None):
lr_t, kwargs = self._get_lr(var.device, var.dtype.base_dtype, apply_state)
with tf.control_dependencies([self._decay_weights_op(var, lr_t)]): with tf.control_dependencies([self._decay_weights_op(var, lr_t)]):
return super(AdamWeightDecay, self)._resource_apply_dense( return super(AdamWeightDecay, self)._resource_apply_dense(
grad, var) grad, var, **kwargs)
def _resource_apply_sparse(self, grad, var, indices):
var_dtype = var.dtype.base_dtype
try:
lr_t = self.apply_cache[var.device, var.dtype.base_dtype].lr_t
except AttributeError:
lr_t = self._decayed_lr_t[var_dtype]
def _resource_apply_sparse(self, grad, var, indices, apply_state=None):
lr_t, kwargs = self._get_lr(var.device, var.dtype.base_dtype, apply_state)
with tf.control_dependencies([self._decay_weights_op(var, lr_t)]): with tf.control_dependencies([self._decay_weights_op(var, lr_t)]):
return super(AdamWeightDecay, self)._resource_apply_sparse( return super(AdamWeightDecay, self)._resource_apply_sparse(
grad, var, indices) grad, var, indices, **kwargs)
def get_config(self): def get_config(self):
config = super(AdamWeightDecay, self).get_config() config = super(AdamWeightDecay, self).get_config()
......
...@@ -161,7 +161,7 @@ def predict_squad_customized(strategy, input_meta_data, bert_config, ...@@ -161,7 +161,7 @@ def predict_squad_customized(strategy, input_meta_data, bert_config,
outputs = strategy.experimental_run_v2( outputs = strategy.experimental_run_v2(
_replicated_step, args=(next(iterator),)) _replicated_step, args=(next(iterator),))
return tf.nest.map_structure(strategy.unwrap, outputs) return tf.nest.map_structure(strategy.experimental_local_results, outputs)
all_results = [] all_results = []
for _ in range(num_steps): for _ in range(num_steps):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment