Commit 8163baab authored by Tim Rault

Convert indentation from 2 spaces to 4 spaces

parent 555b7d66
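
The change is purely mechanical. As a rough illustration, such a conversion could be scripted along the following lines (a minimal sketch only; the actual tool used for this commit is not stated, and a naive rewrite like this does not re-align wrapped continuation lines the way the real diff does):

import sys

def reindent(text, old=2, new=4):
    # Scale each leading run of `old`-space indentation levels up to `new`
    # spaces; everything after the leading whitespace is left untouched.
    out = []
    for line in text.splitlines(True):
        stripped = line.lstrip(" ")
        levels = (len(line) - len(stripped)) // old
        out.append(" " * (levels * new) + stripped)
    return "".join(out)

if __name__ == "__main__":
    path = sys.argv[1]
    with open(path) as f:
        src = f.read()
    with open(path, "w") as f:
        f.write(reindent(src))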
@@ -27,250 +27,249 @@ import tensorflow as tf

class BertModelTest(tf.test.TestCase):

    class BertModelTester(object):

        def __init__(self,
                     parent,
                     batch_size=13,
                     seq_length=7,
                     is_training=True,
                     use_input_mask=True,
                     use_token_type_ids=True,
                     vocab_size=99,
                     hidden_size=32,
                     num_hidden_layers=5,
                     num_attention_heads=4,
                     intermediate_size=37,
                     hidden_act="gelu",
                     hidden_dropout_prob=0.1,
                     attention_probs_dropout_prob=0.1,
                     max_position_embeddings=512,
                     type_vocab_size=16,
                     initializer_range=0.02,
                     scope=None):
            self.parent = parent
            self.batch_size = batch_size
            self.seq_length = seq_length
            self.is_training = is_training
            self.use_input_mask = use_input_mask
            self.use_token_type_ids = use_token_type_ids
            self.vocab_size = vocab_size
            self.hidden_size = hidden_size
            self.num_hidden_layers = num_hidden_layers
            self.num_attention_heads = num_attention_heads
            self.intermediate_size = intermediate_size
            self.hidden_act = hidden_act
            self.hidden_dropout_prob = hidden_dropout_prob
            self.attention_probs_dropout_prob = attention_probs_dropout_prob
            self.max_position_embeddings = max_position_embeddings
            self.type_vocab_size = type_vocab_size
            self.initializer_range = initializer_range
            self.scope = scope

        def create_model(self):
            input_ids = BertModelTest.ids_tensor([self.batch_size, self.seq_length],
                                                 self.vocab_size)

            input_mask = None
            if self.use_input_mask:
                input_mask = BertModelTest.ids_tensor(
                    [self.batch_size, self.seq_length], vocab_size=2)

            token_type_ids = None
            if self.use_token_type_ids:
                token_type_ids = BertModelTest.ids_tensor(
                    [self.batch_size, self.seq_length], self.type_vocab_size)

            config = modeling.BertConfig(
                vocab_size=self.vocab_size,
                hidden_size=self.hidden_size,
                num_hidden_layers=self.num_hidden_layers,
                num_attention_heads=self.num_attention_heads,
                intermediate_size=self.intermediate_size,
                hidden_act=self.hidden_act,
                hidden_dropout_prob=self.hidden_dropout_prob,
                attention_probs_dropout_prob=self.attention_probs_dropout_prob,
                max_position_embeddings=self.max_position_embeddings,
                type_vocab_size=self.type_vocab_size,
                initializer_range=self.initializer_range)

            model = modeling.BertModel(
                config=config,
                is_training=self.is_training,
                input_ids=input_ids,
                input_mask=input_mask,
                token_type_ids=token_type_ids,
                scope=self.scope)

            outputs = {
                "embedding_output": model.get_embedding_output(),
                "sequence_output": model.get_sequence_output(),
                "pooled_output": model.get_pooled_output(),
                "all_encoder_layers": model.get_all_encoder_layers(),
            }
            return outputs

        def check_output(self, result):
            self.parent.assertAllEqual(
                result["embedding_output"].shape,
                [self.batch_size, self.seq_length, self.hidden_size])

            self.parent.assertAllEqual(
                result["sequence_output"].shape,
                [self.batch_size, self.seq_length, self.hidden_size])

            self.parent.assertAllEqual(result["pooled_output"].shape,
                                       [self.batch_size, self.hidden_size])

    def test_default(self):
        self.run_tester(BertModelTest.BertModelTester(self))

    def test_config_to_json_string(self):
        config = modeling.BertConfig(vocab_size=99, hidden_size=37)
        obj = json.loads(config.to_json_string())
        self.assertEqual(obj["vocab_size"], 99)
        self.assertEqual(obj["hidden_size"], 37)

    def run_tester(self, tester):
        with self.test_session() as sess:
            ops = tester.create_model()
            init_op = tf.group(tf.global_variables_initializer(),
                               tf.local_variables_initializer())
            sess.run(init_op)
            output_result = sess.run(ops)
            tester.check_output(output_result)

            self.assert_all_tensors_reachable(sess, [init_op, ops])

    @classmethod
    def ids_tensor(cls, shape, vocab_size, rng=None, name=None):
        """Creates a random int32 tensor of the shape within the vocab size."""
        if rng is None:
            rng = random.Random()

        total_dims = 1
        for dim in shape:
            total_dims *= dim

        values = []
        for _ in range(total_dims):
            values.append(rng.randint(0, vocab_size - 1))

        return tf.constant(value=values, dtype=tf.int32, shape=shape, name=name)

    def assert_all_tensors_reachable(self, sess, outputs):
        """Checks that all the tensors in the graph are reachable from outputs."""
        graph = sess.graph

        ignore_strings = [
            "^.*/dilation_rate$",
            "^.*/Tensordot/concat$",
            "^.*/Tensordot/concat/axis$",
            "^testing/.*$",
        ]

        ignore_regexes = [re.compile(x) for x in ignore_strings]

        unreachable = self.get_unreachable_ops(graph, outputs)
        filtered_unreachable = []
        for x in unreachable:
            do_ignore = False
            for r in ignore_regexes:
                m = r.match(x.name)
                if m is not None:
                    do_ignore = True
            if do_ignore:
                continue
            filtered_unreachable.append(x)
        unreachable = filtered_unreachable

        self.assertEqual(
            len(unreachable), 0, "The following ops are unreachable: %s" %
            (" ".join([x.name for x in unreachable])))

    @classmethod
    def get_unreachable_ops(cls, graph, outputs):
        """Finds all of the tensors in graph that are unreachable from outputs."""
        outputs = cls.flatten_recursive(outputs)
        output_to_op = collections.defaultdict(list)
        op_to_all = collections.defaultdict(list)
        assign_out_to_in = collections.defaultdict(list)

        for op in graph.get_operations():
            for x in op.inputs:
                op_to_all[op.name].append(x.name)
            for y in op.outputs:
                output_to_op[y.name].append(op.name)
                op_to_all[op.name].append(y.name)
            if str(op.type) == "Assign":
                for y in op.outputs:
                    for x in op.inputs:
                        assign_out_to_in[y.name].append(x.name)

        assign_groups = collections.defaultdict(list)
        for out_name in assign_out_to_in.keys():
            name_group = assign_out_to_in[out_name]
            for n1 in name_group:
                assign_groups[n1].append(out_name)
                for n2 in name_group:
                    if n1 != n2:
                        assign_groups[n1].append(n2)

        seen_tensors = {}
        stack = [x.name for x in outputs]
        while stack:
            name = stack.pop()
            if name in seen_tensors:
                continue
            seen_tensors[name] = True

            if name in output_to_op:
                for op_name in output_to_op[name]:
                    if op_name in op_to_all:
                        for input_name in op_to_all[op_name]:
                            if input_name not in stack:
                                stack.append(input_name)

            expanded_names = []
            if name in assign_groups:
                for assign_name in assign_groups[name]:
                    expanded_names.append(assign_name)

            for expanded_name in expanded_names:
                if expanded_name not in stack:
                    stack.append(expanded_name)

        unreachable_ops = []
        for op in graph.get_operations():
            is_unreachable = False
            all_names = [x.name for x in op.inputs] + [x.name for x in op.outputs]
            for name in all_names:
                if name not in seen_tensors:
                    is_unreachable = True
            if is_unreachable:
                unreachable_ops.append(op)
        return unreachable_ops

    @classmethod
    def flatten_recursive(cls, item):
        """Flattens (potentially nested) a tuple/dictionary/list to a list."""
        output = []
        if isinstance(item, list):
            output.extend(item)
        elif isinstance(item, tuple):
            output.extend(list(item))
        elif isinstance(item, dict):
            for (_, v) in six.iteritems(item):
                output.append(v)
        else:
            return [item]

        flat_output = []
        for x in output:
            flat_output.extend(cls.flatten_recursive(x))
        return flat_output


if __name__ == "__main__":
    tf.test.main()
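
For reference, a minimal sketch (assuming TF 1.x graph mode; the tiny graph and names below are illustrative only, not part of the commit) of how the get_unreachable_ops helper in BertModelTest above behaves on a toy graph:

import tensorflow as tf

graph = tf.Graph()
with graph.as_default():
    a = tf.constant(1.0, name="a")
    b = tf.constant(2.0, name="b")   # never consumed by the requested output
    c = tf.add(a, a, name="c")

# Only the op producing `b` should be reported: no path connects it to `c`.
unreachable = BertModelTest.get_unreachable_ops(graph, [c])
print([op.name for op in unreachable])   # -> ['b']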
@@ -23,149 +23,149 @@ import tensorflow as tf

def create_optimizer(loss, init_lr, num_train_steps, num_warmup_steps, use_tpu):
    """Creates an optimizer training op."""
    global_step = tf.train.get_or_create_global_step()

    learning_rate = tf.constant(value=init_lr, shape=[], dtype=tf.float32)

    # Implements linear decay of the learning rate.
    learning_rate = tf.train.polynomial_decay(
        learning_rate,
        global_step,
        num_train_steps,
        end_learning_rate=0.0,
        power=1.0,
        cycle=False)

    # Implements linear warmup. I.e., if global_step < num_warmup_steps, the
    # learning rate will be `global_step/num_warmup_steps * init_lr`.
    if num_warmup_steps:
        global_steps_int = tf.cast(global_step, tf.int32)
        warmup_steps_int = tf.constant(num_warmup_steps, dtype=tf.int32)

        global_steps_float = tf.cast(global_steps_int, tf.float32)
        warmup_steps_float = tf.cast(warmup_steps_int, tf.float32)

        warmup_percent_done = global_steps_float / warmup_steps_float
        warmup_learning_rate = init_lr * warmup_percent_done

        is_warmup = tf.cast(global_steps_int < warmup_steps_int, tf.float32)
        learning_rate = (
            (1.0 - is_warmup) * learning_rate + is_warmup * warmup_learning_rate)

    # It is recommended that you use this optimizer for fine tuning, since this
    # is how the model was trained (note that the Adam m/v variables are NOT
    # loaded from init_checkpoint.)
    optimizer = AdamWeightDecayOptimizer(
        learning_rate=learning_rate,
        weight_decay_rate=0.01,
        beta_1=0.9,
        beta_2=0.999,
        epsilon=1e-6,
        exclude_from_weight_decay=["LayerNorm", "layer_norm", "bias"])

    if use_tpu:
        optimizer = tf.contrib.tpu.CrossShardOptimizer(optimizer)

    tvars = tf.trainable_variables()
    grads = tf.gradients(loss, tvars)

    # This is how the model was pre-trained.
    (grads, _) = tf.clip_by_global_norm(grads, clip_norm=1.0)

    train_op = optimizer.apply_gradients(
        zip(grads, tvars), global_step=global_step)

    new_global_step = global_step + 1
    train_op = tf.group(train_op, [global_step.assign(new_global_step)])
    return train_op


class AdamWeightDecayOptimizer(tf.train.Optimizer):
    """A basic Adam optimizer that includes "correct" L2 weight decay."""

    def __init__(self,
                 learning_rate,
                 weight_decay_rate=0.0,
                 beta_1=0.9,
                 beta_2=0.999,
                 epsilon=1e-6,
                 exclude_from_weight_decay=None,
                 name="AdamWeightDecayOptimizer"):
        """Constructs an AdamWeightDecayOptimizer."""
        super(AdamWeightDecayOptimizer, self).__init__(False, name)

        self.learning_rate = learning_rate
        self.weight_decay_rate = weight_decay_rate
        self.beta_1 = beta_1
        self.beta_2 = beta_2
        self.epsilon = epsilon
        self.exclude_from_weight_decay = exclude_from_weight_decay

    def apply_gradients(self, grads_and_vars, global_step=None, name=None):
        """See base class."""
        assignments = []
        for (grad, param) in grads_and_vars:
            if grad is None or param is None:
                continue

            param_name = self._get_variable_name(param.name)

            m = tf.get_variable(
                name=param_name + "/adam_m",
                shape=param.shape.as_list(),
                dtype=tf.float32,
                trainable=False,
                initializer=tf.zeros_initializer())
            v = tf.get_variable(
                name=param_name + "/adam_v",
                shape=param.shape.as_list(),
                dtype=tf.float32,
                trainable=False,
                initializer=tf.zeros_initializer())

            # Standard Adam update.
            next_m = (
                tf.multiply(self.beta_1, m) + tf.multiply(1.0 - self.beta_1, grad))
            next_v = (
                tf.multiply(self.beta_2, v) + tf.multiply(1.0 - self.beta_2,
                                                          tf.square(grad)))

            update = next_m / (tf.sqrt(next_v) + self.epsilon)

            # Just adding the square of the weights to the loss function is *not*
            # the correct way of using L2 regularization/weight decay with Adam,
            # since that will interact with the m and v parameters in strange ways.
            #
            # Instead we want to decay the weights in a manner that doesn't interact
            # with the m/v parameters. This is equivalent to adding the square
            # of the weights to the loss with plain (non-momentum) SGD.
            if self._do_use_weight_decay(param_name):
                update += self.weight_decay_rate * param

            update_with_lr = self.learning_rate * update

            next_param = param - update_with_lr

            assignments.extend(
                [param.assign(next_param),
                 m.assign(next_m),
                 v.assign(next_v)])
        return tf.group(*assignments, name=name)

    def _do_use_weight_decay(self, param_name):
        """Whether to use L2 weight decay for `param_name`."""
        if not self.weight_decay_rate:
            return False
        if self.exclude_from_weight_decay:
            for r in self.exclude_from_weight_decay:
                if re.search(r, param_name) is not None:
                    return False
        return True

    def _get_variable_name(self, param_name):
        """Get the variable name from the tensor name."""
        m = re.match("^(.*):\\d+$", param_name)
        if m is not None:
            param_name = m.group(1)
        return param_name
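
As a usage illustration, a minimal sketch (assuming TF 1.x graph mode; the toy loss and step counts are illustrative and not taken from the actual training scripts) of wiring create_optimizer above into a training loop:

import tensorflow as tf

w = tf.get_variable("w", shape=[3], initializer=tf.zeros_initializer())
target = tf.constant([0.4, 0.2, -0.5])
loss = tf.reduce_mean(tf.square(target - w))

# The first 10% of steps warm the learning rate up linearly from 0 to init_lr,
# after which it decays linearly back to 0 over the remaining steps.
train_op = create_optimizer(
    loss, init_lr=2e-5, num_train_steps=1000, num_warmup_steps=100,
    use_tpu=False)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for _ in range(1000):
        sess.run(train_op)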
@@ -22,27 +22,27 @@ import tensorflow as tf

class OptimizationTest(tf.test.TestCase):

    def test_adam(self):
        with self.test_session() as sess:
            w = tf.get_variable(
                "w",
                shape=[3],
                initializer=tf.constant_initializer([0.1, -0.2, -0.1]))
            x = tf.constant([0.4, 0.2, -0.5])
            loss = tf.reduce_mean(tf.square(x - w))
            tvars = tf.trainable_variables()
            grads = tf.gradients(loss, tvars)
            global_step = tf.train.get_or_create_global_step()
            optimizer = optimization.AdamWeightDecayOptimizer(learning_rate=0.2)
            train_op = optimizer.apply_gradients(zip(grads, tvars), global_step)
            init_op = tf.group(tf.global_variables_initializer(),
                               tf.local_variables_initializer())
            sess.run(init_op)
            for _ in range(100):
                sess.run(train_op)
            w_np = sess.run(w)
            self.assertAllClose(w_np.flat, [0.4, 0.2, -0.5], rtol=1e-2, atol=1e-2)


if __name__ == "__main__":
    tf.test.main()
@@ -25,101 +25,101 @@ import tensorflow as tf

class TokenizationTest(tf.test.TestCase):

    def test_full_tokenizer(self):
        vocab_tokens = [
            "[UNK]", "[CLS]", "[SEP]", "want", "##want", "##ed", "wa", "un", "runn",
            "##ing", ","
        ]
        with tempfile.NamedTemporaryFile(delete=False) as vocab_writer:
            vocab_writer.write("".join([x + "\n" for x in vocab_tokens]))

            vocab_file = vocab_writer.name

        tokenizer = tokenization.FullTokenizer(vocab_file)
        os.unlink(vocab_file)

        tokens = tokenizer.tokenize(u"UNwant\u00E9d,running")
        self.assertAllEqual(tokens, ["un", "##want", "##ed", ",", "runn", "##ing"])

        self.assertAllEqual(
            tokenizer.convert_tokens_to_ids(tokens), [7, 4, 5, 10, 8, 9])

    def test_basic_tokenizer_lower(self):
        tokenizer = tokenization.BasicTokenizer(do_lower_case=True)

        self.assertAllEqual(
            tokenizer.tokenize(u" \tHeLLo!how \n Are yoU? "),
            ["hello", "!", "how", "are", "you", "?"])
        self.assertAllEqual(tokenizer.tokenize(u"H\u00E9llo"), ["hello"])

    def test_basic_tokenizer_no_lower(self):
        tokenizer = tokenization.BasicTokenizer(do_lower_case=False)

        self.assertAllEqual(
            tokenizer.tokenize(u" \tHeLLo!how \n Are yoU? "),
            ["HeLLo", "!", "how", "Are", "yoU", "?"])

    def test_wordpiece_tokenizer(self):
        vocab_tokens = [
            "[UNK]", "[CLS]", "[SEP]", "want", "##want", "##ed", "wa", "un", "runn",
            "##ing"
        ]

        vocab = {}
        for (i, token) in enumerate(vocab_tokens):
            vocab[token] = i
        tokenizer = tokenization.WordpieceTokenizer(vocab=vocab)

        self.assertAllEqual(tokenizer.tokenize(""), [])

        self.assertAllEqual(
            tokenizer.tokenize("unwanted running"),
            ["un", "##want", "##ed", "runn", "##ing"])

        self.assertAllEqual(
            tokenizer.tokenize("unwantedX running"), ["[UNK]", "runn", "##ing"])

    def test_convert_tokens_to_ids(self):
        vocab_tokens = [
            "[UNK]", "[CLS]", "[SEP]", "want", "##want", "##ed", "wa", "un", "runn",
            "##ing"
        ]

        vocab = {}
        for (i, token) in enumerate(vocab_tokens):
            vocab[token] = i

        self.assertAllEqual(
            tokenization.convert_tokens_to_ids(
                vocab, ["un", "##want", "##ed", "runn", "##ing"]), [7, 4, 5, 8, 9])

    def test_is_whitespace(self):
        self.assertTrue(tokenization._is_whitespace(u" "))
        self.assertTrue(tokenization._is_whitespace(u"\t"))
        self.assertTrue(tokenization._is_whitespace(u"\r"))
        self.assertTrue(tokenization._is_whitespace(u"\n"))
        self.assertTrue(tokenization._is_whitespace(u"\u00A0"))

        self.assertFalse(tokenization._is_whitespace(u"A"))
        self.assertFalse(tokenization._is_whitespace(u"-"))

    def test_is_control(self):
        self.assertTrue(tokenization._is_control(u"\u0005"))

        self.assertFalse(tokenization._is_control(u"A"))
        self.assertFalse(tokenization._is_control(u" "))
        self.assertFalse(tokenization._is_control(u"\t"))
        self.assertFalse(tokenization._is_control(u"\r"))

    def test_is_punctuation(self):
        self.assertTrue(tokenization._is_punctuation(u"-"))
        self.assertTrue(tokenization._is_punctuation(u"$"))
        self.assertTrue(tokenization._is_punctuation(u"`"))
        self.assertTrue(tokenization._is_punctuation(u"."))

        self.assertFalse(tokenization._is_punctuation(u"A"))
        self.assertFalse(tokenization._is_punctuation(u" "))


if __name__ == "__main__":
    tf.test.main()