"sgl-kernel/vscode:/vscode.git/clone" did not exist on "4f42c8cd3e651bb4a7cb489015356571ae378e7b"
Commit 1514b376 authored by Lukasz Kaiser, committed by GitHub

Merge pull request #1461 from EvanKepner/master

update sequence loss to contrib from legacy
parents c9f2ae14 983b7d08
@@ -162,11 +162,21 @@ class PTBModel(object):
         "softmax_w", [size, vocab_size], dtype=data_type())
     softmax_b = tf.get_variable("softmax_b", [vocab_size], dtype=data_type())
     logits = tf.matmul(output, softmax_w) + softmax_b
-    loss = tf.contrib.legacy_seq2seq.sequence_loss_by_example(
-        [logits],
-        [tf.reshape(input_.targets, [-1])],
-        [tf.ones([batch_size * num_steps], dtype=data_type())])
-    self._cost = cost = tf.reduce_sum(loss) / batch_size
+
+    # Reshape logits to a 3-D tensor for the sequence loss
+    logits = tf.reshape(logits, [batch_size, num_steps, vocab_size])
+
+    # Use the contrib sequence loss and average over the batch
+    loss = tf.contrib.seq2seq.sequence_loss(
+        logits,
+        input_.targets,
+        tf.ones([batch_size, num_steps], dtype=data_type()),
+        average_across_timesteps=False,
+        average_across_batch=True)
+
+    # Update the cost
+    self._cost = cost = tf.reduce_sum(loss)
     self._final_state = state
 
     if not is_training:
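Because tf.contrib.seq2seq.sequence_loss with average_across_batch=True already divides by the batch size, the explicit "/ batch_size" in the old cost is no longer needed. Below is a minimal standalone sketch (toy shapes chosen for illustration, not part of this commit; assumes TF 1.x with tf.contrib available) that checks the legacy formulation and the new one produce the same cost:

import numpy as np
import tensorflow as tf

batch_size, num_steps, vocab_size = 4, 5, 10
rng = np.random.RandomState(0)
logits_np = rng.randn(batch_size, num_steps, vocab_size).astype(np.float32)
targets_np = rng.randint(0, vocab_size, size=(batch_size, num_steps)).astype(np.int32)

logits_3d = tf.constant(logits_np)   # [batch_size, num_steps, vocab_size]
targets = tf.constant(targets_np)    # [batch_size, num_steps]

# Legacy path: flatten to [batch_size * num_steps, ...], sum the per-example
# losses, and divide by batch_size explicitly.
flat_logits = tf.reshape(logits_3d, [batch_size * num_steps, vocab_size])
legacy_loss = tf.contrib.legacy_seq2seq.sequence_loss_by_example(
    [flat_logits],
    [tf.reshape(targets, [-1])],
    [tf.ones([batch_size * num_steps], dtype=tf.float32)])
legacy_cost = tf.reduce_sum(legacy_loss) / batch_size

# New path: keep the 3-D logits, let sequence_loss average over the batch
# per time step, then sum over time steps; no explicit division is needed.
new_loss = tf.contrib.seq2seq.sequence_loss(
    logits_3d,
    targets,
    tf.ones([batch_size, num_steps], dtype=tf.float32),
    average_across_timesteps=False,
    average_across_batch=True)
new_cost = tf.reduce_sum(new_loss)

with tf.Session() as sess:
  old_val, new_val = sess.run([legacy_cost, new_cost])
  print(old_val, new_val)  # the two costs should match up to float precision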