Commit 5f6b0ca3 authored by A. Unique TensorFlower
Browse files

Relax constraint that input size must be divisible by the number of heads.

Also, refactor uvit builder.

PiperOrigin-RevId: 443105970
parent 44065de2
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
"""Keras-based TransformerEncoder block layer.""" """Keras-based TransformerEncoder block layer."""
from absl import logging
import tensorflow as tf import tensorflow as tf
from official.nlp.modeling.layers import util from official.nlp.modeling.layers import util
...@@ -176,9 +177,9 @@ class TransformerEncoderBlock(tf.keras.layers.Layer): ...@@ -176,9 +177,9 @@ class TransformerEncoderBlock(tf.keras.layers.Layer):
einsum_equation = "...bc,cd->...bd" einsum_equation = "...bc,cd->...bd"
hidden_size = input_tensor_shape[-1] hidden_size = input_tensor_shape[-1]
if hidden_size % self._num_heads != 0: if hidden_size % self._num_heads != 0:
raise ValueError( logging.warning(
"The input size (%d) is not a multiple of the number of attention " "The input size (%d) is not a multiple of the number of attention "
"heads (%d)" % (hidden_size, self._num_heads)) "heads (%d)", hidden_size, self._num_heads)
if self._key_dim is None: if self._key_dim is None:
self._key_dim = int(hidden_size // self._num_heads) self._key_dim = int(hidden_size // self._num_heads)
if self._output_last_dim is None: if self._output_last_dim is None:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment