Commit eb149850 authored by A. Unique TensorFlower
Browse files

Relax constraint that input size must be divisible by the number of heads.

Also, refactor uvit builder.

PiperOrigin-RevId: 443105970
parent 9511e4cd
......@@ -14,6 +14,7 @@
"""Keras-based TransformerEncoder block layer."""
from absl import logging
import tensorflow as tf
from official.nlp.modeling.layers import util
......@@ -176,9 +177,9 @@ class TransformerEncoderBlock(tf.keras.layers.Layer):
einsum_equation = "...bc,cd->...bd"
hidden_size = input_tensor_shape[-1]
if hidden_size % self._num_heads != 0:
raise ValueError(
logging.warning(
"The input size (%d) is not a multiple of the number of attention "
"heads (%d)" % (hidden_size, self._num_heads))
"heads (%d)", hidden_size, self._num_heads)
if self._key_dim is None:
self._key_dim = int(hidden_size // self._num_heads)
if self._output_last_dim is None:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment