Commit 52e4ded8 authored by A. Unique TensorFlower's avatar A. Unique TensorFlower
Browse files

Internal change

PiperOrigin-RevId: 310762203
parent 5e641c43
......@@ -18,6 +18,8 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import tensorflow as tf
from official.nlp.modeling import layers
......@@ -47,25 +49,34 @@ class Attention(tf.keras.layers.Layer):
"""Builds the layer."""
# Layers for linearly projecting the queries, keys, and values.
size_per_head = self.hidden_size // self.num_heads
def _glorot_initializer(fan_in, fan_out):
limit = math.sqrt(6.0 / (fan_in + fan_out))
return tf.keras.initializers.RandomUniform(minval=-limit, maxval=limit)
attention_initializer = _glorot_initializer(input_shape[-1],
self.hidden_size)
self.query_dense_layer = layers.DenseEinsum(
output_shape=(self.num_heads, size_per_head),
kernel_initializer="glorot_uniform",
kernel_initializer=attention_initializer,
use_bias=False,
name="query")
self.key_dense_layer = layers.DenseEinsum(
output_shape=(self.num_heads, size_per_head),
kernel_initializer="glorot_uniform",
kernel_initializer=attention_initializer,
use_bias=False,
name="key")
self.value_dense_layer = layers.DenseEinsum(
output_shape=(self.num_heads, size_per_head),
kernel_initializer="glorot_uniform",
kernel_initializer=attention_initializer,
use_bias=False,
name="value")
output_initializer = _glorot_initializer(self.hidden_size, self.hidden_size)
self.output_dense_layer = layers.DenseEinsum(
output_shape=self.hidden_size,
num_summed_dimensions=2,
kernel_initializer="glorot_uniform",
kernel_initializer=output_initializer,
use_bias=False,
name="output_transform")
super(Attention, self).build(input_shape)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment