Commit 52e4ded8 authored by A. Unique TensorFlower
Browse files

Internal change

PiperOrigin-RevId: 310762203
parent 5e641c43
...@@ -18,6 +18,8 @@ from __future__ import absolute_import ...@@ -18,6 +18,8 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import math
import tensorflow as tf import tensorflow as tf
from official.nlp.modeling import layers from official.nlp.modeling import layers
...@@ -47,25 +49,34 @@ class Attention(tf.keras.layers.Layer): ...@@ -47,25 +49,34 @@ class Attention(tf.keras.layers.Layer):
"""Builds the layer.""" """Builds the layer."""
# Layers for linearly projecting the queries, keys, and values. # Layers for linearly projecting the queries, keys, and values.
size_per_head = self.hidden_size // self.num_heads size_per_head = self.hidden_size // self.num_heads
def _glorot_initializer(fan_in, fan_out):
limit = math.sqrt(6.0 / (fan_in + fan_out))
return tf.keras.initializers.RandomUniform(minval=-limit, maxval=limit)
attention_initializer = _glorot_initializer(input_shape[-1],
self.hidden_size)
self.query_dense_layer = layers.DenseEinsum( self.query_dense_layer = layers.DenseEinsum(
output_shape=(self.num_heads, size_per_head), output_shape=(self.num_heads, size_per_head),
kernel_initializer="glorot_uniform", kernel_initializer=attention_initializer,
use_bias=False, use_bias=False,
name="query") name="query")
self.key_dense_layer = layers.DenseEinsum( self.key_dense_layer = layers.DenseEinsum(
output_shape=(self.num_heads, size_per_head), output_shape=(self.num_heads, size_per_head),
kernel_initializer="glorot_uniform", kernel_initializer=attention_initializer,
use_bias=False, use_bias=False,
name="key") name="key")
self.value_dense_layer = layers.DenseEinsum( self.value_dense_layer = layers.DenseEinsum(
output_shape=(self.num_heads, size_per_head), output_shape=(self.num_heads, size_per_head),
kernel_initializer="glorot_uniform", kernel_initializer=attention_initializer,
use_bias=False, use_bias=False,
name="value") name="value")
output_initializer = _glorot_initializer(self.hidden_size, self.hidden_size)
self.output_dense_layer = layers.DenseEinsum( self.output_dense_layer = layers.DenseEinsum(
output_shape=self.hidden_size, output_shape=self.hidden_size,
num_summed_dimensions=2, num_summed_dimensions=2,
kernel_initializer="glorot_uniform", kernel_initializer=output_initializer,
use_bias=False, use_bias=False,
name="output_transform") name="output_transform")
super(Attention, self).build(input_shape) super(Attention, self).build(input_shape)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment