"git@developer.sourcefind.cn:OpenDAS/megatron-lm.git" did not exist on "ee57e0865a7586ad2e2e895b232a311246a518ed"
Commit 35fda973 authored by Kaushik Shivakumar
Browse files

work on fixing

parent 66e8a904
...@@ -50,130 +50,136 @@ class ContextProjection(tf.keras.layers.Layer): ...@@ -50,130 +50,136 @@ class ContextProjection(tf.keras.layers.Layer):
return self.projection(self.batch_norm(input_features, is_training)) return self.projection(self.batch_norm(input_features, is_training))
class AttentionBlock(tf.keras.layers.Layer): class AttentionBlock(tf.keras.layers.Layer):
def __init__(self, bottleneck_dimension, attention_temperature, freeze_batchnorm, **kwargs): def __init__(self, bottleneck_dimension, attention_temperature, freeze_batchnorm, output_dimension=None, **kwargs):
self.key_proj = ContextProjection(bottleneck_dimension, freeze_batchnorm) self.key_proj = ContextProjection(bottleneck_dimension, freeze_batchnorm)
self.val_proj = ContextProjection(bottleneck_dimension, freeze_batchnorm) self.val_proj = ContextProjection(bottleneck_dimension, freeze_batchnorm)
self.query_proj = ContextProjection(bottleneck_dimension, freeze_batchnorm) self.query_proj = ContextProjection(bottleneck_dimension, freeze_batchnorm)
self.attention_temperature = attention_temperature self.attention_temperature = attention_temperature
self.freeze_batchnorm = freeze_batchnorm self.freeze_batchnorm = freeze_batchnorm
self.bottleneck_dimension = bottleneck_dimension self.bottleneck_dimension = bottleneck_dimension
if output_dimension:
self.output_dimension = output_dimension
super(AttentionBlock, self).__init__(**kwargs) super(AttentionBlock, self).__init__(**kwargs)
def set_output_dimension(self, new_output_dimension):
self.output_dimension = new_output_dimension
def build(self, input_shapes): def build(self, input_shapes):
self.feature_proj = ContextProjection(input_shapes[0][-1], self.freeze_batchnorm) print(input_shapes)
self.feature_proj = ContextProjection(self.output_dimension, self.freeze_batchnorm)
#self.key_proj.build(input_shapes[0]) #self.key_proj.build(input_shapes[0])
#self.val_proj.build(input_shapes[0]) #self.val_proj.build(input_shapes[0])
#self.query_proj.build(input_shapes[0]) #self.query_proj.build(input_shapes[0])
#self.feature_proj.build(input_shapes[0]) #self.feature_proj.build(input_shapes[0])
pass pass
def filter_weight_value(self, weights, values, valid_mask): def call(self, input_features, is_training, valid_mask):
"""Filters weights and values based on valid_mask. input_features, context_features = input_features
with tf.variable_scope("AttentionBlock"):
queries = project_features(
input_features, self.bottleneck_dimension, is_training,
self.query_proj, normalize=True)
keys = project_features(
context_features, self.bottleneck_dimension, is_training,
self.key_proj, normalize=True)
values = project_features(
context_features, self.bottleneck_dimension, is_training,
self.val_proj, normalize=True)
_NEGATIVE_PADDING_VALUE will be added to invalid elements in the weights to weights = tf.matmul(queries, keys, transpose_b=True)
avoid their contribution in softmax. 0 will be set for the invalid elements in
the values.
Args: weights, values = filter_weight_value(weights, values, valid_mask)
weights: A float Tensor of shape [batch_size, input_size, context_size].
values: A float Tensor of shape [batch_size, context_size,
projected_dimension].
valid_mask: A boolean Tensor of shape [batch_size, context_size]. True means
valid and False means invalid.
Returns: weights = tf.nn.softmax(weights / self.attention_temperature)
weights: A float Tensor of shape [batch_size, input_size, context_size].
values: A float Tensor of shape [batch_size, context_size,
projected_dimension].
Raises: features = tf.matmul(weights, values)
ValueError: If shape of doesn't match. output_features = project_features(
""" features, self.output_dimension, is_training,
w_batch_size, _, w_context_size = weights.shape self.feature_proj, normalize=False)
v_batch_size, v_context_size, _ = values.shape return output_features
m_batch_size, m_context_size = valid_mask.shape
if w_batch_size != v_batch_size or v_batch_size != m_batch_size:
raise ValueError("Please make sure the first dimension of the input"
" tensors are the same.")
if w_context_size != v_context_size:
raise ValueError("Please make sure the third dimension of weights matches"
" the second dimension of values.")
if w_context_size != m_context_size: def filter_weight_value(weights, values, valid_mask):
raise ValueError("Please make sure the third dimension of the weights" """Filters weights and values based on valid_mask.
" matches the second dimension of the valid_mask.")
valid_mask = valid_mask[..., tf.newaxis] _NEGATIVE_PADDING_VALUE will be added to invalid elements in the weights to
avoid their contribution in softmax. 0 will be set for the invalid elements in
the values.
# Force the invalid weights to be very negative so it won't contribute to Args:
# the softmax. weights: A float Tensor of shape [batch_size, input_size, context_size].
weights += tf.transpose( values: A float Tensor of shape [batch_size, context_size,
tf.cast(tf.math.logical_not(valid_mask), weights.dtype) * projected_dimension].
_NEGATIVE_PADDING_VALUE, valid_mask: A boolean Tensor of shape [batch_size, context_size]. True means
perm=[0, 2, 1]) valid and False means invalid.
# Force the invalid values to be 0. Returns:
values *= tf.cast(valid_mask, values.dtype) weights: A float Tensor of shape [batch_size, input_size, context_size].
values: A float Tensor of shape [batch_size, context_size,
projected_dimension].
return weights, values Raises:
ValueError: If shape of doesn't match.
"""
w_batch_size, _, w_context_size = weights.shape
v_batch_size, v_context_size, _ = values.shape
m_batch_size, m_context_size = valid_mask.shape
if w_batch_size != v_batch_size or v_batch_size != m_batch_size:
raise ValueError("Please make sure the first dimension of the input"
" tensors are the same.")
def run_projection(self, features, bottleneck_dimension, is_training, layer, normalize=True): if w_context_size != v_context_size:
"""Projects features to another feature space. raise ValueError("Please make sure the third dimension of weights matches"
" the second dimension of values.")
Args: if w_context_size != m_context_size:
features: A float Tensor of shape [batch_size, features_size, raise ValueError("Please make sure the third dimension of the weights"
num_features]. " matches the second dimension of the valid_mask.")
projection_dimension: A int32 Tensor.
is_training: A boolean Tensor (affecting batch normalization).
node: Contains a custom layer specific to the particular operation
being performed (key, value, query, features)
normalize: A boolean Tensor. If true, the output features will be l2
normalized on the last dimension.
Returns: valid_mask = valid_mask[..., tf.newaxis]
A float Tensor of shape [batch, features_size, projection_dimension].
"""
shape_arr = features.shape
batch_size, _, num_features = shape_arr
print("Orig", features.shape)
features = tf.reshape(features, [-1, num_features])
projected_features = layer(features, is_training) # Force the invalid weights to be very negative so it won't contribute to
# the softmax.
weights += tf.transpose(
tf.cast(tf.math.logical_not(valid_mask), weights.dtype) *
_NEGATIVE_PADDING_VALUE,
perm=[0, 2, 1])
projected_features = tf.reshape(projected_features, [batch_size, -1, bottleneck_dimension]) # Force the invalid values to be 0.
print(projected_features.shape) values *= tf.cast(valid_mask, values.dtype)
if normalize: return weights, values
projected_features = tf.keras.backend.l2_normalize(projected_features, axis=-1)
return projected_features def project_features(features, bottleneck_dimension, is_training, layer, normalize=True):
"""Projects features to another feature space.
def call(self, input_features, is_training, valid_mask): Args:
input_features, context_features = input_features features: A float Tensor of shape [batch_size, features_size,
with tf.variable_scope("AttentionBlock"): num_features].
queries = self.run_projection( projection_dimension: A int32 Tensor.
input_features, self.bottleneck_dimension, is_training, is_training: A boolean Tensor (affecting batch normalization).
self.query_proj, normalize=True) node: Contains a custom layer specific to the particular operation
keys = self.run_projection( being performed (key, value, query, features)
context_features, self.bottleneck_dimension, is_training, normalize: A boolean Tensor. If true, the output features will be l2
self.key_proj, normalize=True) normalized on the last dimension.
values = self.run_projection(
context_features, self.bottleneck_dimension, is_training,
self.val_proj, normalize=True)
weights = tf.matmul(queries, keys, transpose_b=True) Returns:
A float Tensor of shape [batch, features_size, projection_dimension].
"""
shape_arr = features.shape
batch_size, _, num_features = shape_arr
print("Orig", features.shape)
features = tf.reshape(features, [-1, num_features])
weights, values = self.filter_weight_value(weights, values, valid_mask) projected_features = layer(features, is_training)
weights = tf.nn.softmax(weights / self.attention_temperature) projected_features = tf.reshape(projected_features, [batch_size, -1, bottleneck_dimension])
print(projected_features.shape)
features = tf.matmul(weights, values) if normalize:
output_features = self.run_projection( projected_features = tf.keras.backend.l2_normalize(projected_features, axis=-1)
features, input_features.shape[-1], is_training,
self.feature_proj, normalize=False)
return output_features
return projected_features
def compute_valid_mask(num_valid_elements, num_elements): def compute_valid_mask(num_valid_elements, num_elements):
"""Computes mask of valid entries within padded context feature. """Computes mask of valid entries within padded context feature.
...@@ -222,6 +228,7 @@ def compute_box_context_attention(box_features, context_features, ...@@ -222,6 +228,7 @@ def compute_box_context_attention(box_features, context_features,
valid_mask = compute_valid_mask(valid_context_size, context_size) valid_mask = compute_valid_mask(valid_context_size, context_size)
channels = box_features.shape[-1] channels = box_features.shape[-1]
attention_block.set_output_dimension(channels)
# Average pools over height and width dimension so that the shape of # Average pools over height and width dimension so that the shape of
# box_features becomes [batch_size, max_num_proposals, channels]. # box_features becomes [batch_size, max_num_proposals, channels].
......
...@@ -80,9 +80,9 @@ class ContextRcnnLibTest(parameterized.TestCase, test_case.TestCase, ...@@ -80,9 +80,9 @@ class ContextRcnnLibTest(parameterized.TestCase, test_case.TestCase,
projected_features = context_rcnn_lib.project_features( projected_features = context_rcnn_lib.project_features(
features, features,
projection_dimension, projection_dimension,
is_training=is_training, is_training,
normalize=normalize, context_rcnn_lib.ContextProjection(projection_dimension, False),
node=context_rcnn_lib.ContextProjection(projection_dimension, False)) normalize=normalize)
# Makes sure the shape is correct. # Makes sure the shape is correct.
self.assertAllEqual(projected_features.shape, [2, 3, projection_dimension]) self.assertAllEqual(projected_features.shape, [2, 3, projection_dimension])
...@@ -100,15 +100,15 @@ class ContextRcnnLibTest(parameterized.TestCase, test_case.TestCase, ...@@ -100,15 +100,15 @@ class ContextRcnnLibTest(parameterized.TestCase, test_case.TestCase,
context_features = tf.ones([2, 2, 3], tf.float32) context_features = tf.ones([2, 2, 3], tf.float32)
valid_mask = tf.constant([[True, True], [False, False]], tf.bool) valid_mask = tf.constant([[True, True], [False, False]], tf.bool)
is_training = False is_training = False
projection_layers = {context_rcnn_lib.KEY_NAME: context_rcnn_lib.ContextProjection(bottleneck_dimension, False), context_rcnn_lib.VALUE_NAME: context_rcnn_lib.ContextProjection(bottleneck_dimension, False), #projection_layers = {context_rcnn_lib.KEY_NAME: context_rcnn_lib.ContextProjection(bottleneck_dimension, False), context_rcnn_lib.VALUE_NAME: context_rcnn_lib.ContextProjection(bottleneck_dimension, False),
context_rcnn_lib.QUERY_NAME: context_rcnn_lib.ContextProjection(bottleneck_dimension, False)} # context_rcnn_lib.QUERY_NAME: context_rcnn_lib.ContextProjection(bottleneck_dimension, False)}
#Add in the feature layer because this is further down the pipeline and it isn't automatically injected. #Add in the feature layer because this is further down the pipeline and it isn't automatically injected.
projection_layers['feature'] = context_rcnn_lib.ContextProjection(output_dimension, False) #projection_layers['feature'] = context_rcnn_lib.ContextProjection(output_dimension, False)
output_features = context_rcnn_lib.attention_block( attention_block = context_rcnn_lib.AttentionBlock(bottleneck_dimension, attention_temperature, False)
input_features, context_features, bottleneck_dimension, attention_block.set_output_dimension(output_dimension)
output_dimension, attention_temperature, valid_mask, is_training, projection_layers) output_features = attention_block([input_features, context_features], is_training, valid_mask)
# Makes sure the shape is correct. # Makes sure the shape is correct.
self.assertAllEqual(output_features.shape, [2, 3, output_dimension]) self.assertAllEqual(output_features.shape, [2, 3, output_dimension])
...@@ -120,12 +120,11 @@ class ContextRcnnLibTest(parameterized.TestCase, test_case.TestCase, ...@@ -120,12 +120,11 @@ class ContextRcnnLibTest(parameterized.TestCase, test_case.TestCase,
valid_context_size = tf.constant((2, 3), tf.int32) valid_context_size = tf.constant((2, 3), tf.int32)
bottleneck_dimension = 10 bottleneck_dimension = 10
attention_temperature = 1 attention_temperature = 1
projection_layers = {context_rcnn_lib.KEY_NAME: context_rcnn_lib.ContextProjection(bottleneck_dimension, False), context_rcnn_lib.VALUE_NAME: context_rcnn_lib.ContextProjection(bottleneck_dimension, False),
context_rcnn_lib.QUERY_NAME: context_rcnn_lib.ContextProjection(bottleneck_dimension, False)}
attention_features = context_rcnn_lib.compute_box_context_attention( attention_features = context_rcnn_lib.compute_box_context_attention(
box_features, context_features, valid_context_size, box_features, context_features, valid_context_size,
bottleneck_dimension, attention_temperature, is_training, bottleneck_dimension, attention_temperature, is_training,
False, projection_layers) False, context_rcnn_lib.AttentionBlock(bottleneck_dimension, attention_temperature, False))
# Makes sure the shape is correct. # Makes sure the shape is correct.
self.assertAllEqual(attention_features.shape, [2, 3, 1, 1, 4]) self.assertAllEqual(attention_features.shape, [2, 3, 1, 1, 4])
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment