"git@developer.sourcefind.cn:renzhc/diffusers_dcu.git" did not exist on "1bcd19e4d0225826aa08b8fcab2e3b566ce1e143"
Commit 03fffb98 authored by Alexei Baevski, committed by Myle Ott
Browse files

fix sinusoidal embedding init size

parent 930c9580
...@@ -408,7 +408,7 @@ def PositionalEmbedding(num_embeddings, embedding_dim, padding_idx, left_pad, le ...@@ -408,7 +408,7 @@ def PositionalEmbedding(num_embeddings, embedding_dim, padding_idx, left_pad, le
nn.init.normal_(m.weight, mean=0, std=embedding_dim ** -0.5) nn.init.normal_(m.weight, mean=0, std=embedding_dim ** -0.5)
nn.init.constant_(m.weight[padding_idx], 0) nn.init.constant_(m.weight[padding_idx], 0)
else: else:
m = SinusoidalPositionalEmbedding(embedding_dim, padding_idx, left_pad, init_size=num_embeddings) m = SinusoidalPositionalEmbedding(embedding_dim, padding_idx, left_pad)
return m return m
......
...@@ -56,12 +56,12 @@ class SinusoidalPositionalEmbedding(nn.Module): ...@@ -56,12 +56,12 @@ class SinusoidalPositionalEmbedding(nn.Module):
# recompute/expand embeddings if needed # recompute/expand embeddings if needed
bsz, seq_len = input.size() bsz, seq_len = input.size()
max_pos = self.padding_idx + 1 + seq_len max_pos = self.padding_idx + 1 + seq_len
if max_pos > self.weights.size(0): if self.weights is None or max_pos > self.weights.size(0):
self.weights = SinusoidalPositionalEmbedding.get_embedding( self.weights = SinusoidalPositionalEmbedding.get_embedding(
max_pos, max_pos,
self.embedding_dim, self.embedding_dim,
self.padding_idx, self.padding_idx,
).type_as(self.weights) )
self.weights = self.weights.type_as(self._float_tensor) self.weights = self.weights.type_as(self._float_tensor)
if incremental_state is not None: if incremental_state is not None:
...@@ -69,7 +69,7 @@ class SinusoidalPositionalEmbedding(nn.Module): ...@@ -69,7 +69,7 @@ class SinusoidalPositionalEmbedding(nn.Module):
return self.weights[self.padding_idx + seq_len, :].expand(bsz, 1, -1) return self.weights[self.padding_idx + seq_len, :].expand(bsz, 1, -1)
positions = utils.make_positions(input.data, self.padding_idx, self.left_pad) positions = utils.make_positions(input.data, self.padding_idx, self.left_pad)
return self.weights.index_select(0, positions.view(-1)).view(bsz, seq_len, -1) return self.weights.index_select(0, positions.view(-1)).view(bsz, seq_len, -1).detach()
def max_positions(self): def max_positions(self):
"""Maximum number of supported positions.""" """Maximum number of supported positions."""
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment