"examples/seq2seq/utils.py" did not exist on "eaabaaf750613e00fdece7245ef04ca3d1a775a9"
Commit e0b10e40 authored by wanglch's avatar wanglch
Browse files

Initial commit

parent ce623fe2
@@ -6,6 +6,8 @@ from transformers.activations import ACT2FN
 import math
 from torch.nn import LayerNorm
+
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
 def standard_attention(query_layer, key_layer, value_layer, scaling_attention_score=True):
     if scaling_attention_score:
         query_layer = query_layer / math.sqrt(query_layer.shape[-1])
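This hunk only shows the scaling step of standard_attention, plus the new module-level device handle that the .to(device) call in the next hunk relies on. For orientation, a minimal, self-contained sketch of how a scaled dot-product attention with this signature typically continues; the softmax and value matmul below are assumptions for illustration, not part of this commit:

import math
import torch

def standard_attention_sketch(query_layer, key_layer, value_layer, scaling_attention_score=True):
    # Scale queries by sqrt(head_dim) so the attention logits keep roughly unit variance.
    if scaling_attention_score:
        query_layer = query_layer / math.sqrt(query_layer.shape[-1])
    # (B, H, Lq, D) @ (B, H, D, Lk) -> (B, H, Lq, Lk)
    attention_scores = torch.matmul(query_layer, key_layer.transpose(-1, -2))
    attention_probs = torch.softmax(attention_scores, dim=-1)
    # (B, H, Lq, Lk) @ (B, H, Lk, D) -> (B, H, Lq, D)
    return torch.matmul(attention_probs, value_layer)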
@@ -39,7 +41,7 @@ class PatchEmbedding(nn.Module):
         self.position_embedding = nn.Embedding(config.num_positions, config.hidden_size)
 
     def forward(self, images: "tensor(B, C, H, W)") -> "tensor(B, L, D)":
-        x = self.proj(images)
+        x = self.proj(images).to(device)
         x = x.flatten(2).transpose(1, 2)
         cls_token = self.cls_embedding.expand(x.shape[0], -1, -1)
         x = torch.cat((cls_token, x), dim=1)
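The change above moves the patch-projection output onto the module-level device before the rest of the forward pass. For reference, a simplified, self-contained sketch of a ViT-style patch embedding with the same flow; the class name, hyperparameters, and position-embedding step are illustrative assumptions, not the repository's actual config:

import torch
from torch import nn

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

class PatchEmbeddingSketch(nn.Module):
    def __init__(self, in_channels=3, hidden_size=768, patch_size=16, num_positions=197):
        super().__init__()
        # Each patch_size x patch_size patch is projected to one hidden_size-dim token.
        self.proj = nn.Conv2d(in_channels, hidden_size, kernel_size=patch_size, stride=patch_size)
        self.cls_embedding = nn.Parameter(torch.zeros(1, 1, hidden_size))
        self.position_embedding = nn.Embedding(num_positions, hidden_size)

    def forward(self, images):
        x = self.proj(images).to(device)               # (B, D, H/p, W/p)
        x = x.flatten(2).transpose(1, 2)               # (B, L, D) patch tokens
        cls_token = self.cls_embedding.expand(x.shape[0], -1, -1)
        x = torch.cat((cls_token, x), dim=1)           # prepend the [CLS] token
        positions = torch.arange(x.shape[1], device=x.device)
        return x + self.position_embedding(positions)  # add learned position embeddings

# Usage: a 224x224 image yields 14*14 patches plus [CLS] = 197 tokens of width 768.
model = PatchEmbeddingSketch().to(device)
images = torch.randn(2, 3, 224, 224, device=device)
tokens = model(images)   # shape (2, 197, 768)

Note that the in-forward .to(device) is a no-op once the module and its inputs already live on that device, as in the usage above; moving the whole module with .to(device) is usually the simpler placement pattern.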
@@ -66,7 +68,7 @@ class Attention(nn.Module):
         out = attention_fn_default(
             q, k, v
         )
-        output = self.dense(out.transpose(1, 2).view(B, L, -1))
+        output = self.dense(out.transpose(1, 2).reshape(B, L, -1))
         output = self.output_dropout(output)
         return output
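The .view-to-.reshape switch matters because out.transpose(1, 2) returns a non-contiguous tensor: .view requires contiguous memory and raises a RuntimeError, while .reshape falls back to a copy when needed. A small illustration with assumed shapes:

import torch

B, H, L, D = 2, 8, 16, 64
out = torch.randn(B, H, L, D)      # (batch, heads, seq_len, head_dim)
t = out.transpose(1, 2)            # (B, L, H, D), strides no longer contiguous

try:
    t.view(B, L, -1)               # fails: view cannot merge dims with non-adjacent strides
except RuntimeError:
    pass                           # "view size is not compatible with input tensor's size and stride"

merged = t.reshape(B, L, -1)       # works: copies to contiguous memory when necessary
assert merged.shape == (B, L, H * D)
# Equivalent explicit form: out.transpose(1, 2).contiguous().view(B, L, -1)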