Unverified Commit de2d793e authored by Yih-Dar, committed by GitHub

Fix `EfficientFormer` (#21294)



* fix

* fix checkpoint

* fix style

* tiny update
Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
parent 8788fd0c
@@ -42,7 +42,7 @@ logger = logging.get_logger(__name__)
 _CONFIG_FOR_DOC = "EfficientFormerConfig"
 
 # Base docstring
-_CHECKPOINT_FOR_DOC = "efficientformer-l1-300"
+_CHECKPOINT_FOR_DOC = "snap-research/efficientformer-l1-300"
 _EXPECTED_OUTPUT_SHAPE = [1, 197, 768]
 
 # Image classification docstring
@@ -51,7 +51,7 @@ _IMAGE_CLASS_EXPECTED_OUTPUT = "Egyptian cat"
 EFFICIENTFORMER_PRETRAINED_MODEL_ARCHIVE_LIST = [
-    "huggingface/efficientformer-l1-300",
+    "snap-research/efficientformer-l1-300",
     # See all EfficientFormer models at https://huggingface.co/models?filter=efficientformer
 ]
@@ -133,6 +133,10 @@ class EfficientFormerSelfAttention(nn.Module):
         key_layer = key_layer.permute(0, 2, 1, 3)
         value_layer = value_layer.permute(0, 2, 1, 3)
 
+        # set `model.to(torch_device)` won't change `self.ab.device`, if there is no follow-up `train` or `eval` call.
+        # Let's do it manually here, so users won't have to do this everytime.
+        if not self.training:
+            self.ab = self.ab.to(self.attention_biases.device)
         attention_probs = (torch.matmul(query_layer, key_layer.transpose(-2, -1))) * self.scale + (
             self.attention_biases[:, self.attention_bias_idxs] if self.training else self.ab
         )
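For readers wondering why the last hunk is needed: `nn.Module.to(device)` relocates registered parameters and buffers, but a plain tensor attribute such as the cached `self.ab` keeps its old device unless a later `train()`/`eval()` call rebuilds it or it is moved explicitly. The sketch below reproduces that mismatch with a made-up `TinyAttention` module (the class name, shapes, and `forward` signature are illustrative, not the real EfficientFormer code); only the `if not self.training:` re-sync mirrors what this commit adds.

```python
import torch
from torch import nn


class TinyAttention(nn.Module):
    """Toy module, not the real EfficientFormerSelfAttention."""

    def __init__(self):
        super().__init__()
        # Registered parameter: moved automatically by `model.to(device)`.
        self.attention_biases = nn.Parameter(torch.zeros(8, 49))
        # Plain tensor attribute (eval-time cache): NOT moved by `model.to(device)`.
        self.ab = self.attention_biases.detach().clone()

    def forward(self, scores):
        if not self.training:
            # The fix from this commit: re-sync the cache's device before using it.
            self.ab = self.ab.to(self.attention_biases.device)
        bias = self.attention_biases if self.training else self.ab
        return scores + bias


if torch.cuda.is_available():
    model = TinyAttention().eval()
    model.to("cuda")
    print(model.attention_biases.device)  # cuda:0 -- parameter was moved
    print(model.ab.device)                # cpu    -- plain attribute was left behind
    scores = torch.randn(1, 8, 49, device="cuda")
    out = model(scores)                   # succeeds because forward() re-syncs self.ab
    print(out.device)                     # cuda:0
```

Without that re-sync, the addition in `forward()` would raise a device-mismatch RuntimeError whenever the model is moved to GPU after `self.ab` has already been cached on CPU, which is the failure mode the commit's comment describes.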