Merge pull request #87 from lostmsu/main

Add `device` and `dtype` parameters to `StableEmbedding`

Merge pull request #87 from lostmsu/main
Add `device` and `dtype` parameters to `StableEmbedding`
9d353ca7 · Tim Dettmers · GitHub · 7a6563b6 · 62d39a23 · 9d353ca7
Unverified commit 9d353ca7, authored Jan 02, 2023 by Tim Dettmers; committed by GitHub on Jan 02, 2023.
Show whitespace changes
Inline Side-by-side

Showing 1 changed file with 9 additions and 2 deletions

bitsandbytes/nn/modules.py bitsandbytes/nn/modules.py +9 -2

No files found.
--- a/bitsandbytes/nn/modules.py
+++ b/bitsandbytes/nn/modules.py
@@ -25,6 +25,8 @@ class StableEmbedding(torch.nn.Embedding):
        scale_grad_by_freq: bool = False,
        sparse: bool = False,
        _weight: Optional[Tensor] = None,
+        device=None,
+        dtype=None,
    ) -> None:
        super().__init__(
            num_embeddings,
@@ -35,8 +37,10 @@ class StableEmbedding(torch.nn.Embedding):
            scale_grad_by_freq,
            sparse,
            _weight,
+            device,
+            dtype,
        )
-        self.norm = torch.nn.LayerNorm(embedding_dim)
+        self.norm = torch.nn.LayerNorm(embedding_dim, device=device)
        GlobalOptimManager.get_instance().register_module_override(
            self, "weight", {"optim_bits": 32}
        )
@@ -68,7 +72,10 @@ class StableEmbedding(torch.nn.Embedding):
            self.sparse,
        )

-        return self.norm(emb)
+        # always apply layer norm in full precision
+        emb = emb.to(torch.get_default_dtype())
+
+        return self.norm(emb).to(self.weight.dtype)


 class Embedding(torch.nn.Embedding):