fix(server): t5 cannot run in f16 (#356)

Fix #349

fix(server): t5 cannot run in f16 (#356)
Fix #349
4f4c9c16 · OlivierDehaene · GitHub · 91d9beec · 4f4c9c16
Unverified commit 4f4c9c16, authored May 23, 2023 by OlivierDehaene; committed by GitHub on May 23, 2023
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 1 addition and 1 deletion

server/text_generation_server/models/t5.py +1 -1

No files found.
--- a/server/text_generation_server/models/t5.py
+++ b/server/text_generation_server/models/t5.py
@@ -40,7 +40,7 @@ class T5Sharded(Seq2SeqLM):
        self.process_group, rank, world_size = initialize_torch_distributed()
        if torch.cuda.is_available():
            device = torch.device(f"cuda:{rank}")
-            dtype = torch.float16
+            dtype = torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float32
        else:
            device = torch.device("cpu")
            dtype = torch.float32