Unverified Commit 99f4156f authored by q.yao's avatar q.yao Committed by GitHub
Browse files

Fix cuda reinitialization in a multiprocessing setting (#862)

parent 20d8f47a
...@@ -83,12 +83,18 @@ class TurbomindModelConfig: ...@@ -83,12 +83,18 @@ class TurbomindModelConfig:
return True return True
_WEIGHT_DTYPE_MAP = dict( def _weight_dtype_map(weight_type: str, default=None):
"""get weight dtype map."""
_WEIGHT_DTYPE_MAP = dict(
int4=torch.float16, int4=torch.float16,
fp16=torch.float16, fp16=torch.float16,
fp32=torch.float16, fp32=torch.float16,
bf16=torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16, bf16=torch.bfloat16
) if torch.cuda.is_bf16_supported() else torch.float16,
)
return _WEIGHT_DTYPE_MAP.get(weight_type, default)
class BaseOutputModel(ABC): class BaseOutputModel(ABC):
...@@ -153,7 +159,7 @@ class BaseOutputModel(ABC): ...@@ -153,7 +159,7 @@ class BaseOutputModel(ABC):
if self.to_file: if self.to_file:
if torch.is_floating_point(param): if torch.is_floating_point(param):
torch_type = _WEIGHT_DTYPE_MAP.get(self.cfg.weight_type, torch_type = _weight_dtype_map(self.cfg.weight_type,
torch.float16) torch.float16)
param = param.to(torch_type) param = param.to(torch_type)
tprint(name, param.shape) tprint(name, param.shape)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment