[Bugfix][Hardware][POWERPC] Fix auto dtype failure in case of POWER10 (#11331)

Signed-off-by: Akash Kaothalkar <0052v2@linux.vnet.ibm.com>

[Bugfix][Hardware][POWERPC] Fix auto dtype failure in case of POWER10 (#11331)
Signed-off-by: Akash Kaothalkar <0052v2@linux.vnet.ibm.com>
48edab80 · Akash kaothalkar · GitHub · a985f7af · 48edab80
Unverified commit 48edab80, authored Dec 20, 2024 by Akash Kaothalkar; committed by GitHub on Dec 20, 2024
Show whitespace changes
Inline Side-by-side

Showing 1 changed file with 12 additions and 1 deletion

vllm/config.py vllm/config.py +12 -1

No files found.
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -22,7 +22,7 @@ from vllm.logger import init_logger
 from vllm.model_executor.layers.quantization import (QUANTIZATION_METHODS,
                                                     get_quantization_config)
 from vllm.model_executor.models import ModelRegistry
-from vllm.platforms import current_platform
+from vllm.platforms import current_platform, interface
 from vllm.tracing import is_otel_available, otel_import_error_traceback
 from vllm.transformers_utils.config import (
    ConfigFormat, get_config, get_hf_image_processor_config,
@@ -2199,6 +2199,17 @@ def _get_and_verify_dtype(
            else:
                torch_dtype = config_dtype

+            if (current_platform.is_cpu()
+                    and current_platform.get_cpu_architecture()
+                    == interface.CpuArchEnum.POWERPC
+                    and (config_dtype == torch.float16
+                         or config_dtype == torch.float32)):
+                logger.info(
+                    "For POWERPC, we cast models to bfloat16 instead of "
+                    "using float16 by default. Float16 is not currently "
+                    "supported for POWERPC.")
+                torch_dtype = torch.bfloat16
+
            if current_platform.is_hpu() and config_dtype == torch.float16:
                logger.info(
                    "For HPU, we cast models to bfloat16 instead of"