Unverified commit 9e2fdf57 authored by Nicolas Patry, committed by GitHub

Removing IPEX_AVAIL. (#2115)

* Removing IPEX_AVAIL.

Chose to unify CPU and XPU under `ipex`. Most of the code is identical, except
for a few spots.

Most of those spots are in the kv-cache layout and the flash_xxx.py files
(a call-site sketch follows below the commit message). Since those files should
be removed and factored away soon, we should not need the special-casing there.

* Forgot a few places.

* Unrelated change.

* Fixing HF_TOKEN.

* HF_TOKEN
parent 3f3b7ffd
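
For context, a minimal sketch of what the call-site change looks like. Only `SYSTEM` and the `"ipex"` value come from this commit; the function and the layout names below are hypothetical, not actual TGI code.

# Hypothetical call site, for illustration only; in TGI the real consumers are
# the kv-cache layout helpers and the flash_*.py files mentioned above.
from text_generation_server.utils.import_utils import SYSTEM


def pick_kv_cache_layout() -> str:
    # Before this commit, call sites checked the boolean IPEX_AVAIL (or
    # SYSTEM == "xpu"); now a single string comparison covers Intel CPU and XPU.
    if SYSTEM == "ipex":
        return "ipex-layout"      # hypothetical layout name
    return "default-layout"       # hypothetical layout name
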
@@ -3,7 +3,7 @@ import torch
 
 from datetime import timedelta
 from loguru import logger
-from text_generation_server.utils.import_utils import IPEX_AVAIL
+from text_generation_server.utils.import_utils import SYSTEM
 
 # Tensor Parallelism settings
 RANK = int(os.getenv("RANK", "0"))
@@ -69,7 +69,7 @@ def initialize_torch_distributed():
 
         if not torch.distributed.is_initialized():
            # Call the init process.
-            if IPEX_AVAIL:
+            if SYSTEM == "ipex":
                import intel_extension_for_pytorch as ipex
 
                ipex.distributed.init_process_group(
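
The hunk above only flips the condition; for readers without dist.py open, the surrounding logic looks roughly like the sketch below. It is condensed: the argument lists, the "ccl" backend name and the nccl/gloo fallback are assumptions based on the import shown in the diff, not a verbatim copy of the file.

import os
from datetime import timedelta

import torch


RANK = int(os.getenv("RANK", "0"))
WORLD_SIZE = int(os.getenv("WORLD_SIZE", "1"))


def initialize_torch_distributed_sketch(system: str):
    # Condensed sketch: on the ipex path the process group is created through
    # intel_extension_for_pytorch (oneCCL); every other system goes through
    # torch.distributed directly.
    if not torch.distributed.is_initialized():
        if system == "ipex":
            import intel_extension_for_pytorch as ipex

            ipex.distributed.init_process_group(
                backend="ccl",
                world_size=WORLD_SIZE,
                rank=RANK,
                timeout=timedelta(seconds=60),
            )
        else:
            backend = "nccl" if torch.cuda.is_available() else "gloo"
            torch.distributed.init_process_group(
                backend=backend,
                world_size=WORLD_SIZE,
                rank=RANK,
                timeout=timedelta(seconds=60),
            )
    return torch.distributed.group.WORLD, RANK, WORLD_SIZE
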
@@ -37,7 +37,10 @@ def get_cpu_free_memory(device, memory_fraction):
     return free_memory
 
-IPEX_AVAIL = is_ipex_available()
+
+def noop(*args, **kwargs):
+    pass
+
 
 SYSTEM = None
 if torch.version.hip is not None:
     SYSTEM = "rocm"
@@ -49,17 +52,19 @@ elif torch.version.cuda is not None and torch.cuda.is_available():
     empty_cache = torch.cuda.empty_cache
     synchronize = torch.cuda.synchronize
     get_free_memory = get_cuda_free_memory
-elif IPEX_AVAIL and hasattr(torch, "xpu") and torch.xpu.is_available():
-    SYSTEM = "xpu"
-    empty_cache = torch.xpu.empty_cache
-    synchronize = torch.xpu.synchronize
-    get_free_memory = get_xpu_free_memory
+elif is_ipex_available():
+    SYSTEM = "ipex"
+    if hasattr(torch, "xpu") and torch.xpu.is_available():
+        empty_cache = torch.xpu.empty_cache
+        synchronize = torch.xpu.synchronize
+        get_free_memory = get_xpu_free_memory
+    else:
+        empty_cache = noop
+        synchronize = noop
+        get_free_memory = get_cpu_free_memory
 else:
     SYSTEM = "cpu"
 
-    def noop(*args, **kwargs):
-        pass
-
     empty_cache = noop
     synchronize = noop
     get_free_memory = get_cpu_free_memory
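
A hedged example of how the reworked module is consumed downstream. Only SYSTEM, empty_cache, synchronize and get_free_memory come from the diff above; the warmup function itself is made up for illustration.

import torch

from text_generation_server.utils.import_utils import (
    SYSTEM,
    empty_cache,
    get_free_memory,
    synchronize,
)


def warmup(device: torch.device, memory_fraction: float = 0.9) -> int:
    # Callers no longer need IPEX_AVAIL or an "xpu"/"cpu" split: the same three
    # callables work on cuda, rocm, ipex (XPU or CPU) and plain cpu, because
    # import_utils binds them to no-ops where the backend has no device cache.
    empty_cache()
    synchronize(device)
    free = get_free_memory(device, memory_fraction)
    if SYSTEM == "ipex":
        # Any remaining ipex-specific handling (e.g. kv-cache layout) goes here.
        pass
    return free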