Unverified Commit 7a587349, authored by Yao Matrix, committed by GitHub

xpu enabling for 4 cases (#12345)


Signed-off-by: Yao, Matrix <matrix.yao@intel.com>
parent 9ef11850
@@ -25,6 +25,7 @@ from ..utils import (
     is_accelerate_available,
     logging,
 )
+from ..utils.torch_utils import get_device

 if is_accelerate_available():
@@ -161,7 +162,9 @@ class AutoOffloadStrategy:
         current_module_size = model.get_memory_footprint()

-        mem_on_device = torch.cuda.mem_get_info(execution_device.index)[0]
+        device_type = execution_device.type
+        device_module = getattr(torch, device_type, torch.cuda)
+        mem_on_device = device_module.mem_get_info(execution_device.index)[0]
         mem_on_device = mem_on_device - self.memory_reserve_margin
         if current_module_size < mem_on_device:
             return []
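The hunk above swaps the hard-coded `torch.cuda.mem_get_info` call for a backend module resolved from the execution device's type, so the same free-memory check works on XPU. A minimal standalone sketch of the pattern (the helper name `free_memory_on` is illustrative, not part of this diff):

```python
import torch

def free_memory_on(device: torch.device) -> int:
    # torch.cuda and torch.xpu expose the same mem_get_info interface,
    # so the backend module can be looked up by the device type string;
    # torch.cuda stays as the fallback, matching the change above.
    device_module = getattr(torch, device.type, torch.cuda)
    free_bytes, _total_bytes = device_module.mem_get_info(device.index)
    return free_bytes
```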
@@ -301,7 +304,7 @@ class ComponentsManager:
         cm.add("vae", vae_model, collection="sdxl")

         # Enable auto offloading
-        cm.enable_auto_cpu_offload(device="cuda")
+        cm.enable_auto_cpu_offload()

         # Retrieve components
         unet = cm.get_one(name="unet", collection="sdxl")
@@ -490,6 +493,8 @@ class ComponentsManager:
         gc.collect()
         if torch.cuda.is_available():
             torch.cuda.empty_cache()
+        if torch.xpu.is_available():
+            torch.xpu.empty_cache()

     # YiYi TODO: rename to search_components for now, may remove this method
     def search_components(
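The reclamation path now flushes allocator caches on both backends. A minimal sketch of the combined pattern, with an extra `hasattr` guard added as an assumption (builds without XPU support may lack the `torch.xpu` module entirely):

```python
import gc
import torch

def flush_accelerator_caches() -> None:
    # Drop Python-level references first, then release cached allocator
    # blocks on whichever accelerator backend is present.
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    if hasattr(torch, "xpu") and torch.xpu.is_available():
        torch.xpu.empty_cache()
```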
@@ -678,7 +683,7 @@ class ComponentsManager:
         return get_return_dict(matches, return_dict_with_names)

-    def enable_auto_cpu_offload(self, device: Union[str, int, torch.device] = "cuda", memory_reserve_margin="3GB"):
+    def enable_auto_cpu_offload(self, device: Union[str, int, torch.device] = None, memory_reserve_margin="3GB"):
         """
         Enable automatic CPU offloading for all components.
@@ -704,6 +709,8 @@ class ComponentsManager:
         self.disable_auto_cpu_offload()

         offload_strategy = AutoOffloadStrategy(memory_reserve_margin=memory_reserve_margin)
+        if device is None:
+            device = get_device()
         device = torch.device(device)
         if device.index is None:
             device = torch.device(f"{device.type}:{0}")
@@ -253,6 +253,7 @@ class HunyuanVideoLoRAIntegrationTests(unittest.TestCase):
         expected_slices = Expectations(
             {
                 ("cuda", 7): np.array([0.1013, 0.1924, 0.0078, 0.1021, 0.1929, 0.0078, 0.1023, 0.1919, 0.7402, 0.104, 0.4482, 0.7354, 0.0925, 0.4382, 0.7275, 0.0815]),
+                ("xpu", 3): np.array([0.1013, 0.1924, 0.0078, 0.1021, 0.1929, 0.0078, 0.1023, 0.1919, 0.7402, 0.104, 0.4482, 0.7354, 0.0925, 0.4382, 0.7275, 0.0815]),
             }
         )
         # fmt: on
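The test gains an XPU entry keyed by `("xpu", 3)` alongside the existing `("cuda", 7)` slice. A rough sketch of how a device-keyed lookup like this can resolve; exact-match-then-fallback is an assumption here, and the real `Expectations` helper may match differently:

```python
def resolve_expectation(expectations, device_type, major_version):
    # Prefer an exact (device_type, version) match, then fall back to
    # any entry registered for the same device type.
    key = (device_type, major_version)
    if key in expectations:
        return expectations[key]
    for (dev, _ver), value in expectations.items():
        if dev == device_type:
            return value
    raise KeyError(f"no expected slice registered for {key}")
```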