Unverified Commit f9c069c8 authored by bnellnm's avatar bnellnm Committed by GitHub
Browse files

Modularize fused experts and integrate PPLX kernels (#15956)

parent 418d2f8b
...@@ -530,7 +530,8 @@ def init_worker_distributed_environment( ...@@ -530,7 +530,8 @@ def init_worker_distributed_environment(
init_distributed_environment(parallel_config.world_size, rank, init_distributed_environment(parallel_config.world_size, rank,
distributed_init_method, local_rank) distributed_init_method, local_rank)
ensure_model_parallel_initialized(parallel_config.tensor_parallel_size, ensure_model_parallel_initialized(parallel_config.tensor_parallel_size,
parallel_config.pipeline_parallel_size) parallel_config.pipeline_parallel_size,
parallel_config.enable_expert_parallel)
ensure_kv_transfer_initialized(vllm_config) ensure_kv_transfer_initialized(vllm_config)
......
...@@ -176,7 +176,8 @@ class XPUWorker(LoRANotSupportedWorkerBase, Worker): ...@@ -176,7 +176,8 @@ class XPUWorker(LoRANotSupportedWorkerBase, Worker):
ensure_model_parallel_initialized( ensure_model_parallel_initialized(
parallel_config.tensor_parallel_size, parallel_config.tensor_parallel_size,
parallel_config.pipeline_parallel_size) parallel_config.pipeline_parallel_size,
parallel_config.enable_expert_parallel)
# global all_reduce needed for overall oneccl warm up # global all_reduce needed for overall oneccl warm up
torch.distributed.all_reduce(torch.zeros(1).xpu()) torch.distributed.all_reduce(torch.zeros(1).xpu())
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment