添加ep moe tuning (Add EP MoE tuning)

10213be4 · 王敏 · 7f775ad2 · 10213be4 · 10213be4
Commit 10213be4 authored Feb 11, 2025 by 王敏
Hide whitespace changes
Inline Side-by-side

Showing 2 changed files with 11 additions and 9 deletions

benchmarks/kernels/benchmark_moe.py +6 -5

vllm/model_executor/models/mixtral.py +5 -4

No files found.
--- a/benchmarks/kernels/benchmark_moe.py
+++ b/benchmarks/kernels/benchmark_moe.py
@@ -354,14 +354,14 @@ def merge_unique_dicts(list1, list2):
 @ray.remote(num_gpus=1)
 class BenchmarkWorker:

-    def __init__(self, seed: int) -> None:
-        torch.set_default_device("cuda")
+    def __init__(self, seed: int, device_id: int) -> None:
+        torch.set_default_device("cuda:"+ str(device_id))
        current_platform.seed_everything(seed)
        self.seed = seed
        # Get the device ID to allocate tensors and kernels
        # on the respective GPU. This is required for Ray to work
        # correctly with multi-GPU tuning on the ROCm platform.
-        self.device_id = int(ray.get_gpu_ids()[0])
+        self.device_id = device_id

    def benchmark(
        self,
@@ -580,9 +580,9 @@ def main(args: argparse.Namespace):

    ray.init(address=None,
                 ignore_reinit_error=True,
-                 num_gpus=1)
+                 num_gpus=args.num_gpus)
    num_gpus = int(ray.available_resources()["GPU"])
-    workers = [BenchmarkWorker.remote(args.seed) for _ in range(num_gpus)]
+    workers = [BenchmarkWorker.remote(args.seed, i) for i in range(num_gpus)]

    def _distribute(method: str, inputs: List[Any]) -> List[Any]:
        outputs = []
@@ -644,6 +644,7 @@ if __name__ == "__main__":
    parser.add_argument("--nn_moe", type=bool, default=True)
    parser.add_argument("--trust-remote-code", action="store_true")
    parser.add_argument("--moe-ep-size", type=int, default=1)
+    parser.add_argument("--num-gpus", type=int, default=1)
    args = parser.parse_args()

    main(args)
--- a/vllm/model_executor/models/mixtral.py
+++ b/vllm/model_executor/models/mixtral.py
@@ -502,10 +502,7 @@ class MixtralForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
                    if ((name.endswith(".bias") or name.endswith("_bias"))
                            and name not in params_dict):
                        continue
-                    
-                    # Skip loading extra expert weights for ep moe mode
-                    if name not in params_dict:
-                        continue
+
                    param = params_dict[name]
                    weight_loader = param.weight_loader
                    weight_loader(param,
@@ -527,6 +524,10 @@ class MixtralForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
                    if name is None:
                        continue

+                    # Skip loading extra expert weights for ep moe mode
+                    if name not in params_dict:
+                        continue
+
                    param = params_dict[name]
                    weight_loader = getattr(param, "weight_loader",
                                            default_weight_loader)