Commit 10213be4 authored by 王敏
Browse files

添加ep moe tuning

parent 7f775ad2
...@@ -354,14 +354,14 @@ def merge_unique_dicts(list1, list2): ...@@ -354,14 +354,14 @@ def merge_unique_dicts(list1, list2):
@ray.remote(num_gpus=1) @ray.remote(num_gpus=1)
class BenchmarkWorker: class BenchmarkWorker:
def __init__(self, seed: int) -> None: def __init__(self, seed: int, device_id: int) -> None:
torch.set_default_device("cuda") torch.set_default_device("cuda:"+ str(device_id))
current_platform.seed_everything(seed) current_platform.seed_everything(seed)
self.seed = seed self.seed = seed
# Get the device ID to allocate tensors and kernels # Get the device ID to allocate tensors and kernels
# on the respective GPU. This is required for Ray to work # on the respective GPU. This is required for Ray to work
# correctly with multi-GPU tuning on the ROCm platform. # correctly with multi-GPU tuning on the ROCm platform.
self.device_id = int(ray.get_gpu_ids()[0]) self.device_id = device_id
def benchmark( def benchmark(
self, self,
...@@ -580,9 +580,9 @@ def main(args: argparse.Namespace): ...@@ -580,9 +580,9 @@ def main(args: argparse.Namespace):
ray.init(address=None, ray.init(address=None,
ignore_reinit_error=True, ignore_reinit_error=True,
num_gpus=1) num_gpus=args.num_gpus)
num_gpus = int(ray.available_resources()["GPU"]) num_gpus = int(ray.available_resources()["GPU"])
workers = [BenchmarkWorker.remote(args.seed) for _ in range(num_gpus)] workers = [BenchmarkWorker.remote(args.seed, i) for i in range(num_gpus)]
def _distribute(method: str, inputs: List[Any]) -> List[Any]: def _distribute(method: str, inputs: List[Any]) -> List[Any]:
outputs = [] outputs = []
...@@ -644,6 +644,7 @@ if __name__ == "__main__": ...@@ -644,6 +644,7 @@ if __name__ == "__main__":
parser.add_argument("--nn_moe", type=bool, default=True) parser.add_argument("--nn_moe", type=bool, default=True)
parser.add_argument("--trust-remote-code", action="store_true") parser.add_argument("--trust-remote-code", action="store_true")
parser.add_argument("--moe-ep-size", type=int, default=1) parser.add_argument("--moe-ep-size", type=int, default=1)
parser.add_argument("--num-gpus", type=int, default=1)
args = parser.parse_args() args = parser.parse_args()
main(args) main(args)
...@@ -502,10 +502,7 @@ class MixtralForCausalLM(nn.Module, SupportsLoRA, SupportsPP): ...@@ -502,10 +502,7 @@ class MixtralForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
if ((name.endswith(".bias") or name.endswith("_bias")) if ((name.endswith(".bias") or name.endswith("_bias"))
and name not in params_dict): and name not in params_dict):
continue continue
# Skip loading extra expert weights for ep moe mode
if name not in params_dict:
continue
param = params_dict[name] param = params_dict[name]
weight_loader = param.weight_loader weight_loader = param.weight_loader
weight_loader(param, weight_loader(param,
...@@ -527,6 +524,10 @@ class MixtralForCausalLM(nn.Module, SupportsLoRA, SupportsPP): ...@@ -527,6 +524,10 @@ class MixtralForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
if name is None: if name is None:
continue continue
# Skip loading extra expert weights for ep moe mode
if name not in params_dict:
continue
param = params_dict[name] param = params_dict[name]
weight_loader = getattr(param, "weight_loader", weight_loader = getattr(param, "weight_loader",
default_weight_loader) default_weight_loader)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment