Unverified commit f31a1dad, authored by hhzhang16 and committed by GitHub
Browse files

feat: grab num_experts info from model info if possible (#4060)


Signed-off-by: Hannah Zhang <hannahz@nvidia.com>
Signed-off-by: hongkuanz <hongkuanz@nvidia.com>
Co-authored-by: hongkuanz <hongkuanz@nvidia.com>
Co-authored-by: Hongkuan Zhou <tedzhouhk@gmail.com>
parent 92909c8a
...@@ -97,25 +97,35 @@ async def run_profile(args): ...@@ -97,25 +97,35 @@ async def run_profile(args):
config = config_modifier.update_image(config, args.dgd_image) config = config_modifier.update_image(config, args.dgd_image)
logger.info(f"Using DGD image: {args.dgd_image}") logger.info(f"Using DGD image: {args.dgd_image}")
profile_num_gpus = [
2**i
for i in range(int(math.log2(args.max_num_gpus_per_engine)) + 1)
if args.min_num_gpus_per_engine <= 2**i <= args.max_num_gpus_per_engine
]
if args.is_moe_model: if args.is_moe_model:
# For MoE models, use range with stride of num_gpus_per_node # Filter GPU counts to only include divisors of num_experts
profile_num_gpus = list( if hasattr(args, "num_experts") and args.num_experts is not None:
range( original_counts = profile_num_gpus.copy()
args.min_num_gpus_per_engine, profile_num_gpus = [
args.max_num_gpus_per_engine + 1, gpu_count
args.num_gpus_per_node, for gpu_count in profile_num_gpus
) if args.num_experts % gpu_count == 0
) ]
if not profile_num_gpus:
error_msg = (
f"No valid GPU counts found that divide evenly into num_experts={args.num_experts}. "
f"Original candidates were {original_counts}. "
f"Valid divisors in range would be: {[d for d in range(args.min_num_gpus_per_engine, args.max_num_gpus_per_engine + 1) if args.num_experts % d == 0]}"
)
logger.error(error_msg)
raise ValueError(error_msg)
if len(profile_num_gpus) < len(original_counts):
logger.info(
f"Filtered GPU counts from {original_counts} to {profile_num_gpus} "
f"(only divisors of num_experts={args.num_experts})"
)
logger.info(f"Profiling MoE GPU counts (TEP/DEP): {profile_num_gpus}") logger.info(f"Profiling MoE GPU counts (TEP/DEP): {profile_num_gpus}")
else: else:
# For dense models, use powers of 2
profile_num_gpus = [
2**i
for i in range(int(math.log2(args.max_num_gpus_per_engine)) + 1)
if args.min_num_gpus_per_engine
<= 2**i
<= args.max_num_gpus_per_engine
]
logger.info(f"Profiling dense model GPU counts (TP): {profile_num_gpus}") logger.info(f"Profiling dense model GPU counts (TP): {profile_num_gpus}")
os.makedirs(args.output_dir, exist_ok=True) os.makedirs(args.output_dir, exist_ok=True)
......
...@@ -129,10 +129,27 @@ def get_model_info( ...@@ -129,10 +129,27 @@ def get_model_info(
max_context_length = value max_context_length = value
break break
# Detect number of experts for MoE models
# Different models use different attribute names
num_experts = None
if config.is_moe:
expert_attrs = [
"n_routed_experts", # DeepSeek V3/R1
"num_local_experts", # Mixtral, Qwen
"num_experts", # Generic
]
for attr in expert_attrs:
if hasattr(config, attr):
value = getattr(config, attr)
if value is not None:
num_experts = value
break
return { return {
"model_size": model_size, "model_size": model_size,
"is_moe": config.is_moe, "is_moe": config.is_moe,
"max_context_length": max_context_length, "max_context_length": max_context_length,
"num_experts": num_experts,
} }
......
...@@ -60,8 +60,13 @@ def auto_generate_search_space(args: argparse.Namespace) -> None: ...@@ -60,8 +60,13 @@ def auto_generate_search_space(args: argparse.Namespace) -> None:
model_info = get_model_info(args.model) model_info = get_model_info(args.model)
gpu_info = get_gpu_summary() gpu_info = get_gpu_summary()
num_experts_str = (
f", num_experts={model_info['num_experts']}"
if model_info.get("num_experts")
else ""
)
logger.info( logger.info(
f"Model {args.model} has size {model_info['model_size']}, is_moe={model_info['is_moe']}, and max_context_length={model_info['max_context_length']}" f"Model {args.model} has size {model_info['model_size']}, is_moe={model_info['is_moe']}, and max_context_length={model_info['max_context_length']}{num_experts_str}"
) )
logger.info( logger.info(
f"Cluster has {gpu_info['gpus_per_node']}x{gpu_info['model']} GPUs per node with {gpu_info['vram']} VRAM" f"Cluster has {gpu_info['gpus_per_node']}x{gpu_info['model']} GPUs per node with {gpu_info['vram']} VRAM"
...@@ -88,5 +93,6 @@ def auto_generate_search_space(args: argparse.Namespace) -> None: ...@@ -88,5 +93,6 @@ def auto_generate_search_space(args: argparse.Namespace) -> None:
args.is_moe_model = model_info["is_moe"] # type: ignore[assignment] args.is_moe_model = model_info["is_moe"] # type: ignore[assignment]
args.max_context_length = model_info["max_context_length"] # type: ignore[assignment] args.max_context_length = model_info["max_context_length"] # type: ignore[assignment]
args.num_gpus_per_node = gpu_info["gpus_per_node"] # type: ignore[assignment] args.num_gpus_per_node = gpu_info["gpus_per_node"] # type: ignore[assignment]
args.num_experts = model_info.get("num_experts") # type: ignore[assignment]
return return
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment