Commit 2d364c4e authored by zhuwenwen
Browse files

add step3-vl tuning

parent eba84521
...@@ -682,6 +682,11 @@ def main(args: argparse.Namespace): ...@@ -682,6 +682,11 @@ def main(args: argparse.Namespace):
topk = config.num_experts_per_tok topk = config.num_experts_per_tok
intermediate_size = config.moe_intermediate_size intermediate_size = config.moe_intermediate_size
shard_intermediate_size = 2 * intermediate_size // tp_size shard_intermediate_size = 2 * intermediate_size // tp_size
elif config.architectures[0] in ("Step3VLForConditionalGeneration"):
E = config.text_config.moe_num_experts
topk = config.text_config.moe_top_k
intermediate_size = config.text_config.moe_intermediate_size
shard_intermediate_size = 2 * intermediate_size // tp_size
else: else:
# Support for llama4 # Support for llama4
config = config.get_text_config() config = config.get_text_config()
......
...@@ -4948,4 +4948,4 @@ def get_layers_from_vllm_config(vllm_config: VllmConfig, ...@@ -4948,4 +4948,4 @@ def get_layers_from_vllm_config(vllm_config: VllmConfig,
for layer_name, layer in for layer_name, layer in
vllm_config.compilation_config.static_forward_context.items() vllm_config.compilation_config.static_forward_context.items()
if isinstance(layer, layer_type) if isinstance(layer, layer_type)
} }
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment