Unverified commit 8fbcfd07, authored by Ke Bao and committed by GitHub
Browse files

Update step3v default config (#8626)

parent 3c307dc0
...@@ -112,6 +112,7 @@ class ModelConfig: ...@@ -112,6 +112,7 @@ class ModelConfig:
mm_disabled_models = [ mm_disabled_models = [
"Gemma3ForConditionalGeneration", "Gemma3ForConditionalGeneration",
"Llama4ForConditionalGeneration", "Llama4ForConditionalGeneration",
"Step3VLForConditionalGeneration",
] ]
if self.hf_config.architectures[0] in mm_disabled_models: if self.hf_config.architectures[0] in mm_disabled_models:
enable_multimodal = False enable_multimodal = False
......
...@@ -868,7 +868,6 @@ class Step3VLForConditionalGeneration(nn.Module): ...@@ -868,7 +868,6 @@ class Step3VLForConditionalGeneration(nn.Module):
) )
def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
# TODO:
stacked_params_mapping = [ stacked_params_mapping = [
# (param_name, shard_name, shard_id) # (param_name, shard_name, shard_id)
(".qkv_proj", ".q_proj", 0), (".qkv_proj", ".q_proj", 0),
...@@ -901,9 +900,7 @@ class Step3VLForConditionalGeneration(nn.Module): ...@@ -901,9 +900,7 @@ class Step3VLForConditionalGeneration(nn.Module):
for name, loaded_weight in weights: for name, loaded_weight in weights:
if "vision_model" in name: if "vision_model" in name:
# 1.It’s not great, but let’s leave it like this for now
name = name.replace("self_attn", "self_attn.attn") name = name.replace("self_attn", "self_attn.attn")
# 2.
name = name.replace("out_proj", "proj") name = name.replace("out_proj", "proj")
# TODO: support vision model # TODO: support vision model
......
...@@ -2344,6 +2344,7 @@ def is_fa3_default_architecture(hf_config): ...@@ -2344,6 +2344,7 @@ def is_fa3_default_architecture(hf_config):
"Qwen3ForCausalLM", "Qwen3ForCausalLM",
"Qwen3MoeForCausalLM", "Qwen3MoeForCausalLM",
"Glm4MoeForCausalLM", "Glm4MoeForCausalLM",
"Step3VLForConditionalGeneration",
} }
return architectures[0] in default_archs return architectures[0] in default_archs
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment