[Model] Add Step3-VL

53ffe40e · zhuwenwen · 2db9a54d
Commit 53ffe40e authored Aug 01, 2025 by zhuwenwen
7 changed files
--- a/docs/models/supported_models.md
+++ b/docs/models/supported_models.md
@@ -598,6 +598,7 @@ Specified using `--task generate`.
 | `Qwen2_5OmniThinkerForConditionalGeneration` | Qwen2.5-Omni                                                             | T + I<sup>E+</sup> + V<sup>E+</sup> + A<sup>+</sup>                   | `Qwen/Qwen2.5-Omni-7B`                                                                                                                                  |                        | ✅︎                          | ✅︎\*                  |
 | `SkyworkR1VChatModel`                        | Skywork-R1V-38B                                                          | T + I                                                                 | `Skywork/Skywork-R1V-38B`                                                                                                                               |                        | ✅︎                          | ✅︎                    |
 | `SmolVLMForConditionalGeneration`            | SmolVLM2                                                                 | T + I                                                                 | `SmolVLM2-2.2B-Instruct`                                                                                                                                | ✅︎                     |                             | ✅︎                    |
+| `Step3VLForConditionalGeneration` | Step3-VL | T + I<sup>+</sup> | `stepfun-ai/step3` | | ✅︎ | ✅︎ |
 | `TarsierForConditionalGeneration`            | Tarsier                                                                  | T + I<sup>E+</sup>                                                    | `omni-search/Tarsier-7b`,`omni-search/Tarsier-34b`                                                                                                      |                        | ✅︎                          | ✅︎                    |
 | `Tarsier2ForConditionalGeneration`<sup>^</sup> | Tarsier2                                                                 | T + I<sup>E+</sup> + V<sup>E+</sup>                                                | `omni-research/Tarsier2-Recap-7b`,`omni-research/Tarsier2-7b-0115`                                                                                      |                        | ✅︎                          | ✅︎                    |


--- a/tests/models/registry.py
+++ b/tests/models/registry.py
@@ -266,6 +266,9 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
    "StableLMEpochForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix,"stabilityai/stablelm-zephyr-3b")),  # noqa: E501
    "StableLmForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix,"stabilityai/stablelm-3b-4e1t")),
    "Starcoder2ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix,"bigcode/starcoder2-3b")),
+    "Step3TextForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "stepfun-ai/step3"),
+                                            trust_remote_code=True,
+                                            is_available_online=False),
    "SolarForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix,"upstage/solar-pro-preview-instruct")),
    "TeleChat2ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix,"Tele-AI/TeleChat2-3B"),
                                            trust_remote_code=True),
@@ -423,6 +426,9 @@ _MULTIMODAL_EXAMPLE_MODELS = {
    "Qwen2_5OmniForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "Qwen/Qwen2.5-Omni-7B-AWQ")),  # noqa: E501
    "SkyworkR1VChatModel": _HfExamplesInfo(os.path.join(models_path_prefix, "Skywork/Skywork-R1V-38B")),
    "SmolVLMForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "HuggingFaceTB/SmolVLM2-2.2B-Instruct")),  # noqa: E501
+    "Step3VLForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "stepfun-ai/step3"),
+                                                        trust_remote_code=True,
+                                                        is_available_online=False),
    "UltravoxModel": _HfExamplesInfo(os.path.join(models_path_prefix, "fixie-ai/ultravox-v0_5-llama-3_2-1b"),  # noqa: E501
                                     trust_remote_code=True),
    "TarsierForConditionalGeneration": _HfExamplesInfo("omni-research/Tarsier-7b",  # noqa: E501

--- a/vllm/entrypoints/openai/tool_parsers/__init__.py
+++ b/vllm/entrypoints/openai/tool_parsers/__init__.py
@@ -15,6 +15,7 @@ from .minimax_tool_parser import MinimaxToolParser
 from .mistral_tool_parser import MistralToolParser
 from .phi4mini_tool_parser import Phi4MiniJsonToolParser
 from .pythonic_tool_parser import PythonicToolParser
+from .step3_tool_parser import Step3ToolParser
 from .xlam_tool_parser import xLAMToolParser

 __all__ = [
@@ -31,6 +32,7 @@ __all__ = [
    "PythonicToolParser", 
    "Phi4MiniJsonToolParser",
    "DeepSeekV3ToolParser", 
+    "Step3ToolParser",
    "xLAMToolParser", 
    "MinimaxToolParser",
    "Glm4MoeModelToolParser",

--- a/vllm/model_executor/models/registry.py
+++ b/vllm/model_executor/models/registry.py
@@ -120,6 +120,7 @@ _TEXT_GENERATION_MODELS = {
    "Qwen3ForCausalLM": ("qwen3", "Qwen3ForCausalLM"),
    "Qwen3MoeForCausalLM": ("qwen3_moe", "Qwen3MoeForCausalLM"),
    "RWForCausalLM": ("falcon", "FalconForCausalLM"),
+    "Step3TextForCausalLM": ("step3_text", "Step3TextForCausalLM"),
    "StableLMEpochForCausalLM": ("stablelm", "StablelmForCausalLM"),
    "StableLmForCausalLM": ("stablelm", "StablelmForCausalLM"),
    "Starcoder2ForCausalLM": ("starcoder2", "Starcoder2ForCausalLM"),
@@ -228,6 +229,7 @@ _MULTIMODAL_MODELS = {
    "Qwen2_5OmniModel": ("qwen2_5_omni_thinker", "Qwen2_5OmniThinkerForConditionalGeneration"),  # noqa: E501
    "Qwen2_5OmniForConditionalGeneration": ("qwen2_5_omni_thinker", "Qwen2_5OmniThinkerForConditionalGeneration"),  # noqa: E501
    "UltravoxModel": ("ultravox", "UltravoxModel"),
+    "Step3VLForConditionalGeneration": ("step3_vl", "Step3VLForConditionalGeneration"),  # noqa: E501
    "Phi4MMForCausalLM": ("phi4mm", "Phi4MMForCausalLM"),
    "TarsierForConditionalGeneration": ("tarsier", "TarsierForConditionalGeneration"),  # noqa: E501
    "Tarsier2ForConditionalGeneration": ("qwen2_vl", "Tarsier2ForConditionalGeneration"),  # noqa: E501

--- a/vllm/reasoning/__init__.py
+++ b/vllm/reasoning/__init__.py
@@ -6,6 +6,7 @@ from .deepseek_r1_reasoning_parser import DeepSeekR1ReasoningParser
 from .glm4_moe_reasoning_parser import Glm4MoeModelReasoningParser
 from .granite_reasoning_parser import GraniteReasoningParser
 from .qwen3_reasoning_parser import Qwen3ReasoningParser
+from .step3_reasoning_parser import Step3ReasoningParser

 __all__ = [
    "ReasoningParser",
@@ -14,4 +15,5 @@ __all__ = [
    "GraniteReasoningParser",
    "Qwen3ReasoningParser",
    "Glm4MoeModelReasoningParser",
+    "Step3ReasoningParser",
 ]
--- a/vllm/transformers_utils/config.py
+++ b/vllm/transformers_utils/config.py
@@ -39,6 +39,7 @@ from vllm.transformers_utils.configs import (ChatGLMConfig, Cohere2Config,
                                             MLPSpeculatorConfig, MPTConfig,
                                             NemotronConfig, NVLM_D_Config,
                                             OvisConfig, RWConfig,
+                                             Step3TextConfig, Step3VLConfig, 
                                             SkyworkR1VChatConfig, SolarConfig,
                                             Telechat2Config, UltravoxConfig)
 # yapf: enable
@@ -97,6 +98,8 @@ _CONFIG_REGISTRY: dict[str, type[PretrainedConfig]] = {
    "skywork_chat": SkyworkR1VChatConfig,
    "telechat": Telechat2Config,
    "ultravox": UltravoxConfig,
+    "step3_vl": Step3VLConfig,
+    "step3_text": Step3TextConfig,
    **_CONFIG_REGISTRY_OVERRIDE_HF
 }


--- a/vllm/transformers_utils/configs/__init__.py
+++ b/vllm/transformers_utils/configs/__init__.py
@@ -28,6 +28,9 @@ from vllm.transformers_utils.configs.ovis import OvisConfig
 from vllm.transformers_utils.configs.skyworkr1v import SkyworkR1VChatConfig
 from vllm.transformers_utils.configs.solar import SolarConfig
 from vllm.transformers_utils.configs.telechat2 import Telechat2Config
+from vllm.transformers_utils.configs.step3_vl import (Step3TextConfig,
+                                                      Step3VisionEncoderConfig,
+                                                      Step3VLConfig)
 from vllm.transformers_utils.configs.ultravox import UltravoxConfig

 __all__ = [
@@ -56,4 +59,7 @@ __all__ = [
    "SolarConfig",
    "Telechat2Config",
    "UltravoxConfig",
+    "Step3VLConfig",
+    "Step3VisionEncoderConfig",
+    "Step3TextConfig",
 ]