"docs/deployment/frameworks/dstack.md" did not exist on "42bb201fd6f79d6ed2e28e0263ffa891cd993c4c"
Commit 53ffe40e authored by zhuwenwen
Browse files

[Model] Add step3 vl

parent 2db9a54d
...@@ -598,6 +598,7 @@ Specified using `--task generate`. ...@@ -598,6 +598,7 @@ Specified using `--task generate`.
| `Qwen2_5OmniThinkerForConditionalGeneration` | Qwen2.5-Omni | T + I<sup>E+</sup> + V<sup>E+</sup> + A<sup>+</sup> | `Qwen/Qwen2.5-Omni-7B` | | ✅︎ | ✅︎\* | | `Qwen2_5OmniThinkerForConditionalGeneration` | Qwen2.5-Omni | T + I<sup>E+</sup> + V<sup>E+</sup> + A<sup>+</sup> | `Qwen/Qwen2.5-Omni-7B` | | ✅︎ | ✅︎\* |
| `SkyworkR1VChatModel` | Skywork-R1V-38B | T + I | `Skywork/Skywork-R1V-38B` | | ✅︎ | ✅︎ | | `SkyworkR1VChatModel` | Skywork-R1V-38B | T + I | `Skywork/Skywork-R1V-38B` | | ✅︎ | ✅︎ |
| `SmolVLMForConditionalGeneration` | SmolVLM2 | T + I | `HuggingFaceTB/SmolVLM2-2.2B-Instruct` | ✅︎ | | ✅︎ | | `SmolVLMForConditionalGeneration` | SmolVLM2 | T + I | `HuggingFaceTB/SmolVLM2-2.2B-Instruct` | ✅︎ | | ✅︎ |
| `Step3VLForConditionalGeneration` | Step3-VL | T + I<sup>+</sup> | `stepfun-ai/step3` | | ✅︎ | ✅︎ |
| `TarsierForConditionalGeneration` | Tarsier | T + I<sup>E+</sup> | `omni-research/Tarsier-7b`,`omni-research/Tarsier-34b` | | ✅︎ | ✅︎ | | `TarsierForConditionalGeneration` | Tarsier | T + I<sup>E+</sup> | `omni-research/Tarsier-7b`,`omni-research/Tarsier-34b` | | ✅︎ | ✅︎ |
| `Tarsier2ForConditionalGeneration`<sup>^</sup> | Tarsier2 | T + I<sup>E+</sup> + V<sup>E+</sup> | `omni-research/Tarsier2-Recap-7b`,`omni-research/Tarsier2-7b-0115` | | ✅︎ | ✅︎ | | `Tarsier2ForConditionalGeneration`<sup>^</sup> | Tarsier2 | T + I<sup>E+</sup> + V<sup>E+</sup> | `omni-research/Tarsier2-Recap-7b`,`omni-research/Tarsier2-7b-0115` | | ✅︎ | ✅︎ |
......
...@@ -266,6 +266,9 @@ _TEXT_GENERATION_EXAMPLE_MODELS = { ...@@ -266,6 +266,9 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
"StableLMEpochForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix,"stabilityai/stablelm-zephyr-3b")), # noqa: E501 "StableLMEpochForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix,"stabilityai/stablelm-zephyr-3b")), # noqa: E501
"StableLmForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix,"stabilityai/stablelm-3b-4e1t")), "StableLmForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix,"stabilityai/stablelm-3b-4e1t")),
"Starcoder2ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix,"bigcode/starcoder2-3b")), "Starcoder2ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix,"bigcode/starcoder2-3b")),
"Step3TextForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "stepfun-ai/step3"),
trust_remote_code=True,
is_available_online=False),
"SolarForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix,"upstage/solar-pro-preview-instruct")), "SolarForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix,"upstage/solar-pro-preview-instruct")),
"TeleChat2ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix,"Tele-AI/TeleChat2-3B"), "TeleChat2ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix,"Tele-AI/TeleChat2-3B"),
trust_remote_code=True), trust_remote_code=True),
...@@ -423,6 +426,9 @@ _MULTIMODAL_EXAMPLE_MODELS = { ...@@ -423,6 +426,9 @@ _MULTIMODAL_EXAMPLE_MODELS = {
"Qwen2_5OmniForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "Qwen/Qwen2.5-Omni-7B-AWQ")), # noqa: E501 "Qwen2_5OmniForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "Qwen/Qwen2.5-Omni-7B-AWQ")), # noqa: E501
"SkyworkR1VChatModel": _HfExamplesInfo(os.path.join(models_path_prefix, "Skywork/Skywork-R1V-38B")), "SkyworkR1VChatModel": _HfExamplesInfo(os.path.join(models_path_prefix, "Skywork/Skywork-R1V-38B")),
"SmolVLMForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "HuggingFaceTB/SmolVLM2-2.2B-Instruct")), # noqa: E501 "SmolVLMForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "HuggingFaceTB/SmolVLM2-2.2B-Instruct")), # noqa: E501
"Step3VLForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "stepfun-ai/step3"),
trust_remote_code=True,
is_available_online=False),
"UltravoxModel": _HfExamplesInfo(os.path.join(models_path_prefix, "fixie-ai/ultravox-v0_5-llama-3_2-1b"), # noqa: E501 "UltravoxModel": _HfExamplesInfo(os.path.join(models_path_prefix, "fixie-ai/ultravox-v0_5-llama-3_2-1b"), # noqa: E501
trust_remote_code=True), trust_remote_code=True),
"TarsierForConditionalGeneration": _HfExamplesInfo("omni-research/Tarsier-7b", # noqa: E501 "TarsierForConditionalGeneration": _HfExamplesInfo("omni-research/Tarsier-7b", # noqa: E501
......
...@@ -15,6 +15,7 @@ from .minimax_tool_parser import MinimaxToolParser ...@@ -15,6 +15,7 @@ from .minimax_tool_parser import MinimaxToolParser
from .mistral_tool_parser import MistralToolParser from .mistral_tool_parser import MistralToolParser
from .phi4mini_tool_parser import Phi4MiniJsonToolParser from .phi4mini_tool_parser import Phi4MiniJsonToolParser
from .pythonic_tool_parser import PythonicToolParser from .pythonic_tool_parser import PythonicToolParser
from .step3_tool_parser import Step3ToolParser
from .xlam_tool_parser import xLAMToolParser from .xlam_tool_parser import xLAMToolParser
__all__ = [ __all__ = [
...@@ -31,6 +32,7 @@ __all__ = [ ...@@ -31,6 +32,7 @@ __all__ = [
"PythonicToolParser", "PythonicToolParser",
"Phi4MiniJsonToolParser", "Phi4MiniJsonToolParser",
"DeepSeekV3ToolParser", "DeepSeekV3ToolParser",
"Step3ToolParser",
"xLAMToolParser", "xLAMToolParser",
"MinimaxToolParser", "MinimaxToolParser",
"Glm4MoeModelToolParser", "Glm4MoeModelToolParser",
......
...@@ -120,6 +120,7 @@ _TEXT_GENERATION_MODELS = { ...@@ -120,6 +120,7 @@ _TEXT_GENERATION_MODELS = {
"Qwen3ForCausalLM": ("qwen3", "Qwen3ForCausalLM"), "Qwen3ForCausalLM": ("qwen3", "Qwen3ForCausalLM"),
"Qwen3MoeForCausalLM": ("qwen3_moe", "Qwen3MoeForCausalLM"), "Qwen3MoeForCausalLM": ("qwen3_moe", "Qwen3MoeForCausalLM"),
"RWForCausalLM": ("falcon", "FalconForCausalLM"), "RWForCausalLM": ("falcon", "FalconForCausalLM"),
"Step3TextForCausalLM": ("step3_text", "Step3TextForCausalLM"),
"StableLMEpochForCausalLM": ("stablelm", "StablelmForCausalLM"), "StableLMEpochForCausalLM": ("stablelm", "StablelmForCausalLM"),
"StableLmForCausalLM": ("stablelm", "StablelmForCausalLM"), "StableLmForCausalLM": ("stablelm", "StablelmForCausalLM"),
"Starcoder2ForCausalLM": ("starcoder2", "Starcoder2ForCausalLM"), "Starcoder2ForCausalLM": ("starcoder2", "Starcoder2ForCausalLM"),
...@@ -228,6 +229,7 @@ _MULTIMODAL_MODELS = { ...@@ -228,6 +229,7 @@ _MULTIMODAL_MODELS = {
"Qwen2_5OmniModel": ("qwen2_5_omni_thinker", "Qwen2_5OmniThinkerForConditionalGeneration"), # noqa: E501 "Qwen2_5OmniModel": ("qwen2_5_omni_thinker", "Qwen2_5OmniThinkerForConditionalGeneration"), # noqa: E501
"Qwen2_5OmniForConditionalGeneration": ("qwen2_5_omni_thinker", "Qwen2_5OmniThinkerForConditionalGeneration"), # noqa: E501 "Qwen2_5OmniForConditionalGeneration": ("qwen2_5_omni_thinker", "Qwen2_5OmniThinkerForConditionalGeneration"), # noqa: E501
"UltravoxModel": ("ultravox", "UltravoxModel"), "UltravoxModel": ("ultravox", "UltravoxModel"),
"Step3VLForConditionalGeneration": ("step3_vl", "Step3VLForConditionalGeneration"), # noqa: E501
"Phi4MMForCausalLM": ("phi4mm", "Phi4MMForCausalLM"), "Phi4MMForCausalLM": ("phi4mm", "Phi4MMForCausalLM"),
"TarsierForConditionalGeneration": ("tarsier", "TarsierForConditionalGeneration"), # noqa: E501 "TarsierForConditionalGeneration": ("tarsier", "TarsierForConditionalGeneration"), # noqa: E501
"Tarsier2ForConditionalGeneration": ("qwen2_vl", "Tarsier2ForConditionalGeneration"), # noqa: E501 "Tarsier2ForConditionalGeneration": ("qwen2_vl", "Tarsier2ForConditionalGeneration"), # noqa: E501
......
...@@ -6,6 +6,7 @@ from .deepseek_r1_reasoning_parser import DeepSeekR1ReasoningParser ...@@ -6,6 +6,7 @@ from .deepseek_r1_reasoning_parser import DeepSeekR1ReasoningParser
from .glm4_moe_reasoning_parser import Glm4MoeModelReasoningParser from .glm4_moe_reasoning_parser import Glm4MoeModelReasoningParser
from .granite_reasoning_parser import GraniteReasoningParser from .granite_reasoning_parser import GraniteReasoningParser
from .qwen3_reasoning_parser import Qwen3ReasoningParser from .qwen3_reasoning_parser import Qwen3ReasoningParser
from .step3_reasoning_parser import Step3ReasoningParser
__all__ = [ __all__ = [
"ReasoningParser", "ReasoningParser",
...@@ -14,4 +15,5 @@ __all__ = [ ...@@ -14,4 +15,5 @@ __all__ = [
"GraniteReasoningParser", "GraniteReasoningParser",
"Qwen3ReasoningParser", "Qwen3ReasoningParser",
"Glm4MoeModelReasoningParser", "Glm4MoeModelReasoningParser",
"Step3ReasoningParser",
] ]
...@@ -39,6 +39,7 @@ from vllm.transformers_utils.configs import (ChatGLMConfig, Cohere2Config, ...@@ -39,6 +39,7 @@ from vllm.transformers_utils.configs import (ChatGLMConfig, Cohere2Config,
MLPSpeculatorConfig, MPTConfig, MLPSpeculatorConfig, MPTConfig,
NemotronConfig, NVLM_D_Config, NemotronConfig, NVLM_D_Config,
OvisConfig, RWConfig, OvisConfig, RWConfig,
Step3TextConfig, Step3VLConfig,
SkyworkR1VChatConfig, SolarConfig, SkyworkR1VChatConfig, SolarConfig,
Telechat2Config, UltravoxConfig) Telechat2Config, UltravoxConfig)
# yapf: enable # yapf: enable
...@@ -97,6 +98,8 @@ _CONFIG_REGISTRY: dict[str, type[PretrainedConfig]] = { ...@@ -97,6 +98,8 @@ _CONFIG_REGISTRY: dict[str, type[PretrainedConfig]] = {
"skywork_chat": SkyworkR1VChatConfig, "skywork_chat": SkyworkR1VChatConfig,
"telechat": Telechat2Config, "telechat": Telechat2Config,
"ultravox": UltravoxConfig, "ultravox": UltravoxConfig,
"step3_vl": Step3VLConfig,
"step3_text": Step3TextConfig,
**_CONFIG_REGISTRY_OVERRIDE_HF **_CONFIG_REGISTRY_OVERRIDE_HF
} }
......
...@@ -28,6 +28,9 @@ from vllm.transformers_utils.configs.ovis import OvisConfig ...@@ -28,6 +28,9 @@ from vllm.transformers_utils.configs.ovis import OvisConfig
from vllm.transformers_utils.configs.skyworkr1v import SkyworkR1VChatConfig from vllm.transformers_utils.configs.skyworkr1v import SkyworkR1VChatConfig
from vllm.transformers_utils.configs.solar import SolarConfig from vllm.transformers_utils.configs.solar import SolarConfig
from vllm.transformers_utils.configs.telechat2 import Telechat2Config from vllm.transformers_utils.configs.telechat2 import Telechat2Config
from vllm.transformers_utils.configs.step3_vl import (Step3TextConfig,
Step3VisionEncoderConfig,
Step3VLConfig)
from vllm.transformers_utils.configs.ultravox import UltravoxConfig from vllm.transformers_utils.configs.ultravox import UltravoxConfig
__all__ = [ __all__ = [
...@@ -56,4 +59,7 @@ __all__ = [ ...@@ -56,4 +59,7 @@ __all__ = [
"SolarConfig", "SolarConfig",
"Telechat2Config", "Telechat2Config",
"UltravoxConfig", "UltravoxConfig",
"Step3VLConfig",
"Step3VisionEncoderConfig",
"Step3TextConfig",
] ]
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment