Unverified commit b40db4df, authored by ltd0924, committed by GitHub
Browse files

[StepVL] add step vl offline example (#33054)


Signed-off-by: luotingdan <luotingdan@stepfun.com>
Co-authored-by: luotingdan <luotingdan@stepfun.com>
parent 11b55687
...@@ -1889,6 +1889,32 @@ def run_step3(questions: list[str], modality: str) -> ModelRequestData: ...@@ -1889,6 +1889,32 @@ def run_step3(questions: list[str], modality: str) -> ModelRequestData:
) )
# StepVL10B
def run_step_vl(questions: list[str], modality: str) -> ModelRequestData:
    """Assemble the offline-inference request for stepfun-ai/Step3-VL-10B.

    Args:
        questions: Question texts; one prompt is produced per entry.
        modality: Input modality; only ``"image"`` is accepted.

    Returns:
        A ``ModelRequestData`` carrying the engine arguments and the prompts.
    """
    assert modality == "image"

    # At most one item of the requested modality is allowed per prompt.
    mm_limits = {modality: 1}
    engine_args = EngineArgs(
        model="stepfun-ai/Step3-VL-10B",
        max_num_batched_tokens=4096,
        tensor_parallel_size=1,
        trust_remote_code=True,
        limit_mm_per_prompt=mm_limits,
        reasoning_parser="deepseek_r1",
    )

    # Every prompt is: system/user header, a single <im_patch> placeholder,
    # the question, then the assistant turn opened with "<think>".
    header = "<|begin▁of▁sentence|> You are a helpful assistant.<|BOT|>user\n "
    footer = " <|EOT|><|BOT|>assistant\n<think>\n"
    prompts = [f"{header}<im_patch>{text}{footer}" for text in questions]

    return ModelRequestData(engine_args=engine_args, prompts=prompts)
# omni-research/Tarsier-7b # omni-research/Tarsier-7b
def run_tarsier(questions: list[str], modality: str) -> ModelRequestData: def run_tarsier(questions: list[str], modality: str) -> ModelRequestData:
assert modality == "image" assert modality == "image"
...@@ -2006,6 +2032,7 @@ model_example_map = { ...@@ -2006,6 +2032,7 @@ model_example_map = {
"skywork_chat": run_skyworkr1v, "skywork_chat": run_skyworkr1v,
"smolvlm": run_smolvlm, "smolvlm": run_smolvlm,
"step3": run_step3, "step3": run_step3,
"stepvl": run_step_vl,
"tarsier": run_tarsier, "tarsier": run_tarsier,
"tarsier2": run_tarsier2, "tarsier2": run_tarsier2,
} }
......
...@@ -1182,6 +1182,32 @@ def load_step3(question: str, image_urls: list[str]) -> ModelRequestData: ...@@ -1182,6 +1182,32 @@ def load_step3(question: str, image_urls: list[str]) -> ModelRequestData:
) )
def load_step_vl(question: str, image_urls: list[str]) -> ModelRequestData:
    """Assemble a multi-image request for stepfun-ai/Step3-VL-10B.

    Args:
        question: Question text placed after the image placeholders.
        image_urls: Image URLs; each one is fetched with ``fetch_image`` and
            contributes one ``<im_patch>`` placeholder to the prompt.

    Returns:
        A ``ModelRequestData`` carrying the engine arguments, the single
        prompt, and the fetched images.
    """
    num_images = len(image_urls)

    engine_args = EngineArgs(
        model="stepfun-ai/Step3-VL-10B",
        max_num_batched_tokens=4096,
        # Allow exactly as many images per prompt as were supplied.
        limit_mm_per_prompt={"image": num_images},
        # Overrides ``enable_patch`` in the model's vision config.
        hf_overrides={"vision_config": {"enable_patch": False}},
        trust_remote_code=True,
        reasoning_parser="deepseek_r1",
    )

    # One <im_patch> placeholder per image, immediately followed by the
    # question and the assistant turn opened with "<think>".
    placeholders = "<im_patch>" * num_images
    header = "<|begin▁of▁sentence|> You are a helpful assistant.<|BOT|>user\n "
    footer = "<|EOT|><|BOT|>assistant\n<think>\n"
    prompt = f"{header}{placeholders}{question}{footer}"

    images = []
    for url in image_urls:
        images.append(fetch_image(url))

    return ModelRequestData(
        engine_args=engine_args,
        prompt=prompt,
        image_data=images,
    )
def load_tarsier(question: str, image_urls: list[str]) -> ModelRequestData: def load_tarsier(question: str, image_urls: list[str]) -> ModelRequestData:
model_name = "omni-research/Tarsier-7b" model_name = "omni-research/Tarsier-7b"
...@@ -1374,6 +1400,7 @@ model_example_map = { ...@@ -1374,6 +1400,7 @@ model_example_map = {
"rvl": load_r_vl, "rvl": load_r_vl,
"smolvlm": load_smolvlm, "smolvlm": load_smolvlm,
"step3": load_step3, "step3": load_step3,
"stepvl": load_step_vl,
"tarsier": load_tarsier, "tarsier": load_tarsier,
"tarsier2": load_tarsier2, "tarsier2": load_tarsier2,
"glm4_5v": load_glm4_5v, "glm4_5v": load_glm4_5v,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment