Unverified Commit 4ad97370 authored by Stefan He, committed by GitHub

chore: bump transformer to 4.54.0 (#8416)


Co-authored-by: Binyao Jiang <byjiang1996@gmail.com>
Co-authored-by: Lifu Huang <lifu.hlf@gmail.com>
parent 28103384
@@ -30,7 +30,7 @@ jobs:
       - name: Install dependencies
         run: |
           bash scripts/ci_install_dependency.sh
-          pip install "vllm==0.9.0.1"
+          pip install "vllm==0.10.0"
           pip install "bitsandbytes>=0.44.0"
       - name: Run VLLM dependency tests
......
@@ -45,7 +45,7 @@ runtime_common = [
     "soundfile==0.13.1",
     "scipy",
     "torchao==0.9.0",
-    "transformers==4.53.2",
+    "transformers==4.54.0",
     "timm==1.0.16",
     "uvicorn",
     "uvloop",
......
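The vllm pin in the CI workflow and the transformers pin in pyproject.toml move together in this commit. A minimal sketch (not part of the diff) for checking that a local environment matches both pins; it assumes only that each package exposes __version__:

    # Hypothetical check, not part of this PR: fail fast on a stale environment.
    import transformers
    import vllm

    assert transformers.__version__ == "4.54.0", transformers.__version__
    assert vllm.__version__ == "0.10.0", vllm.__version__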
@@ -656,11 +656,15 @@ class LlavaForConditionalGeneration(LlavaBaseForCausalLM):
         self, auto_model_type: Type[AutoModel]
     ) -> Dict[str, str]:
         mapping = {}
-        for config_cls, archs in auto_model_type._model_mapping.items():
-            if isinstance(archs, tuple):
-                mapping[config_cls.__name__] = tuple(arch.__name__ for arch in archs)
-            else:
-                mapping[config_cls.__name__] = archs.__name__
+        for config_cls in auto_model_type._model_mapping.keys():
+            archs = auto_model_type._model_mapping.get(config_cls, None)
+            if archs is not None:
+                if isinstance(archs, tuple):
+                    mapping[config_cls.__name__] = tuple(
+                        arch.__name__ for arch in archs
+                    )
+                else:
+                    mapping[config_cls.__name__] = archs.__name__
         return mapping

     def __init__(
......
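The llava change above replaces eager iteration over _model_mapping.items() with keys() plus .get(). A standalone sketch of the same defensive pattern, with an illustrative function name (not sglang API); it assumes transformers' lazy auto-model mapping, where .get() returns the default for entries that cannot be looked up:

    # Sketch of the defensive mapping pattern from the diff above.
    from typing import Dict, Tuple, Type, Union

    from transformers import AutoModel

    def config_to_arch_names(
        auto_model_type: Type[AutoModel] = AutoModel,
    ) -> Dict[str, Union[str, Tuple[str, ...]]]:
        mapping = {}
        for config_cls in auto_model_type._model_mapping.keys():
            # .get() tolerates entries that fail to resolve, unlike items().
            archs = auto_model_type._model_mapping.get(config_cls, None)
            if archs is None:
                continue
            if isinstance(archs, tuple):
                mapping[config_cls.__name__] = tuple(a.__name__ for a in archs)
            else:
                mapping[config_cls.__name__] = archs.__name__
        return mapping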
@@ -1134,7 +1134,10 @@ class MiniCPMWhisperEncoderLayer(nn.Module):
         """
         residual = hidden_states
         hidden_states = self.self_attn_layer_norm(hidden_states)
-        hidden_states, attn_weights, past_key_values = self.self_attn(
+        # TODO (lifuhuang): confirmed with Mick that the past_key_values logic was copied from the official MiniCPM-o code;
+        # we are currently not using past_key_values at all and will need to redesign the caching logic when we support
+        # streaming in the future.
+        hidden_states, attn_weights = self.self_attn(
             hidden_states=hidden_states,
             attention_mask=attention_mask,
             layer_head_mask=layer_head_mask,
......
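The unpacking change above follows the attention refactor in the transformers release this PR pins, where the encoder self-attention forward returns (hidden_states, attn_weights) rather than a 3-tuple that also carried past_key_values. If code has to run against both signatures, a hedged compatibility sketch (variable names are illustrative):

    # Illustrative shim, not part of this PR: accept either return shape.
    outputs = self.self_attn(
        hidden_states=hidden_states,
        attention_mask=attention_mask,
        layer_head_mask=layer_head_mask,
    )
    if len(outputs) == 3:
        # older transformers: (hidden_states, attn_weights, past_key_values)
        hidden_states, attn_weights, _ = outputs
    else:
        # transformers pinned here: (hidden_states, attn_weights)
        hidden_states, attn_weights = outputs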
@@ -51,7 +51,8 @@ class ModelCase:
 # Popular models that run on the CI
 CI_MODELS = [
     ModelCase("meta-llama/Llama-3.1-8B-Instruct"),
-    ModelCase("google/gemma-2-2b"),
+    # TODO: Gemma is broken by a bug introduced in the latest transformers release; restore it once https://github.com/huggingface/transformers/issues/39711 is fixed.
+    # ModelCase("google/gemma-2-2b"),
 ]

 # the complete set of models to test sglang's generation model
......
@@ -172,28 +172,29 @@ class TestGemma3nServer(TestOpenAIVisionServer):
         cls.base_url += "/v1"


-class TestKimiVLServer(TestOpenAIVisionServer):
-    @classmethod
-    def setUpClass(cls):
-        cls.model = "moonshotai/Kimi-VL-A3B-Instruct"
-        cls.base_url = DEFAULT_URL_FOR_TEST
-        cls.api_key = "sk-123456"
-        cls.process = popen_launch_server(
-            cls.model,
-            cls.base_url,
-            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
-            other_args=[
-                "--trust-remote-code",
-                "--context-length",
-                "4096",
-                "--dtype",
-                "bfloat16",
-            ],
-        )
-        cls.base_url += "/v1"
-
-    def test_video_images_chat_completion(self):
-        pass
+# commented out until https://huggingface.co/moonshotai/Kimi-VL-A3B-Instruct/discussions/27 gets fixed
+# class TestKimiVLServer(TestOpenAIVisionServer):
+#     @classmethod
+#     def setUpClass(cls):
+#         cls.model = "moonshotai/Kimi-VL-A3B-Instruct"
+#         cls.base_url = DEFAULT_URL_FOR_TEST
+#         cls.api_key = "sk-123456"
+#         cls.process = popen_launch_server(
+#             cls.model,
+#             cls.base_url,
+#             timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
+#             other_args=[
+#                 "--trust-remote-code",
+#                 "--context-length",
+#                 "4096",
+#                 "--dtype",
+#                 "bfloat16",
+#             ],
+#         )
+#         cls.base_url += "/v1"
+
+#     def test_video_images_chat_completion(self):
+#         pass


 class TestPhi4MMServer(TestOpenAIVisionServer):
......
@@ -189,31 +189,32 @@ class TestGemmaUnderstandsImage(VLMInputTestBase, unittest.IsolatedAsyncioTestCa
 )


-class TestKimiVLImageUnderstandsImage(
-    VLMInputTestBase, unittest.IsolatedAsyncioTestCase
-):
-    model_path = "moonshotai/Kimi-VL-A3B-Instruct"
-    chat_template = "kimi-vl"
-
-    @classmethod
-    def _init_visual(cls):
-        model = AutoModel.from_pretrained(cls.model_path, trust_remote_code=True)
-        cls.vision_tower = model.vision_tower.eval().to(cls.device)
-        cls.mm_projector = model.multi_modal_projector.eval().to(cls.device)
-
-        cls.visual = lambda tokenizer_output: cls.mm_projector(
-            cls.vision_tower(
-                pixel_values=tokenizer_output["pixel_values"],
-                grid_hws=tokenizer_output["image_grid_hws"],
-            )
-        )
-
-    def _pixel_values_image_data(self, processor_output):
-        return dict(
-            modality="IMAGE",
-            pixel_values=processor_output["pixel_values"],
-            image_grid_hws=processor_output["image_grid_hws"],
-        )
+# commented out until https://huggingface.co/moonshotai/Kimi-VL-A3B-Instruct/discussions/27 gets fixed
+# class TestKimiVLImageUnderstandsImage(
+#     VLMInputTestBase, unittest.IsolatedAsyncioTestCase
+# ):
+#     model_path = "moonshotai/Kimi-VL-A3B-Instruct"
+#     chat_template = "kimi-vl"
+
+#     @classmethod
+#     def _init_visual(cls):
+#         model = AutoModel.from_pretrained(cls.model_path, trust_remote_code=True)
+#         cls.vision_tower = model.vision_tower.eval().to(cls.device)
+#         cls.mm_projector = model.multi_modal_projector.eval().to(cls.device)
+
+#         cls.visual = lambda tokenizer_output: cls.mm_projector(
+#             cls.vision_tower(
+#                 pixel_values=tokenizer_output["pixel_values"],
+#                 grid_hws=tokenizer_output["image_grid_hws"],
+#             )
+#         )
+
+#     def _pixel_values_image_data(self, processor_output):
+#         return dict(
+#             modality="IMAGE",
+#             pixel_values=processor_output["pixel_values"],
+#             image_grid_hws=processor_output["image_grid_hws"],
+#         )


 # not for CI: too large
......