Unverified Commit 652c24a6 authored by Xinyuan Tong's avatar Xinyuan Tong Committed by GitHub
Browse files

Update transformers package version to 4.57.0 (#11222)


Co-authored-by: default avataryhyang201 <yhyang201@gmail.com>
parent 5e142484
...@@ -63,7 +63,7 @@ dependencies = [ ...@@ -63,7 +63,7 @@ dependencies = [
"torchaudio==2.8.0", "torchaudio==2.8.0",
"torchvision", "torchvision",
"tqdm", "tqdm",
"transformers==4.56.1", "transformers==4.57.0",
"uvicorn", "uvicorn",
"uvloop", "uvloop",
"xgrammar==0.1.24", "xgrammar==0.1.24",
......
...@@ -49,7 +49,7 @@ from typing import List, Optional, Sequence, Tuple, Union ...@@ -49,7 +49,7 @@ from typing import List, Optional, Sequence, Tuple, Union
import torch import torch
import torch.nn as nn import torch.nn as nn
import torch.nn.functional as F import torch.nn.functional as F
from transformers.activations import ACT2FN, PytorchGELUTanh from transformers.activations import ACT2FN, GELUTanh
from transformers.modeling_utils import PreTrainedModel from transformers.modeling_utils import PreTrainedModel
try: try:
...@@ -614,7 +614,7 @@ class MoonVitPretrainedModel(PreTrainedModel): ...@@ -614,7 +614,7 @@ class MoonVitPretrainedModel(PreTrainedModel):
"num_heads": config.num_attention_heads, "num_heads": config.num_attention_heads,
"hidden_dim": config.hidden_size, "hidden_dim": config.hidden_size,
"mlp_dim": config.intermediate_size, "mlp_dim": config.intermediate_size,
"activation": PytorchGELUTanh(), "activation": GELUTanh(),
"attn_bias": True, "attn_bias": True,
"attn_implementation": config._attn_implementation, "attn_implementation": config._attn_implementation,
}, },
......
...@@ -191,30 +191,31 @@ class TestQwen2AudioServer(AudioOpenAITestMixin): ...@@ -191,30 +191,31 @@ class TestQwen2AudioServer(AudioOpenAITestMixin):
cls.base_url += "/v1" cls.base_url += "/v1"
class TestKimiVLServer(ImageOpenAITestMixin): # Temporarily skip Kimi-VL for CI test due to issue in transformers=4.57.0
@classmethod # class TestKimiVLServer(ImageOpenAITestMixin):
def setUpClass(cls): # @classmethod
cls.model = "moonshotai/Kimi-VL-A3B-Instruct" # def setUpClass(cls):
cls.base_url = DEFAULT_URL_FOR_TEST # cls.model = "moonshotai/Kimi-VL-A3B-Instruct"
cls.api_key = "sk-123456" # cls.base_url = DEFAULT_URL_FOR_TEST
cls.process = popen_launch_server( # cls.api_key = "sk-123456"
cls.model, # cls.process = popen_launch_server(
cls.base_url, # cls.model,
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, # cls.base_url,
other_args=[ # timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
"--trust-remote-code", # other_args=[
"--context-length", # "--trust-remote-code",
"4096", # "--context-length",
"--dtype", # "4096",
"bfloat16", # "--dtype",
"--cuda-graph-max-bs", # "bfloat16",
"4", # "--cuda-graph-max-bs",
], # "4",
) # ],
cls.base_url += "/v1" # )
# cls.base_url += "/v1"
def test_video_images_chat_completion(self): # def test_video_images_chat_completion(self):
pass # pass
class TestGLM41VServer(ImageOpenAITestMixin, VideoOpenAITestMixin): class TestGLM41VServer(ImageOpenAITestMixin, VideoOpenAITestMixin):
......
...@@ -189,31 +189,32 @@ class TestGemmaUnderstandsImage(VLMInputTestBase, unittest.IsolatedAsyncioTestCa ...@@ -189,31 +189,32 @@ class TestGemmaUnderstandsImage(VLMInputTestBase, unittest.IsolatedAsyncioTestCa
) )
class TestKimiVLImageUnderstandsImage( # Temporarily skip Kimi-VL for CI test due to issue in transformers=4.57.0
VLMInputTestBase, unittest.IsolatedAsyncioTestCase # class TestKimiVLImageUnderstandsImage(
): # VLMInputTestBase, unittest.IsolatedAsyncioTestCase
model_path = "moonshotai/Kimi-VL-A3B-Instruct" # ):
chat_template = "kimi-vl" # model_path = "moonshotai/Kimi-VL-A3B-Instruct"
# chat_template = "kimi-vl"
@classmethod # @classmethod
def _init_visual(cls): # def _init_visual(cls):
model = AutoModel.from_pretrained(cls.model_path, trust_remote_code=True) # model = AutoModel.from_pretrained(cls.model_path, trust_remote_code=True)
cls.vision_tower = model.vision_tower.eval().to(cls.device) # cls.vision_tower = model.vision_tower.eval().to(cls.device)
cls.mm_projector = model.multi_modal_projector.eval().to(cls.device) # cls.mm_projector = model.multi_modal_projector.eval().to(cls.device)
cls.visual = lambda tokenizer_output: cls.mm_projector( # cls.visual = lambda tokenizer_output: cls.mm_projector(
cls.vision_tower( # cls.vision_tower(
pixel_values=tokenizer_output["pixel_values"], # pixel_values=tokenizer_output["pixel_values"],
grid_hws=tokenizer_output["image_grid_hws"], # grid_hws=tokenizer_output["image_grid_hws"],
) # )
) # )
def _pixel_values_image_data(self, processor_output): # def _pixel_values_image_data(self, processor_output):
return dict( # return dict(
modality="IMAGE", # modality="IMAGE",
pixel_values=processor_output["pixel_values"], # pixel_values=processor_output["pixel_values"],
image_grid_hws=processor_output["image_grid_hws"], # image_grid_hws=processor_output["image_grid_hws"],
) # )
# not for CI: too large # not for CI: too large
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment