LLaVa: add cache class attribute (#32278)

cache class flag

LLaVa: add cache class attribute (#32278)
cache class flag
453e7488 · Raushan Turganbay · GitHub · 14ee2326 · 453e7488 · 453e7488
Unverified commit 453e7488, authored Aug 01, 2024 by Raushan Turganbay; committed by GitHub Aug 01, 2024
6 changed files
--- a/src/transformers/models/llava/modeling_llava.py
+++ b/src/transformers/models/llava/modeling_llava.py
@@ -126,6 +126,7 @@ class LlavaPreTrainedModel(PreTrainedModel):
    _no_split_modules = ["LlavaVisionAttention"]
    _skip_keys_device_placement = "past_key_values"
    _supports_flash_attn_2 = True
+    _supports_cache_class = True

    def _init_weights(self, module):
        # important: this ported version of Llava isn't meant for training from scratch - only

--- a/src/transformers/models/llava_next/modeling_llava_next.py
+++ b/src/transformers/models/llava_next/modeling_llava_next.py
@@ -232,6 +232,7 @@ class LlavaNextPreTrainedModel(PreTrainedModel):
    _no_split_modules = ["LlavaNextVisionAttention"]
    _skip_keys_device_placement = "past_key_values"
    _supports_flash_attn_2 = True
+    _supports_cache_class = True

    def _init_weights(self, module):
        # important: this ported version of LlavaNext isn't meant for training from scratch - only

--- a/src/transformers/models/llava_next_video/modeling_llava_next_video.py
+++ b/src/transformers/models/llava_next_video/modeling_llava_next_video.py
@@ -272,6 +272,7 @@ class LlavaNextVideoPreTrainedModel(PreTrainedModel):
    _no_split_modules = ["LlavaNextVideoVisionAttention"]
    _skip_keys_device_placement = "past_key_values"
    _supports_flash_attn_2 = True
+    _supports_cache_class = True

    def _init_weights(self, module):
        # important: this ported version of LlavaNextVideo isn't meant for training from scratch - only

--- a/src/transformers/models/paligemma/modeling_paligemma.py
+++ b/src/transformers/models/paligemma/modeling_paligemma.py
@@ -127,6 +127,7 @@ class PaliGemmaPreTrainedModel(PreTrainedModel):
    _skip_keys_device_placement = "past_key_values"
    _supports_flash_attn_2 = False
    _supports_sdpa = True
+    _supports_cache_class = True

    def _init_weights(self, module):
        # important: this ported version of PaliGemma isn't meant for training from scratch - only

--- a/src/transformers/models/video_llava/modeling_video_llava.py
+++ b/src/transformers/models/video_llava/modeling_video_llava.py
@@ -126,6 +126,7 @@ class VideoLlavaPreTrainedModel(PreTrainedModel):
    _no_split_modules = ["VideoLlavaVisionAttention"]
    _skip_keys_device_placement = "past_key_values"
    _supports_flash_attn_2 = True
+    _supports_cache_class = True

    def _init_weights(self, module):
        std = (

--- a/src/transformers/models/vipllava/modeling_vipllava.py
+++ b/src/transformers/models/vipllava/modeling_vipllava.py
@@ -135,6 +135,7 @@ class VipLlavaPreTrainedModel(PreTrainedModel):
    _no_split_modules = ["VipLlavaVisionAttention"]
    _skip_keys_device_placement = "past_key_values"
    _supports_flash_attn_2 = True
+    _supports_cache_class = True

    def _init_weights(self, module):
        # important: this ported version of VipLlava isn't meant for training from scratch - only