[HybridCache] Fix `get_seq_length` method (#31661)

* fix gemma2
* handle in generate

[HybridCache] Fix `get_seq_length` method (#31661)
* fix gemma2
* handle in generate
1c68f2ca · Sanchit Gandhi · GitHub · 464aa746 · 1c68f2ca · 1c68f2ca
Unverified commit 1c68f2ca, authored Jun 27, 2024 by Sanchit Gandhi; committed by GitHub on Jun 27, 2024.
Show whitespace changes
Inline Side-by-side

Showing with 2 additions and 2 deletions

src/transformers/cache_utils.py: +1 -1

src/transformers/generation/utils.py: +1 -1

No files found.
--- a/src/transformers/cache_utils.py
+++ b/src/transformers/cache_utils.py
@@ -1083,7 +1083,7 @@ class HybridCache(Cache):
        # no matter how long the sentence is
        return self.max_cache_len
-    def get_seq_length(self, layer_idx: Optional[int] = 0) -> int:
+    def get_seq_length(self, layer_idx: Optional[int] = 0):
        return None
    def reset(self):

--- a/src/transformers/generation/utils.py
+++ b/src/transformers/generation/utils.py
@@ -1399,7 +1399,7 @@ class GenerationMixin:
            cache = model_kwargs["past_key_values"]
            if not isinstance(cache, Cache):
                past_length = cache[0][0].shape[2]
-            elif hasattr(cache, "get_seq_length"):
+            elif hasattr(cache, "get_seq_length") and cache.get_seq_length() is not None:
                past_length = cache.get_seq_length()
        if "inputs_embeds" in model_kwargs: