Make InternLM follow `rope_scaling` in `config.json` (#1956)

Co-authored-by: lijie8 <lijie8@sensetime.com>

Make InternLM follow `rope_scaling` in `config.json` (#1956)
Co-authored-by: lijie8 <lijie8@sensetime.com>
ebede26e · Jie Li · GitHub · d940ce49 · ebede26e
Unverified commit ebede26e, authored Dec 08, 2023 by Jie Li; committed by GitHub on Dec 07, 2023.
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 4 additions and 1 deletion

vllm/model_executor/models/internlm.py +4 -1

No files found.
--- a/vllm/model_executor/models/internlm.py
+++ b/vllm/model_executor/models/internlm.py
 # -*- coding: utf-8 -*-
-from typing import List, Optional, Tuple
+from typing import Any, Dict, List, Optional, Tuple

 import torch
 from torch import nn
@@ -67,6 +67,7 @@ class InternLMAttention(nn.Module):
        rope_theta: float = 10000,
        max_position_embeddings: int = 8192,
        linear_method: Optional[LinearMethodBase] = None,
+        rope_scaling: Optional[Dict[str, Any]] = None,
    ):
        super().__init__()
        self.hidden_size = hidden_size
@@ -99,6 +100,7 @@ class InternLMAttention(nn.Module):
            rotary_dim=self.head_dim,
            max_position=self.max_position_embeddings,
            base=self.rope_theta,
+            rope_scaling=rope_scaling,
        )
        self.attn = PagedAttention(self.num_heads, self.head_dim, self.scaling)

@@ -139,6 +141,7 @@ class InternLMDecoderLayer(nn.Module):
            rope_theta=rope_theta,
            max_position_embeddings=max_position_embeddings,
            linear_method=linear_method,
+            rope_scaling=getattr(config, "rope_scaling", None),
        )
        self.mlp = InternLMMLP(
            hidden_size=self.hidden_size,