"vscode:/vscode.git/clone" did not exist on "77b4f66b9e9269930bf3d1ac609fbb8a962d4e95"
Unverified Commit 56503d9b authored by Byron Hsu, committed by GitHub

[1/N] Remove `CacheConfig` import in all model files (#1658)

parent 02bc9579
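The change is the same mechanical pattern in every file below: delete the `from vllm.config import CacheConfig` import and loosen each constructor's `cache_config: Optional[CacheConfig] = None` parameter to an untyped `cache_config=None`. A minimal sketch of the before/after shape, using a hypothetical `ExampleForCausalLM` (not a class from this diff) to stand in for the model classes:

```python
from typing import Optional

class ExampleForCausalLM:
    def __init__(
        self,
        config,
        cache_config=None,  # was: cache_config: Optional[CacheConfig] = None
        quant_config: Optional["QuantizationConfig"] = None,  # string forward ref; no import needed
    ):
        # The parameter is kept so existing call sites that pass cache_config
        # (positionally or by keyword) continue to work; only the vLLM type
        # annotation, and with it the vllm.config import, goes away.
        self.config = config
        self.quant_config = quant_config

# Callers are unchanged:
model = ExampleForCausalLM(config=object(), cache_config=None)
```

Dropping only the annotation keeps every constructor signature source-compatible while removing the dependency on `vllm.config` from each model file.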
@@ -24,7 +24,6 @@ from typing import Iterable, Optional, Tuple
 import torch
 from torch import nn
 from transformers import PretrainedConfig
-from vllm.config import CacheConfig
 from vllm.distributed import (
     get_tensor_model_parallel_rank,
     get_tensor_model_parallel_world_size,
@@ -330,7 +329,7 @@ class BaiChuanBaseForCausalLM(nn.Module):
         self,
         config: PretrainedConfig,
         position_embedding: str,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ):
         super().__init__()
@@ -404,7 +403,7 @@ class BaichuanForCausalLM(BaiChuanBaseForCausalLM):
     def __init__(
         self,
         config,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ):
         if config.hidden_size == 4096:  # baichuan2 7b
...
@@ -22,7 +22,6 @@ from typing import Iterable, Optional, Tuple
 import torch
 from torch import nn
 from torch.nn import LayerNorm
-from vllm.config import CacheConfig
 from vllm.distributed import get_tensor_model_parallel_world_size
 from vllm.model_executor.layers.rotary_embedding import get_rope
 from vllm.model_executor.layers.vocab_parallel_embedding import (
@@ -52,7 +51,7 @@ class GLMAttention(nn.Module):
         self,
         config,
         layer_id: int = 0,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ):
         super().__init__()
@@ -188,7 +187,7 @@ class GLMBlock(nn.Module):
         self,
         config,
         layer_id: int,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ):
         super().__init__()
@@ -260,7 +259,7 @@ class GLMTransformer(nn.Module):
     def __init__(
         self,
         config,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ):
         super().__init__()
@@ -308,7 +307,7 @@ class ChatGLMModel(nn.Module):
     def __init__(
         self,
         config,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ):
         super().__init__()
@@ -359,7 +358,7 @@ class ChatGLMForCausalLM(nn.Module):
     def __init__(
         self,
         config: ChatGLMConfig,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
         lora_config: Optional[LoraConfig] = None,
     ):
...
@@ -45,7 +45,6 @@ import torch.utils.checkpoint
 from torch import nn
 from torch.nn.parameter import Parameter
 from transformers import PretrainedConfig
-from vllm.config import CacheConfig
 from vllm.distributed import (
     get_tensor_model_parallel_rank,
     get_tensor_model_parallel_world_size,
@@ -320,7 +319,7 @@ class CohereForCausalLM(nn.Module):
         self,
         config: PretrainedConfig,
         quant_config: Optional[QuantizationConfig] = None,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
     ) -> None:
         super().__init__()
         self.config = config
...
@@ -20,7 +20,6 @@ from typing import Iterable, Optional, Tuple
 import torch
 import torch.nn as nn
-from vllm.config import CacheConfig
 from vllm.distributed import (
     get_tensor_model_parallel_rank,
     get_tensor_model_parallel_world_size,
@@ -368,7 +367,7 @@ class DbrxForCausalLM(nn.Module):
         self,
         config: DbrxConfig,
         quant_config: Optional[QuantizationConfig] = None,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
     ):
         super().__init__()
         self.config = config
...
@@ -21,7 +21,6 @@ from typing import Any, Dict, Iterable, Optional, Tuple
 import torch
 from torch import nn
 from transformers import PretrainedConfig
-from vllm.config import CacheConfig
 from vllm.distributed import (
     get_tensor_model_parallel_rank,
     get_tensor_model_parallel_world_size,
@@ -185,7 +184,7 @@ class DeepseekAttention(nn.Module):
         rope_theta: float = 10000,
         rope_scaling: Optional[Dict[str, Any]] = None,
         max_position_embeddings: int = 8192,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ) -> None:
         super().__init__()
@@ -262,7 +261,7 @@ class DeepseekDecoderLayer(nn.Module):
         self,
         config: PretrainedConfig,
         layer_id: int,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ) -> None:
         super().__init__()
@@ -331,7 +330,7 @@ class DeepseekModel(nn.Module):
     def __init__(
         self,
         config: PretrainedConfig,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ) -> None:
         super().__init__()
@@ -374,7 +373,7 @@ class DeepseekForCausalLM(nn.Module):
     def __init__(
         self,
         config: PretrainedConfig,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ) -> None:
         super().__init__()
...
@@ -21,7 +21,6 @@ from typing import Any, Dict, Iterable, Optional, Tuple
 import torch
 from torch import nn
 from transformers import PretrainedConfig
-from vllm.config import CacheConfig
 from vllm.distributed import (
     get_tensor_model_parallel_world_size,
     tensor_model_parallel_all_reduce,
@@ -188,7 +187,7 @@ class DeepseekV2Attention(nn.Module):
         rope_theta: float = 10000,
         rope_scaling: Optional[Dict[str, Any]] = None,
         max_position_embeddings: int = 8192,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
         layer_id=None,
     ) -> None:
@@ -336,7 +335,7 @@ class DeepseekV2AttentionMLA(nn.Module):
         rope_theta: float = 10000,
         rope_scaling: Optional[Dict[str, Any]] = None,
         max_position_embeddings: int = 8192,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
         layer_id=None,
     ) -> None:
@@ -498,7 +497,7 @@ class DeepseekV2DecoderLayer(nn.Module):
         self,
         config: PretrainedConfig,
         layer_id: int,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ) -> None:
         super().__init__()
@@ -594,7 +593,7 @@ class DeepseekV2Model(nn.Module):
     def __init__(
         self,
         config: PretrainedConfig,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ) -> None:
         super().__init__()
@@ -640,7 +639,7 @@ class DeepseekV2ForCausalLM(nn.Module):
     def __init__(
         self,
         config: PretrainedConfig,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ) -> None:
         super().__init__()
...
@@ -21,7 +21,6 @@ from typing import Any, Dict, Iterable, Optional, Tuple
 import torch
 from torch import nn
-from vllm.config import CacheConfig
 from vllm.distributed import get_tensor_model_parallel_world_size
 from vllm.model_executor.layers.rotary_embedding import get_rope
 from vllm.model_executor.layers.vocab_parallel_embedding import (
@@ -295,7 +294,7 @@ class ExaoneForCausalLM(nn.Module):
         self,
         config,
         quant_config: Optional[QuantizationConfig] = None,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
     ) -> None:
         super().__init__()
         self.config = config
...
@@ -21,7 +21,7 @@ from typing import Iterable, Optional, Tuple
 import torch
 from torch import nn
 from transformers import PretrainedConfig
-from vllm.config import CacheConfig, LoRAConfig
+from vllm.config import LoRAConfig
 from vllm.distributed import get_tensor_model_parallel_world_size
 from vllm.model_executor.layers.rotary_embedding import get_rope
 from vllm.model_executor.layers.vocab_parallel_embedding import VocabParallelEmbedding
@@ -279,7 +279,7 @@ class GemmaForCausalLM(nn.Module):
         config: PretrainedConfig,
         quant_config: Optional[QuantizationConfig] = None,
         lora_config: Optional[LoRAConfig] = None,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
     ) -> None:
         del lora_config  # Unused.
         super().__init__()
...
@@ -20,7 +20,7 @@ from typing import Iterable, Optional, Set, Tuple, Union
 import torch
 from torch import nn
 from transformers import PretrainedConfig
-from vllm.config import CacheConfig, LoRAConfig
+from vllm.config import LoRAConfig
 from vllm.distributed import get_tensor_model_parallel_world_size
 # from vllm.model_executor.layers.rotary_embedding import GemmaRotaryEmbedding
@@ -105,7 +105,7 @@ class Gemma2Attention(nn.Module):
         head_dim: int,
         max_position_embeddings: int,
         rope_theta: float,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ) -> None:
         super().__init__()
@@ -190,7 +190,7 @@ class Gemma2DecoderLayer(nn.Module):
         self,
         layer_idx: int,
         config: PretrainedConfig,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ) -> None:
         super().__init__()
@@ -257,7 +257,7 @@ class Gemma2Model(nn.Module):
     def __init__(
         self,
         config: PretrainedConfig,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ) -> None:
         super().__init__()
@@ -336,7 +336,7 @@ class Gemma2ForCausalLM(nn.Module):
     def __init__(
         self,
         config: PretrainedConfig,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
         lora_config: Optional[LoRAConfig] = None,
     ) -> None:
...
@@ -21,7 +21,7 @@ from typing import Iterable, Optional, Tuple
 import torch
 from torch import nn
 from transformers import GPTBigCodeConfig
-from vllm.config import CacheConfig, LoRAConfig
+from vllm.config import LoRAConfig
 from vllm.distributed import get_tensor_model_parallel_world_size
 from vllm.model_executor.layers.vocab_parallel_embedding import VocabParallelEmbedding
 from vllm.model_executor.model_loader.weight_utils import default_weight_loader
@@ -44,7 +44,7 @@ class GPTBigCodeAttention(nn.Module):
         self,
         layer_id: int,
         config: GPTBigCodeConfig,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ):
         super().__init__()
@@ -145,7 +145,7 @@ class GPTBigCodeBlock(nn.Module):
         self,
         layer_id: int,
         config: GPTBigCodeConfig,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ):
         super().__init__()
@@ -183,7 +183,7 @@ class GPTBigCodeModel(nn.Module):
     def __init__(
         self,
         config: GPTBigCodeConfig,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
         lora_config: Optional[LoRAConfig] = None,
     ):
@@ -243,7 +243,7 @@ class GPTBigCodeForCausalLM(nn.Module):
     def __init__(
         self,
         config: GPTBigCodeConfig,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
         lora_config: Optional[LoRAConfig] = None,
     ):
...
@@ -23,7 +23,6 @@ import torch
 import torch.nn.functional as F
 from torch import nn
 from transformers import PretrainedConfig
-from vllm.config import CacheConfig
 from vllm.distributed import (
     get_tensor_model_parallel_rank,
     get_tensor_model_parallel_world_size,
@@ -289,7 +288,7 @@ class Grok1ForCausalLM(nn.Module):
         self,
         config: PretrainedConfig,
         quant_config: Optional[QuantizationConfig] = None,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
     ) -> None:
         super().__init__()
         self.config = config
...
@@ -21,7 +21,6 @@ from typing import Any, Dict, Iterable, Optional, Tuple
 import torch
 from torch import nn
 from transformers import PretrainedConfig
-from vllm.config import CacheConfig
 from vllm.distributed import get_tensor_model_parallel_world_size
 from vllm.model_executor.layers.rotary_embedding import get_rope
 from vllm.model_executor.layers.vocab_parallel_embedding import (
@@ -254,7 +253,7 @@ class InternLM2ForCausalLM(nn.Module):
         self,
         config: PretrainedConfig,
         quant_config: Optional[QuantizationConfig] = None,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
     ) -> None:
         super().__init__()
         self.config = config
...
@@ -22,7 +22,6 @@ from typing import Any, Dict, Iterable, Optional, Tuple
 import torch
 from torch import nn
 from transformers import LlamaConfig
-from vllm.config import CacheConfig
 from vllm.distributed import get_tensor_model_parallel_world_size
 from vllm.model_executor.layers.rotary_embedding import get_rope
 from vllm.model_executor.layers.vocab_parallel_embedding import (
@@ -295,7 +294,7 @@ class LlamaForCausalLM(nn.Module):
         self,
         config: LlamaConfig,
         quant_config: Optional[QuantizationConfig] = None,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
     ) -> None:
         super().__init__()
         self.config = config
...
@@ -18,7 +18,6 @@ from typing import Iterable, Optional, Tuple
 import torch
 from torch import nn
 from transformers import LlamaConfig
-from vllm.config import CacheConfig
 from vllm.model_executor.model_loader.weight_utils import default_weight_loader
 from sglang.srt.layers.logits_processor import LogitsProcessorOutput
@@ -32,7 +31,7 @@ class LlamaForClassification(nn.Module):
         self,
         config: LlamaConfig,
         quant_config: Optional[QuantizationConfig] = None,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
     ) -> None:
         super().__init__()
         self.config = config
...
@@ -18,7 +18,6 @@ from typing import Iterable, Optional, Tuple
 import torch
 from torch import nn
 from transformers import LlamaConfig
-from vllm.config import CacheConfig
 from vllm.model_executor.model_loader.weight_utils import default_weight_loader
 from sglang.srt.layers.logits_processor import LogitsProcessorOutput
@@ -33,7 +32,7 @@ class LlamaForSequenceClassification(nn.Module):
         self,
         config: LlamaConfig,
         quant_config: Optional[QuantizationConfig] = None,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
     ) -> None:
         super().__init__()
         self.config = config
@@ -92,7 +91,7 @@ class LlamaForSequenceClassificationWithNormal_Weights(LlamaForSequenceClassific
         self,
         config: LlamaConfig,
         quant_config: Optional[QuantizationConfig] = None,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
     ) -> None:
         super().__init__(config, quant_config, cache_config)
         self.weights = self.Weights(config.hidden_size, self.num_labels)
...
@@ -31,7 +31,6 @@ from transformers import (
     SiglipVisionModel,
 )
 from transformers.models.llava.modeling_llava import LlavaMultiModalProjector
-from vllm.config import CacheConfig
 from vllm.model_executor.model_loader.weight_utils import default_weight_loader
 from sglang.srt.layers.quantization.base_config import QuantizationConfig
@@ -450,7 +449,7 @@ class LlavaLlamaForCausalLM(LlavaBaseForCausalLM):
         self,
         config: LlavaConfig,
         quant_config: Optional[QuantizationConfig] = None,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
     ) -> None:
         super().__init__()
@@ -472,7 +471,7 @@ class LlavaQwenForCausalLM(LlavaBaseForCausalLM):
         self,
         config: LlavaConfig,
         quant_config: Optional[QuantizationConfig] = None,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
     ) -> None:
         super().__init__()
@@ -505,7 +504,7 @@ class LlavaMistralForCausalLM(LlavaBaseForCausalLM):
         self,
         config: LlavaConfig,
         quant_config: Optional[QuantizationConfig] = None,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
     ) -> None:
         super().__init__()
...
@@ -22,7 +22,6 @@ import torch
 from torch import nn
 from transformers import CLIPVisionModel, LlavaConfig
 from transformers.models.llava.modeling_llava import LlavaMultiModalProjector
-from vllm.config import CacheConfig
 from vllm.model_executor.model_loader.weight_utils import default_weight_loader
 from sglang.srt.layers.quantization.base_config import QuantizationConfig
@@ -36,7 +35,7 @@ class LlavaVidForCausalLM(nn.Module):
         self,
         config: LlavaConfig,
         quant_config: Optional[QuantizationConfig] = None,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
     ) -> None:
         super().__init__()
         self.config = config
...
@@ -20,7 +20,6 @@ from typing import Any, Dict, Iterable, Optional, Tuple
 import torch
 from torch import nn
-from vllm.config import CacheConfig
 from vllm.distributed import get_tensor_model_parallel_world_size
 from vllm.model_executor.layers.rotary_embedding import get_rope
 from vllm.model_executor.layers.vocab_parallel_embedding import (
@@ -278,7 +277,7 @@ class MiniCPMForCausalLM(nn.Module):
         self,
         config,
         quant_config: Optional[QuantizationConfig] = None,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
     ) -> None:
         super().__init__()
         self.config = config
...
@@ -21,7 +21,6 @@ from typing import Any, Dict, Iterable, Optional, Tuple
 import torch
 from torch import nn
 from transformers import PretrainedConfig
-from vllm.config import CacheConfig
 from vllm.distributed import get_tensor_model_parallel_world_size
 from vllm.model_executor.layers.linear import (
     ColumnParallelLinear,
@@ -108,7 +107,7 @@ class MiniCPM3Attention(nn.Module):
         rope_theta: float = 10000,
         rope_scaling: Optional[Dict[str, Any]] = None,
         max_position_embeddings: int = 8192,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
         layer_id=None,
     ) -> None:
@@ -252,7 +251,7 @@ class MiniCPM3AttentionMLA(nn.Module):
         rope_theta: float = 10000,
         rope_scaling: Optional[Dict[str, Any]] = None,
         max_position_embeddings: int = 8192,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
         layer_id=None,
     ) -> None:
@@ -409,7 +408,7 @@ class MiniCPM3DecoderLayer(nn.Module):
         self,
         config: PretrainedConfig,
         layer_id: int,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ) -> None:
         super().__init__()
@@ -501,7 +500,7 @@ class MiniCPM3Model(nn.Module):
     def __init__(
         self,
         config: PretrainedConfig,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ) -> None:
         super().__init__()
@@ -552,7 +551,7 @@ class MiniCPM3ForCausalLM(nn.Module):
     def __init__(
         self,
         config: PretrainedConfig,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ) -> None:
         super().__init__()
...
@@ -21,7 +21,6 @@ from typing import Iterable, Optional, Tuple
 import torch
 from torch import nn
 from transformers import MixtralConfig
-from vllm.config import CacheConfig
 from vllm.distributed import get_tensor_model_parallel_world_size
 from vllm.model_executor.layers.fused_moe import FusedMoE
 from vllm.model_executor.layers.rotary_embedding import get_rope
@@ -293,7 +292,7 @@ class MixtralForCausalLM(nn.Module):
         self,
         config: MixtralConfig,
         quant_config: Optional[QuantizationConfig] = None,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
     ) -> None:
         super().__init__()
         self.config = config
...