"vscode:/vscode.git/clone" did not exist on "f2fcce5802458c091de1d2c63d9c8f77fae43c8c"
Unverified commit 56503d9b, authored by Byron Hsu and committed by GitHub

[1/N] Remove `CacheConfig` import in all model files (#1658)

parent 02bc9579
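Every file in this diff gets the same two-step change: the `from vllm.config import CacheConfig` import is dropped, and the `cache_config` parameter is loosened to an untyped keyword argument. A minimal sketch of that pattern follows; `SomeModelForCausalLM` and the `Any` placeholders are illustrative, not code from this commit, and the note that the parameter is kept for call-site compatibility is an inference, not a statement from the commit message.

```python
from typing import Any, Optional

from torch import nn


class SomeModelForCausalLM(nn.Module):  # illustrative placeholder class
    def __init__(
        self,
        config: Any,
        quant_config: Optional[Any] = None,
        # Before: `cache_config: Optional[CacheConfig] = None`, which forced
        # every model file to import CacheConfig from vllm.config.
        # After: an untyped default, so the vllm.config import can go away
        # while callers that still pass cache_config keep working (assumed).
        cache_config=None,
    ) -> None:
        super().__init__()
        self.config = config
        self.quant_config = quant_config
```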
@@ -24,7 +24,6 @@ from typing import Iterable, Optional, Tuple
 import torch
 from torch import nn
 from transformers import PretrainedConfig
-from vllm.config import CacheConfig
 from vllm.distributed import (
     get_tensor_model_parallel_rank,
     get_tensor_model_parallel_world_size,
@@ -330,7 +329,7 @@ class BaiChuanBaseForCausalLM(nn.Module):
         self,
         config: PretrainedConfig,
         position_embedding: str,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ):
         super().__init__()
@@ -404,7 +403,7 @@ class BaichuanForCausalLM(BaiChuanBaseForCausalLM):
     def __init__(
         self,
         config,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ):
         if config.hidden_size == 4096:  # baichuan2 7b
......
@@ -22,7 +22,6 @@ from typing import Iterable, Optional, Tuple
 import torch
 from torch import nn
 from torch.nn import LayerNorm
-from vllm.config import CacheConfig
 from vllm.distributed import get_tensor_model_parallel_world_size
 from vllm.model_executor.layers.rotary_embedding import get_rope
 from vllm.model_executor.layers.vocab_parallel_embedding import (
@@ -52,7 +51,7 @@ class GLMAttention(nn.Module):
         self,
         config,
         layer_id: int = 0,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ):
         super().__init__()
@@ -188,7 +187,7 @@ class GLMBlock(nn.Module):
         self,
         config,
         layer_id: int,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ):
         super().__init__()
@@ -260,7 +259,7 @@ class GLMTransformer(nn.Module):
     def __init__(
         self,
         config,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ):
         super().__init__()
@@ -308,7 +307,7 @@ class ChatGLMModel(nn.Module):
     def __init__(
         self,
         config,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ):
         super().__init__()
@@ -359,7 +358,7 @@ class ChatGLMForCausalLM(nn.Module):
     def __init__(
         self,
         config: ChatGLMConfig,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
         lora_config: Optional[LoraConfig] = None,
     ):
......
@@ -45,7 +45,6 @@ import torch.utils.checkpoint
 from torch import nn
 from torch.nn.parameter import Parameter
 from transformers import PretrainedConfig
-from vllm.config import CacheConfig
 from vllm.distributed import (
     get_tensor_model_parallel_rank,
     get_tensor_model_parallel_world_size,
@@ -320,7 +319,7 @@ class CohereForCausalLM(nn.Module):
         self,
         config: PretrainedConfig,
         quant_config: Optional[QuantizationConfig] = None,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
     ) -> None:
         super().__init__()
         self.config = config
......
@@ -20,7 +20,6 @@ from typing import Iterable, Optional, Tuple
 import torch
 import torch.nn as nn
-from vllm.config import CacheConfig
 from vllm.distributed import (
     get_tensor_model_parallel_rank,
     get_tensor_model_parallel_world_size,
@@ -368,7 +367,7 @@ class DbrxForCausalLM(nn.Module):
         self,
         config: DbrxConfig,
         quant_config: Optional[QuantizationConfig] = None,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
     ):
         super().__init__()
         self.config = config
......
@@ -21,7 +21,6 @@ from typing import Any, Dict, Iterable, Optional, Tuple
 import torch
 from torch import nn
 from transformers import PretrainedConfig
-from vllm.config import CacheConfig
 from vllm.distributed import (
     get_tensor_model_parallel_rank,
     get_tensor_model_parallel_world_size,
@@ -185,7 +184,7 @@ class DeepseekAttention(nn.Module):
         rope_theta: float = 10000,
         rope_scaling: Optional[Dict[str, Any]] = None,
         max_position_embeddings: int = 8192,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ) -> None:
         super().__init__()
@@ -262,7 +261,7 @@ class DeepseekDecoderLayer(nn.Module):
         self,
         config: PretrainedConfig,
         layer_id: int,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ) -> None:
         super().__init__()
@@ -331,7 +330,7 @@ class DeepseekModel(nn.Module):
     def __init__(
         self,
         config: PretrainedConfig,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ) -> None:
         super().__init__()
@@ -374,7 +373,7 @@ class DeepseekForCausalLM(nn.Module):
     def __init__(
         self,
         config: PretrainedConfig,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ) -> None:
         super().__init__()
......
@@ -21,7 +21,6 @@ from typing import Any, Dict, Iterable, Optional, Tuple
 import torch
 from torch import nn
 from transformers import PretrainedConfig
-from vllm.config import CacheConfig
 from vllm.distributed import (
     get_tensor_model_parallel_world_size,
     tensor_model_parallel_all_reduce,
@@ -188,7 +187,7 @@ class DeepseekV2Attention(nn.Module):
         rope_theta: float = 10000,
         rope_scaling: Optional[Dict[str, Any]] = None,
         max_position_embeddings: int = 8192,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
         layer_id=None,
     ) -> None:
@@ -336,7 +335,7 @@ class DeepseekV2AttentionMLA(nn.Module):
         rope_theta: float = 10000,
         rope_scaling: Optional[Dict[str, Any]] = None,
         max_position_embeddings: int = 8192,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
         layer_id=None,
     ) -> None:
@@ -498,7 +497,7 @@ class DeepseekV2DecoderLayer(nn.Module):
         self,
         config: PretrainedConfig,
         layer_id: int,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ) -> None:
         super().__init__()
@@ -594,7 +593,7 @@ class DeepseekV2Model(nn.Module):
     def __init__(
         self,
         config: PretrainedConfig,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ) -> None:
         super().__init__()
@@ -640,7 +639,7 @@ class DeepseekV2ForCausalLM(nn.Module):
     def __init__(
         self,
         config: PretrainedConfig,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ) -> None:
         super().__init__()
......
@@ -21,7 +21,6 @@ from typing import Any, Dict, Iterable, Optional, Tuple
 import torch
 from torch import nn
-from vllm.config import CacheConfig
 from vllm.distributed import get_tensor_model_parallel_world_size
 from vllm.model_executor.layers.rotary_embedding import get_rope
 from vllm.model_executor.layers.vocab_parallel_embedding import (
@@ -295,7 +294,7 @@ class ExaoneForCausalLM(nn.Module):
         self,
         config,
         quant_config: Optional[QuantizationConfig] = None,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
     ) -> None:
         super().__init__()
         self.config = config
......
@@ -21,7 +21,7 @@ from typing import Iterable, Optional, Tuple
 import torch
 from torch import nn
 from transformers import PretrainedConfig
-from vllm.config import CacheConfig, LoRAConfig
+from vllm.config import LoRAConfig
 from vllm.distributed import get_tensor_model_parallel_world_size
 from vllm.model_executor.layers.rotary_embedding import get_rope
 from vllm.model_executor.layers.vocab_parallel_embedding import VocabParallelEmbedding
@@ -279,7 +279,7 @@ class GemmaForCausalLM(nn.Module):
         config: PretrainedConfig,
         quant_config: Optional[QuantizationConfig] = None,
         lora_config: Optional[LoRAConfig] = None,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
     ) -> None:
         del lora_config  # Unused.
         super().__init__()
......
@@ -20,7 +20,7 @@ from typing import Iterable, Optional, Set, Tuple, Union
 import torch
 from torch import nn
 from transformers import PretrainedConfig
-from vllm.config import CacheConfig, LoRAConfig
+from vllm.config import LoRAConfig
 from vllm.distributed import get_tensor_model_parallel_world_size
 # from vllm.model_executor.layers.rotary_embedding import GemmaRotaryEmbedding
@@ -105,7 +105,7 @@ class Gemma2Attention(nn.Module):
         head_dim: int,
         max_position_embeddings: int,
         rope_theta: float,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ) -> None:
         super().__init__()
@@ -190,7 +190,7 @@ class Gemma2DecoderLayer(nn.Module):
         self,
         layer_idx: int,
         config: PretrainedConfig,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ) -> None:
         super().__init__()
@@ -257,7 +257,7 @@ class Gemma2Model(nn.Module):
     def __init__(
         self,
         config: PretrainedConfig,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ) -> None:
         super().__init__()
@@ -336,7 +336,7 @@ class Gemma2ForCausalLM(nn.Module):
     def __init__(
         self,
         config: PretrainedConfig,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
         lora_config: Optional[LoRAConfig] = None,
     ) -> None:
......
@@ -21,7 +21,7 @@ from typing import Iterable, Optional, Tuple
 import torch
 from torch import nn
 from transformers import GPTBigCodeConfig
-from vllm.config import CacheConfig, LoRAConfig
+from vllm.config import LoRAConfig
 from vllm.distributed import get_tensor_model_parallel_world_size
 from vllm.model_executor.layers.vocab_parallel_embedding import VocabParallelEmbedding
 from vllm.model_executor.model_loader.weight_utils import default_weight_loader
@@ -44,7 +44,7 @@ class GPTBigCodeAttention(nn.Module):
         self,
         layer_id: int,
         config: GPTBigCodeConfig,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ):
         super().__init__()
@@ -145,7 +145,7 @@ class GPTBigCodeBlock(nn.Module):
         self,
         layer_id: int,
         config: GPTBigCodeConfig,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ):
         super().__init__()
@@ -183,7 +183,7 @@ class GPTBigCodeModel(nn.Module):
     def __init__(
         self,
         config: GPTBigCodeConfig,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
         lora_config: Optional[LoRAConfig] = None,
     ):
@@ -243,7 +243,7 @@ class GPTBigCodeForCausalLM(nn.Module):
     def __init__(
         self,
         config: GPTBigCodeConfig,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
         lora_config: Optional[LoRAConfig] = None,
     ):
......
@@ -23,7 +23,6 @@ import torch
 import torch.nn.functional as F
 from torch import nn
 from transformers import PretrainedConfig
-from vllm.config import CacheConfig
 from vllm.distributed import (
     get_tensor_model_parallel_rank,
     get_tensor_model_parallel_world_size,
@@ -289,7 +288,7 @@ class Grok1ForCausalLM(nn.Module):
         self,
         config: PretrainedConfig,
         quant_config: Optional[QuantizationConfig] = None,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
     ) -> None:
         super().__init__()
         self.config = config
......
@@ -21,7 +21,6 @@ from typing import Any, Dict, Iterable, Optional, Tuple
 import torch
 from torch import nn
 from transformers import PretrainedConfig
-from vllm.config import CacheConfig
 from vllm.distributed import get_tensor_model_parallel_world_size
 from vllm.model_executor.layers.rotary_embedding import get_rope
 from vllm.model_executor.layers.vocab_parallel_embedding import (
@@ -254,7 +253,7 @@ class InternLM2ForCausalLM(nn.Module):
         self,
         config: PretrainedConfig,
         quant_config: Optional[QuantizationConfig] = None,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
     ) -> None:
         super().__init__()
         self.config = config
......
@@ -22,7 +22,6 @@ from typing import Any, Dict, Iterable, Optional, Tuple
 import torch
 from torch import nn
 from transformers import LlamaConfig
-from vllm.config import CacheConfig
 from vllm.distributed import get_tensor_model_parallel_world_size
 from vllm.model_executor.layers.rotary_embedding import get_rope
 from vllm.model_executor.layers.vocab_parallel_embedding import (
@@ -295,7 +294,7 @@ class LlamaForCausalLM(nn.Module):
         self,
         config: LlamaConfig,
         quant_config: Optional[QuantizationConfig] = None,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
     ) -> None:
         super().__init__()
         self.config = config
......
@@ -18,7 +18,6 @@ from typing import Iterable, Optional, Tuple
 import torch
 from torch import nn
 from transformers import LlamaConfig
-from vllm.config import CacheConfig
 from vllm.model_executor.model_loader.weight_utils import default_weight_loader
 from sglang.srt.layers.logits_processor import LogitsProcessorOutput
@@ -32,7 +31,7 @@ class LlamaForClassification(nn.Module):
         self,
         config: LlamaConfig,
         quant_config: Optional[QuantizationConfig] = None,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
     ) -> None:
         super().__init__()
         self.config = config
......
@@ -18,7 +18,6 @@ from typing import Iterable, Optional, Tuple
 import torch
 from torch import nn
 from transformers import LlamaConfig
-from vllm.config import CacheConfig
 from vllm.model_executor.model_loader.weight_utils import default_weight_loader
 from sglang.srt.layers.logits_processor import LogitsProcessorOutput
@@ -33,7 +32,7 @@ class LlamaForSequenceClassification(nn.Module):
         self,
         config: LlamaConfig,
         quant_config: Optional[QuantizationConfig] = None,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
     ) -> None:
         super().__init__()
         self.config = config
@@ -92,7 +91,7 @@ class LlamaForSequenceClassificationWithNormal_Weights(LlamaForSequenceClassific
         self,
         config: LlamaConfig,
         quant_config: Optional[QuantizationConfig] = None,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
     ) -> None:
         super().__init__(config, quant_config, cache_config)
         self.weights = self.Weights(config.hidden_size, self.num_labels)
......
@@ -31,7 +31,6 @@ from transformers import (
     SiglipVisionModel,
 )
 from transformers.models.llava.modeling_llava import LlavaMultiModalProjector
-from vllm.config import CacheConfig
 from vllm.model_executor.model_loader.weight_utils import default_weight_loader
 from sglang.srt.layers.quantization.base_config import QuantizationConfig
@@ -450,7 +449,7 @@ class LlavaLlamaForCausalLM(LlavaBaseForCausalLM):
         self,
         config: LlavaConfig,
         quant_config: Optional[QuantizationConfig] = None,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
     ) -> None:
         super().__init__()
@@ -472,7 +471,7 @@ class LlavaQwenForCausalLM(LlavaBaseForCausalLM):
         self,
         config: LlavaConfig,
         quant_config: Optional[QuantizationConfig] = None,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
     ) -> None:
         super().__init__()
@@ -505,7 +504,7 @@ class LlavaMistralForCausalLM(LlavaBaseForCausalLM):
         self,
         config: LlavaConfig,
         quant_config: Optional[QuantizationConfig] = None,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
     ) -> None:
         super().__init__()
......
@@ -22,7 +22,6 @@ import torch
 from torch import nn
 from transformers import CLIPVisionModel, LlavaConfig
 from transformers.models.llava.modeling_llava import LlavaMultiModalProjector
-from vllm.config import CacheConfig
 from vllm.model_executor.model_loader.weight_utils import default_weight_loader
 from sglang.srt.layers.quantization.base_config import QuantizationConfig
@@ -36,7 +35,7 @@ class LlavaVidForCausalLM(nn.Module):
         self,
         config: LlavaConfig,
         quant_config: Optional[QuantizationConfig] = None,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
     ) -> None:
         super().__init__()
         self.config = config
......
@@ -20,7 +20,6 @@ from typing import Any, Dict, Iterable, Optional, Tuple
 import torch
 from torch import nn
-from vllm.config import CacheConfig
 from vllm.distributed import get_tensor_model_parallel_world_size
 from vllm.model_executor.layers.rotary_embedding import get_rope
 from vllm.model_executor.layers.vocab_parallel_embedding import (
@@ -278,7 +277,7 @@ class MiniCPMForCausalLM(nn.Module):
         self,
         config,
         quant_config: Optional[QuantizationConfig] = None,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
     ) -> None:
         super().__init__()
         self.config = config
......
@@ -21,7 +21,6 @@ from typing import Any, Dict, Iterable, Optional, Tuple
 import torch
 from torch import nn
 from transformers import PretrainedConfig
-from vllm.config import CacheConfig
 from vllm.distributed import get_tensor_model_parallel_world_size
 from vllm.model_executor.layers.linear import (
     ColumnParallelLinear,
@@ -108,7 +107,7 @@ class MiniCPM3Attention(nn.Module):
         rope_theta: float = 10000,
         rope_scaling: Optional[Dict[str, Any]] = None,
         max_position_embeddings: int = 8192,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
         layer_id=None,
     ) -> None:
@@ -252,7 +251,7 @@ class MiniCPM3AttentionMLA(nn.Module):
         rope_theta: float = 10000,
         rope_scaling: Optional[Dict[str, Any]] = None,
         max_position_embeddings: int = 8192,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
         layer_id=None,
     ) -> None:
@@ -409,7 +408,7 @@ class MiniCPM3DecoderLayer(nn.Module):
         self,
         config: PretrainedConfig,
         layer_id: int,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ) -> None:
         super().__init__()
@@ -501,7 +500,7 @@ class MiniCPM3Model(nn.Module):
     def __init__(
         self,
         config: PretrainedConfig,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ) -> None:
         super().__init__()
@@ -552,7 +551,7 @@ class MiniCPM3ForCausalLM(nn.Module):
     def __init__(
         self,
         config: PretrainedConfig,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ) -> None:
         super().__init__()
......
@@ -21,7 +21,6 @@ from typing import Iterable, Optional, Tuple
 import torch
 from torch import nn
 from transformers import MixtralConfig
-from vllm.config import CacheConfig
 from vllm.distributed import get_tensor_model_parallel_world_size
 from vllm.model_executor.layers.fused_moe import FusedMoE
 from vllm.model_executor.layers.rotary_embedding import get_rope
@@ -293,7 +292,7 @@ class MixtralForCausalLM(nn.Module):
         self,
         config: MixtralConfig,
         quant_config: Optional[QuantizationConfig] = None,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
     ) -> None:
         super().__init__()
         self.config = config
......