Unverified commit 39e63dec, authored by Jee Jee Li and committed by GitHub
Browse files

[LoRA] Cleanup LoRA unused code (#29611)


Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk>
parent 4a80ad0a
...@@ -34,12 +34,10 @@ class WorkerLoRAManager: ...@@ -34,12 +34,10 @@ class WorkerLoRAManager:
vllm_config: VllmConfig, vllm_config: VllmConfig,
device: torch.device, device: torch.device,
embedding_modules: dict[str, str], embedding_modules: dict[str, str],
embedding_padding_modules: list[str],
lora_model_cls: type[LoRAModel] = LoRAModel, lora_model_cls: type[LoRAModel] = LoRAModel,
): ):
self._lora_model_cls = lora_model_cls self._lora_model_cls = lora_model_cls
self.embedding_modules = embedding_modules self.embedding_modules = embedding_modules
self.embedding_padding_modules = embedding_padding_modules
self._cached_dummy_lora: None | Literal[False] | LoRAModel = False self._cached_dummy_lora: None | Literal[False] | LoRAModel = False
self.max_num_seqs = vllm_config.scheduler_config.max_num_seqs self.max_num_seqs = vllm_config.scheduler_config.max_num_seqs
self.max_num_batched_tokens = ( self.max_num_batched_tokens = (
...@@ -121,9 +119,7 @@ class WorkerLoRAManager: ...@@ -121,9 +119,7 @@ class WorkerLoRAManager:
lora_model_id=lora_request.lora_int_id, lora_model_id=lora_request.lora_int_id,
device="cpu", device="cpu",
dtype=self.lora_config.lora_dtype, dtype=self.lora_config.lora_dtype,
target_embedding_padding=self.vocab_size, model_vocab_size=self.vocab_size,
embedding_modules=self.embedding_modules,
embedding_padding_modules=self.embedding_padding_modules,
tensorizer_config_dict=lora_request.tensorizer_config_dict, tensorizer_config_dict=lora_request.tensorizer_config_dict,
weights_mapper=hf_to_vllm_mapper, weights_mapper=hf_to_vllm_mapper,
) )
......
...@@ -482,7 +482,6 @@ class ApertusForCausalLM(nn.Module, SupportsLoRA, SupportsPP): ...@@ -482,7 +482,6 @@ class ApertusForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
"embed_tokens": "input_embeddings", "embed_tokens": "input_embeddings",
"lm_head": "output_embeddings", "lm_head": "output_embeddings",
} }
embedding_padding_modules = ["lm_head"]
def __init__( def __init__(
self, self,
......
...@@ -419,7 +419,6 @@ class BambaForCausalLM( ...@@ -419,7 +419,6 @@ class BambaForCausalLM(
"embed_tokens": "input_embeddings", "embed_tokens": "input_embeddings",
"lm_head": "output_embeddings", "lm_head": "output_embeddings",
} }
embedding_padding_modules = ["lm_head"]
@classmethod @classmethod
def get_mamba_state_dtype_from_config( def get_mamba_state_dtype_from_config(
......
...@@ -457,7 +457,6 @@ class ExaoneForCausalLM(nn.Module, SupportsLoRA, SupportsPP): ...@@ -457,7 +457,6 @@ class ExaoneForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
"wte": "input_embeddings", "wte": "input_embeddings",
"lm_head": "output_embeddings", "lm_head": "output_embeddings",
} }
embedding_padding_modules = ["lm_head"]
def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
super().__init__() super().__init__()
......
...@@ -450,7 +450,6 @@ class Exaone4ForCausalLM(nn.Module, SupportsLoRA, SupportsPP): ...@@ -450,7 +450,6 @@ class Exaone4ForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
"embed_tokens": "input_embeddings", "embed_tokens": "input_embeddings",
"lm_head": "output_embeddings", "lm_head": "output_embeddings",
} }
embedding_padding_modules = ["lm_head"]
def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
super().__init__() super().__init__()
......
...@@ -510,7 +510,6 @@ class FalconH1ForCausalLM( ...@@ -510,7 +510,6 @@ class FalconH1ForCausalLM(
"embed_tokens": "input_embeddings", "embed_tokens": "input_embeddings",
"lm_head": "output_embeddings", "lm_head": "output_embeddings",
} }
embedding_padding_modules = ["lm_head"]
@classmethod @classmethod
def get_mamba_state_dtype_from_config( def get_mamba_state_dtype_from_config(
......
...@@ -400,7 +400,6 @@ class GraniteForCausalLM(nn.Module, SupportsLoRA, SupportsPP): ...@@ -400,7 +400,6 @@ class GraniteForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
"embed_tokens": "input_embeddings", "embed_tokens": "input_embeddings",
"lm_head": "output_embeddings", "lm_head": "output_embeddings",
} }
embedding_padding_modules = ["lm_head"]
def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
super().__init__() super().__init__()
......
...@@ -497,7 +497,6 @@ class GraniteMoeForCausalLM(nn.Module, SupportsLoRA, SupportsPP): ...@@ -497,7 +497,6 @@ class GraniteMoeForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
"embed_tokens": "input_embeddings", "embed_tokens": "input_embeddings",
"lm_head": "output_embeddings", "lm_head": "output_embeddings",
} }
embedding_padding_modules = ["lm_head"]
def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
super().__init__() super().__init__()
......
...@@ -601,7 +601,6 @@ class GraniteMoeHybridForCausalLM( ...@@ -601,7 +601,6 @@ class GraniteMoeHybridForCausalLM(
"embed_tokens": "input_embeddings", "embed_tokens": "input_embeddings",
"lm_head": "output_embeddings", "lm_head": "output_embeddings",
} }
embedding_padding_modules = ["lm_head"]
@classmethod @classmethod
def get_mamba_state_dtype_from_config( def get_mamba_state_dtype_from_config(
......
...@@ -263,7 +263,6 @@ class GraniteMoeSharedForCausalLM(nn.Module, SupportsLoRA, SupportsPP): ...@@ -263,7 +263,6 @@ class GraniteMoeSharedForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
"embed_tokens": "input_embeddings", "embed_tokens": "input_embeddings",
"lm_head": "output_embeddings", "lm_head": "output_embeddings",
} }
embedding_padding_modules = ["lm_head"]
def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
super().__init__() super().__init__()
......
...@@ -347,7 +347,6 @@ class SupportsLoRA(Protocol): ...@@ -347,7 +347,6 @@ class SupportsLoRA(Protocol):
# The `embedding_module` and `embedding_padding_modules` # The `embedding_module` and `embedding_padding_modules`
# are empty by default. # are empty by default.
embedding_modules: ClassVar[dict[str, str]] = {} embedding_modules: ClassVar[dict[str, str]] = {}
embedding_padding_modules: ClassVar[list[str]] = []
packed_modules_mapping: dict[str, list[str]] = {} packed_modules_mapping: dict[str, list[str]] = {}
...@@ -359,7 +358,6 @@ class _SupportsLoRAType(Protocol): ...@@ -359,7 +358,6 @@ class _SupportsLoRAType(Protocol):
packed_modules_mapping: dict[str, list[str]] packed_modules_mapping: dict[str, list[str]]
embedding_modules: dict[str, str] embedding_modules: dict[str, str]
embedding_padding_modules: list[str]
@overload @overload
...@@ -379,7 +377,6 @@ def supports_lora( ...@@ -379,7 +377,6 @@ def supports_lora(
lora_attrs = ( lora_attrs = (
"packed_modules_mapping", "packed_modules_mapping",
"embedding_modules", "embedding_modules",
"embedding_padding_modules",
) )
missing_attrs = tuple(attr for attr in lora_attrs if not hasattr(model, attr)) missing_attrs = tuple(attr for attr in lora_attrs if not hasattr(model, attr))
......
...@@ -480,7 +480,6 @@ class JambaForCausalLM( ...@@ -480,7 +480,6 @@ class JambaForCausalLM(
"embed_tokens": "input_embeddings", "embed_tokens": "input_embeddings",
"lm_head": "output_embeddings", "lm_head": "output_embeddings",
} }
embedding_padding_modules = ["lm_head"]
def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
config = vllm_config.model_config.hf_config config = vllm_config.model_config.hf_config
......
...@@ -422,7 +422,6 @@ class Lfm2ForCausalLM( ...@@ -422,7 +422,6 @@ class Lfm2ForCausalLM(
"embed_tokens": "input_embeddings", "embed_tokens": "input_embeddings",
"lm_head": "output_embeddings", "lm_head": "output_embeddings",
} }
embedding_padding_modules = ["lm_head"]
@classmethod @classmethod
def get_mamba_state_dtype_from_config( def get_mamba_state_dtype_from_config(
......
...@@ -602,7 +602,6 @@ class Lfm2MoeForCausalLM( ...@@ -602,7 +602,6 @@ class Lfm2MoeForCausalLM(
"embed_tokens": "input_embeddings", "embed_tokens": "input_embeddings",
"lm_head": "output_embeddings", "lm_head": "output_embeddings",
} }
embedding_padding_modules = ["lm_head"]
@classmethod @classmethod
def get_mamba_state_dtype_from_config( def get_mamba_state_dtype_from_config(
......
...@@ -528,7 +528,6 @@ class LlamaForCausalLM( ...@@ -528,7 +528,6 @@ class LlamaForCausalLM(
"embed_tokens": "input_embeddings", "embed_tokens": "input_embeddings",
"lm_head": "output_embeddings", "lm_head": "output_embeddings",
} }
embedding_padding_modules = ["lm_head"]
# Mistral/Llama models can also be loaded with --load-format mistral # Mistral/Llama models can also be loaded with --load-format mistral
# from consolidated.safetensors checkpoints # from consolidated.safetensors checkpoints
......
...@@ -568,7 +568,6 @@ class MiniCPMForCausalLM(nn.Module, SupportsLoRA, SupportsPP, SupportsEagle3): ...@@ -568,7 +568,6 @@ class MiniCPMForCausalLM(nn.Module, SupportsLoRA, SupportsPP, SupportsEagle3):
"embed_tokens": "input_embeddings", "embed_tokens": "input_embeddings",
"lm_head": "output_embeddings", "lm_head": "output_embeddings",
} }
embedding_padding_modules = ["lm_head"]
def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
super().__init__() super().__init__()
......
...@@ -305,7 +305,6 @@ class EagleMiniCPMForCausalLM(nn.Module, SupportsLoRA, SupportsPP, SupportsEagle ...@@ -305,7 +305,6 @@ class EagleMiniCPMForCausalLM(nn.Module, SupportsLoRA, SupportsPP, SupportsEagle
"embed_tokens": "input_embeddings", "embed_tokens": "input_embeddings",
"lm_head": "output_embeddings", "lm_head": "output_embeddings",
} }
embedding_padding_modules = ["lm_head"]
def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
super().__init__() super().__init__()
......
...@@ -1741,5 +1741,4 @@ class MiniCPMV(MiniCPMVBaseModel, SupportsMultiModal, SupportsLoRA): ...@@ -1741,5 +1741,4 @@ class MiniCPMV(MiniCPMVBaseModel, SupportsMultiModal, SupportsLoRA):
# so update values before init is called # so update values before init is called
cls.packed_modules_mapping.update(instance_cls.packed_modules_mapping) cls.packed_modules_mapping.update(instance_cls.packed_modules_mapping)
cls.embedding_modules.update(instance_cls.embedding_modules) cls.embedding_modules.update(instance_cls.embedding_modules)
cls.embedding_padding_modules += instance_cls.embedding_padding_modules
return instance_cls(vllm_config=vllm_config, prefix=prefix) return instance_cls(vllm_config=vllm_config, prefix=prefix)
...@@ -496,7 +496,6 @@ class MixtralForCausalLM(nn.Module, SupportsLoRA, SupportsPP, MixtureOfExperts): ...@@ -496,7 +496,6 @@ class MixtralForCausalLM(nn.Module, SupportsLoRA, SupportsPP, MixtureOfExperts):
"embed_tokens": "input_embeddings", "embed_tokens": "input_embeddings",
"lm_head": "output_embeddings", "lm_head": "output_embeddings",
} }
embedding_padding_modules = ["lm_head"]
def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
super().__init__() super().__init__()
......
...@@ -439,7 +439,6 @@ class NemotronForCausalLM(nn.Module, SupportsLoRA, SupportsPP): ...@@ -439,7 +439,6 @@ class NemotronForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
"embed_tokens": "input_embeddings", "embed_tokens": "input_embeddings",
"lm_head": "output_embeddings", "lm_head": "output_embeddings",
} }
embedding_padding_modules = ["lm_head"]
def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
super().__init__() super().__init__()
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment