Commit 1f1542af (unverified), authored by Jee Jee Li, committed by GitHub
Browse files

[Misc] Add BNB quantization for PaliGemmaForConditionalGeneration (#12237)


Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
parent 96912550
...@@ -136,7 +136,18 @@ class PaliGemmaMultiModalProjector(nn.Module): ...@@ -136,7 +136,18 @@ class PaliGemmaMultiModalProjector(nn.Module):
@INPUT_REGISTRY.register_input_processor(input_processor_for_paligemma) @INPUT_REGISTRY.register_input_processor(input_processor_for_paligemma)
class PaliGemmaForConditionalGeneration(nn.Module, SupportsMultiModal, class PaliGemmaForConditionalGeneration(nn.Module, SupportsMultiModal,
SupportsPP): SupportsPP):
packed_modules_mapping = {
"qkv_proj": [
"q_proj",
"k_proj",
"v_proj",
],
"gate_up_proj": [
"gate_proj",
"up_proj",
],
}
def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
super().__init__() super().__init__()
config = vllm_config.model_config.hf_config config = vllm_config.model_config.hf_config
......
...@@ -344,10 +344,16 @@ class SiglipMLP(nn.Module): ...@@ -344,10 +344,16 @@ class SiglipMLP(nn.Module):
self.config = config self.config = config
self.activation_fn = get_act_fn(config.hidden_act) self.activation_fn = get_act_fn(config.hidden_act)
# Special handling for BNB quantization
# For quantization, we require the hidden size to be a multiple of 64 if quant_config and quant_config.get_name() == "bitsandbytes":
quantizable = (config.hidden_size % 64 == 0 quantizable = True
and config.intermediate_size % 64 == 0) else:
# For other quantization, we require the hidden size to be a
# multiple of 64
quantizable = (
config.hidden_size % 64 == 0
and config.intermediate_size % 64 == 0
)
self.fc1 = ColumnParallelLinear( self.fc1 = ColumnParallelLinear(
config.hidden_size, config.hidden_size,
config.intermediate_size, config.intermediate_size,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment