# phi3.py
# Adapted from llama.py
"""Inference-only Phi3 model code, inherited from llama.py."""

from vllm.model_executor.models.llama import LlamaForCausalLM


class Phi3ForCausalLM(LlamaForCausalLM):
    """Phi3 causal language model, built as a ``LlamaForCausalLM`` subclass.

    The class body defines only class-level configuration attributes;
    all other behavior comes from ``LlamaForCausalLM``.
    """

    # Packed-module name -> list of module names it packs. Every entry here
    # lists only itself.
    # NOTE(review): presumably the Phi3 checkpoint already stores the fused
    # qkv and gate/up projections under these names -- confirm against the
    # checkpoint layout.
    packed_modules_mapping = {
        "qkv_proj": ["qkv_proj"],
        "gate_up_proj": ["gate_up_proj"],
    }

    # BitsAndBytes-specific attributes.
    # Dot-delimited module-name fragments targeted by default.
    default_bitsandbytes_target_modules = [
        ".gate_up_proj.",
        ".down_proj.",
        ".qkv_proj.",
        ".o_proj.",
    ]
    # Initialize an empty dict when there is no stacked parameter mapping.
    bitsandbytes_stacked_params_mapping = {}