phi3.py 508 Bytes
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
# Adapted from llama.py
"""Inference-only Phi3 model code, inherited from llama.py."""

from vllm.model_executor.models.llama import LlamaForCausalLM


class Phi3ForCausalLM(LlamaForCausalLM):
    """Phi3 causal language model that reuses the Llama implementation.

    The class adds no methods of its own; it only overrides two class-level
    mapping attributes on ``LlamaForCausalLM``.
    """

    # Each packed module name maps to a one-element list containing only
    # itself, i.e. no packed module is split into separate sub-modules here.
    # NOTE(review): presumably because Phi3 checkpoints already store the
    # fused ``qkv_proj`` / ``gate_up_proj`` weights -- confirm against the
    # weight loader.
    packed_modules_mapping = {
        "qkv_proj": [
            "qkv_proj",
        ],
        "gate_up_proj": [
            "gate_up_proj",
        ],
    }

    # BitsAndBytes-specific attributes.
    # Initialize an empty dict when there is no stacked parameter mapping.
    bitsandbytes_stacked_params_mapping = {}