# qwen.py
# Copyright (c) Alibaba Cloud.
# LICENSE: https://huggingface.co/Qwen/Qwen-7B/blob/main/LICENSE

from transformers import PretrainedConfig


class QWenConfig(PretrainedConfig):
    """Configuration container for QWen models (``model_type = "qwen"``).

    Every constructor argument except ``tie_word_embeddings`` and ``**kwargs``
    is stored unchanged as an instance attribute of the same name. The
    remaining arguments are forwarded to ``PretrainedConfig.__init__``.
    """

    model_type = "qwen"
    keys_to_ignore_at_inference = ["past_key_values"]

    def __init__(
        self,
        vocab_size=151936,
        hidden_size=4096,
        num_hidden_layers=32,
        num_attention_heads=32,
        emb_dropout_prob=0.0,
        attn_dropout_prob=0.0,
        layer_norm_epsilon=1e-6,
        initializer_range=0.02,
        max_position_embeddings=8192,
        scale_attn_weights=True,
        use_cache=True,
        bf16=False,
        fp16=False,
        fp32=False,
        kv_channels=128,
        rotary_pct=1.0,
        rotary_emb_base=10000,
        use_dynamic_ntk=True,
        use_logn_attn=True,
        use_flash_attn="auto",
        intermediate_size=22016,
        no_bias=True,
        tie_word_embeddings=False,
        **kwargs,
    ):
        """Record the model hyperparameters on the config instance.

        NOTE(review): the descriptions below are inferred from the parameter
        names only; the model code that consumes these values is not part of
        this file, so confirm against it before relying on them.

        Args:
            vocab_size: Presumably the tokenizer vocabulary size.
            hidden_size: Presumably the width of the hidden representation.
            num_hidden_layers: Presumably the number of transformer layers.
            num_attention_heads: Presumably the attention heads per layer.
            emb_dropout_prob: Presumably the embedding dropout probability.
            attn_dropout_prob: Presumably the attention dropout probability.
            layer_norm_epsilon: Presumably the normalization epsilon.
            initializer_range: Presumably the weight-init standard deviation.
            max_position_embeddings: Presumably the maximum sequence length.
            scale_attn_weights: Presumably toggles attention-score scaling.
            use_cache: Presumably toggles key/value caching.
            bf16: Precision flag, stored as given.
            fp16: Precision flag, stored as given.
            fp32: Precision flag, stored as given.
            kv_channels: Presumably the per-head key/value dimension.
            rotary_pct: Presumably the fraction of dims given rotary encoding.
            rotary_emb_base: Presumably the rotary embedding base.
            use_dynamic_ntk: Feature flag, stored as given.
            use_logn_attn: Feature flag, stored as given.
            use_flash_attn: Feature flag; defaults to the string ``"auto"``.
            intermediate_size: Presumably the feed-forward inner width.
            no_bias: Feature flag, stored as given.
            tie_word_embeddings: Forwarded to ``PretrainedConfig.__init__``.
            **kwargs: Forwarded untouched to ``PretrainedConfig.__init__``.
        """
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.intermediate_size = intermediate_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.emb_dropout_prob = emb_dropout_prob
        self.attn_dropout_prob = attn_dropout_prob
        self.layer_norm_epsilon = layer_norm_epsilon
        self.initializer_range = initializer_range
        self.scale_attn_weights = scale_attn_weights
        self.use_cache = use_cache
        self.max_position_embeddings = max_position_embeddings
        self.bf16 = bf16
        self.fp16 = fp16
        self.fp32 = fp32
        self.kv_channels = kv_channels
        self.rotary_pct = rotary_pct
        self.rotary_emb_base = rotary_emb_base
        self.use_dynamic_ntk = use_dynamic_ntk
        self.use_logn_attn = use_logn_attn
        self.use_flash_attn = use_flash_attn
        self.no_bias = no_bias
        # The base-class initializer runs last, after all model-specific
        # attributes above have been assigned (same order as the original).
        super().__init__(tie_word_embeddings=tie_word_embeddings, **kwargs)