kv_cache_scales_qwen2_1_5b.json 1015 Bytes
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
{
    "model_type": "qwen",
    "kv_cache": {
        "dtype": "float8_e4m3fn",
        "scaling_factor": {
            "0": {
                "0": 0.9846,
                 "1": 0.0645,
                 "2": 0.0731,
                 "3": 0.0800,
                 "4": 0.0748,
                 "5": 0.0780,
                 "6": 0.0702,
                 "7": 0.0894,
                 "8": 0.0410,
                 "9": 0.0758,
                 "10": 0.0556,
                 "11": 0.0731,
                 "12": 0.0899,
                 "13": 0.0780,
                 "14": 0.1441,
                 "15": 0.0914,
                 "16": 0.5614,
                 "17": 0.1067,
                 "18": 0.0537,
                 "19": 0.0658,
                 "20": 0.0523,
                 "21": 0.0533,
                 "22": 0.0699,
                 "23": 0.0635,
                 "24": 0.0588,
                 "25": 0.0884,
                 "26": 0.0947,
                 "27": 0.1032
            }
        }
    }
}