draft_model_arch_groundtruth.json 2.58 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
{
    "abhigoyal/vllm-medusa-llama-68m-random": {
        "architectures": [
            "MedusaModel"
        ],
        "model_type": "medusa",
        "text_model_type": "medusa",
        "hidden_size": 768,
        "total_num_hidden_layers": 1,
        "total_num_attention_heads": 0,
        "head_size": "Error: integer division or modulo by zero",
        "vocab_size": 32000,
        "total_num_kv_heads": 0,
        "num_experts": 0,
        "is_deepseek_mla": false,
        "is_multimodal_model": false,
        "dtype": "torch.float32"
    },
    "luccafong/deepseek_mtp_draft_random": {
        "architectures": [
            "DeepSeekMTPModel"
        ],
        "model_type": "deepseek_mtp",
        "text_model_type": "deepseek_mtp",
        "hidden_size": 2560,
        "total_num_hidden_layers": 1,
        "total_num_attention_heads": 32,
        "head_size": 576,
        "vocab_size": 129280,
        "total_num_kv_heads": 32,
        "num_experts": 72,
        "is_deepseek_mla": true,
        "is_multimodal_model": false,
        "dtype": "torch.bfloat16"
    },
    "eagle618/eagle-deepseek-v3-random": {
        "architectures": [
            "EagleDeepSeekMTPModel"
        ],
        "model_type": "eagle",
        "text_model_type": "deepseek_mtp",
        "hidden_size": 2560,
        "total_num_hidden_layers": 1,
        "total_num_attention_heads": 32,
        "head_size": 576,
        "vocab_size": 129280,
        "total_num_kv_heads": 32,
        "num_experts": 72,
        "is_deepseek_mla": true,
        "is_multimodal_model": false,
        "dtype": "bfloat16"
    },
    "yuhuili/EAGLE-LLaMA3-Instruct-8B": {
        "architectures": [
            "EagleLlamaForCausalLM"
        ],
        "model_type": "eagle",
        "text_model_type": "llama",
        "hidden_size": 4096,
        "total_num_hidden_layers": 1,
        "total_num_attention_heads": 32,
        "head_size": 128,
        "vocab_size": 128256,
        "total_num_kv_heads": 8,
        "num_experts": 0,
        "is_deepseek_mla": false,
        "is_multimodal_model": false,
        "dtype": "float16"
    },
    "yuhuili/EAGLE3-LLaMA3.1-Instruct-8B": {
        "architectures": [
            "Eagle3LlamaForCausalLM"
        ],
        "model_type": "eagle",
        "text_model_type": "llama",
        "hidden_size": 4096,
        "total_num_hidden_layers": 1,
        "total_num_attention_heads": 32,
        "head_size": 128,
        "vocab_size": 128256,
        "total_num_kv_heads": 8,
        "num_experts": 0,
        "is_deepseek_mla": false,
        "is_multimodal_model": false,
        "dtype": "float16"
    }
}