{ "DeepSeek-Coder-V2-Instruct": { "hidden_size": 5120, "intermediate_size": 12288, "max_position_embeddings": 163840, "model_type": "deepseek_v2", "num_attention_heads": 128, "num_hidden_layers": 60, "num_key_value_heads": 128, "vocab_size": 102400 }, "LLaMA-2-7B-32K": { "hidden_size": 4096, "intermediate_size": 11008, "max_position_embeddings": 32768, "model_type": "llama", "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 32, "vocab_size": 32000 }, "Qwen2.5-7B-Instruct": { "hidden_size": 3584, "intermediate_size": 18944, "max_position_embeddings": 32768, "model_type": "qwen2", "num_attention_heads": 28, "num_hidden_layers": 28, "num_key_value_heads": 4, "vocab_size": 152064 }, "qwen2-72b-instruct": { "hidden_size": 8192, "intermediate_size": 29568, "max_position_embeddings": 32768, "model_type": "qwen2", "num_attention_heads": 64, "num_hidden_layers": 80, "num_key_value_heads": 8, "vocab_size": 152064 } }