Project: ModelZoo / Yuan2.0-M32_pytorch

Commit 6a583c2f, authored Aug 21, 2024 by chenych
Commit message: update dtk to 24.04.1 and modify README
Parent: 7d576a9a

The commit changes 329 files in total; this page shows 20 changed files, with 440 additions and 0 deletions (+440 -0).
Changed files on this page (all new):

3rd_party/AutoGPTQ/auto_gptq/modeling/gemma.py          +21 -0
3rd_party/AutoGPTQ/auto_gptq/modeling/gpt2.py           +16 -0
3rd_party/AutoGPTQ/auto_gptq/modeling/gpt_bigcode.py    +16 -0
3rd_party/AutoGPTQ/auto_gptq/modeling/gpt_neox.py       +17 -0
3rd_party/AutoGPTQ/auto_gptq/modeling/gptj.py           +19 -0
3rd_party/AutoGPTQ/auto_gptq/modeling/internlm.py       +16 -0
3rd_party/AutoGPTQ/auto_gptq/modeling/llama.py          +32 -0
3rd_party/AutoGPTQ/auto_gptq/modeling/longllama.py      +32 -0
3rd_party/AutoGPTQ/auto_gptq/modeling/mistral.py        +16 -0
3rd_party/AutoGPTQ/auto_gptq/modeling/mixtral.py        +42 -0
3rd_party/AutoGPTQ/auto_gptq/modeling/moss.py           +16 -0
3rd_party/AutoGPTQ/auto_gptq/modeling/mpt.py            +19 -0
3rd_party/AutoGPTQ/auto_gptq/modeling/opt.py            +22 -0
3rd_party/AutoGPTQ/auto_gptq/modeling/phi.py            +18 -0
3rd_party/AutoGPTQ/auto_gptq/modeling/qwen.py           +21 -0
3rd_party/AutoGPTQ/auto_gptq/modeling/qwen2.py          +16 -0
3rd_party/AutoGPTQ/auto_gptq/modeling/rw.py             +16 -0
3rd_party/AutoGPTQ/auto_gptq/modeling/stablelmepoch.py  +32 -0
3rd_party/AutoGPTQ/auto_gptq/modeling/starcoder2.py     +21 -0
3rd_party/AutoGPTQ/auto_gptq/modeling/xverse.py         +32 -0
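All 20 files follow the same AutoGPTQ pattern: each defines a subclass of BaseGPTQForCausalLM that tells the quantizer where the decoder layers live (layers_block_name), which modules sit outside those layers and are left unquantized (outside_layer_modules), and the groups of in-layer linear projections to quantize, in order (inside_layer_modules). As a minimal sketch of how one of these architecture classes is exercised, the snippet below quantizes a model through the standard AutoGPTQ entry points (AutoGPTQForCausalLM, BaseQuantizeConfig); the model paths and the calibration text are placeholders and are not part of this commit.

# Minimal sketch, assuming the standard AutoGPTQ API exposed by this vendored copy.
# Paths and the calibration example are placeholders.
from transformers import AutoTokenizer
from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig

pretrained_model_dir = "/path/to/model"        # placeholder
quantized_model_dir = "/path/to/model-gptq"    # placeholder

tokenizer = AutoTokenizer.from_pretrained(pretrained_model_dir, use_fast=True)
examples = [tokenizer("A short calibration sample used to collect activation statistics.")]

quantize_config = BaseQuantizeConfig(bits=4, group_size=128, desc_act=False)

# from_pretrained dispatches to the architecture-specific class (e.g. Qwen2GPTQForCausalLM)
# based on the model config; quantize() then walks the module named by layers_block_name
# and quantizes the groups listed in inside_layer_modules, one group at a time.
model = AutoGPTQForCausalLM.from_pretrained(pretrained_model_dir, quantize_config)
model.quantize(examples)
model.save_quantized(quantized_model_dir)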
3rd_party/AutoGPTQ/auto_gptq/modeling/gemma.py (new file, mode 100644)

from logging import getLogger

from ._base import BaseGPTQForCausalLM

logger = getLogger(__name__)


class GemmaGPTQForCausalLM(BaseGPTQForCausalLM):
    layer_type = "GemmaDecoderLayer"
    layers_block_name = "model.layers"
    outside_layer_modules = ["model.embed_tokens", "model.norm"]
    inside_layer_modules = [
        ["self_attn.k_proj", "self_attn.v_proj", "self_attn.q_proj"],
        ["self_attn.o_proj"],
        ["mlp.up_proj", "mlp.gate_proj"],
        ["mlp.down_proj"],
    ]


__all__ = ["GemmaGPTQForCausalLM"]

3rd_party/AutoGPTQ/auto_gptq/modeling/gpt2.py (new file, mode 100644)

from ._base import BaseGPTQForCausalLM


class GPT2GPTQForCausalLM(BaseGPTQForCausalLM):
    layer_type = "GPT2Block"
    layers_block_name = "transformer.h"
    outside_layer_modules = ["transformer.wte", "transformer.wpe", "transformer.ln_f"]
    inside_layer_modules = [
        ["attn.c_attn"],
        ["attn.c_proj"],
        ["mlp.c_fc"],
        ["mlp.c_proj"],
    ]


__all__ = ["GPT2GPTQForCausalLM"]

3rd_party/AutoGPTQ/auto_gptq/modeling/gpt_bigcode.py (new file, mode 100644)

from ._base import BaseGPTQForCausalLM


class GPTBigCodeGPTQForCausalLM(BaseGPTQForCausalLM):
    layer_type = "GPTBigCodeBlock"
    layers_block_name = "transformer.h"
    outside_layer_modules = ["transformer.wpe", "transformer.wte", "transformer.ln_f"]
    inside_layer_modules = [
        ["attn.c_attn"],
        ["attn.c_proj"],
        ["mlp.c_fc"],
        ["mlp.c_proj"],
    ]


__all__ = ["GPTBigCodeGPTQForCausalLM"]

3rd_party/AutoGPTQ/auto_gptq/modeling/gpt_neox.py (new file, mode 100644)

from ._base import BaseGPTQForCausalLM


class GPTNeoXGPTQForCausalLM(BaseGPTQForCausalLM):
    layer_type = "GPTNeoXLayer"
    layers_block_name = "gpt_neox.layers"
    outside_layer_modules = ["gpt_neox.embed_in", "gpt_neox.final_layer_norm"]
    inside_layer_modules = [
        ["attention.query_key_value"],
        ["attention.dense"],
        ["mlp.dense_h_to_4h"],
        ["mlp.dense_4h_to_h"],
    ]

    lm_head_name = "embed_out"


__all__ = ["GPTNeoXGPTQForCausalLM"]

3rd_party/AutoGPTQ/auto_gptq/modeling/gptj.py (new file, mode 100644)

from ..nn_modules.fused_gptj_attn import FusedGPTJAttentionForQuantizedModel
from ._base import BaseGPTQForCausalLM


class GPTJGPTQForCausalLM(BaseGPTQForCausalLM):
    layer_type = "GPTJBlock"
    layers_block_name = "transformer.h"
    outside_layer_modules = ["transformer.wte", "transformer.ln_f"]
    inside_layer_modules = [
        ["attn.k_proj", "attn.v_proj", "attn.q_proj"],
        ["attn.out_proj"],
        ["mlp.fc_in"],
        ["mlp.fc_out"],
    ]

    fused_attn_module_type = FusedGPTJAttentionForQuantizedModel


__all__ = ["GPTJGPTQForCausalLM"]

3rd_party/AutoGPTQ/auto_gptq/modeling/internlm.py (new file, mode 100644)

from ._base import BaseGPTQForCausalLM


class InternLMGPTQForCausalLM(BaseGPTQForCausalLM):
    layer_type = "InternLMDecoderLayer"
    layers_block_name = "model.layers"
    outside_layer_modules = ["model.embed_tokens", "model.norm"]
    inside_layer_modules = [
        ["self_attn.k_proj", "self_attn.v_proj", "self_attn.q_proj"],
        ["self_attn.o_proj"],
        ["mlp.up_proj", "mlp.gate_proj"],
        ["mlp.down_proj"],
    ]


__all__ = ["InternLMGPTQForCausalLM"]

3rd_party/AutoGPTQ/auto_gptq/modeling/llama.py (new file, mode 100644)

from logging import getLogger

from ..utils.import_utils import compare_transformers_version
from ._base import BaseGPTQForCausalLM

if compare_transformers_version("v4.28.0", op="ge"):
    from ..nn_modules.fused_llama_attn import FusedLlamaAttentionForQuantizedModel
    from ..nn_modules.fused_llama_mlp import FusedLlamaMLPForQuantizedModel
else:
    FusedLlamaAttentionForQuantizedModel = None
    FusedLlamaMLPForQuantizedModel = None

logger = getLogger(__name__)


class LlamaGPTQForCausalLM(BaseGPTQForCausalLM):
    layer_type = "LlamaDecoderLayer"
    layers_block_name = "model.layers"
    outside_layer_modules = ["model.embed_tokens", "model.norm"]
    inside_layer_modules = [
        ["self_attn.k_proj", "self_attn.v_proj", "self_attn.q_proj"],
        ["self_attn.o_proj"],
        ["mlp.up_proj", "mlp.gate_proj"],
        ["mlp.down_proj"],
    ]

    fused_attn_module_type = FusedLlamaAttentionForQuantizedModel
    fused_mlp_module_type = FusedLlamaMLPForQuantizedModel


__all__ = ["LlamaGPTQForCausalLM"]

3rd_party/AutoGPTQ/auto_gptq/modeling/longllama.py (new file, mode 100644)

from logging import getLogger

from ..utils.import_utils import compare_transformers_version
from ._base import BaseGPTQForCausalLM

if compare_transformers_version("v4.28.0", op="ge"):
    from ..nn_modules.fused_llama_attn import FusedLlamaAttentionForQuantizedModel
    from ..nn_modules.fused_llama_mlp import FusedLlamaMLPForQuantizedModel
else:
    FusedLlamaAttentionForQuantizedModel = None
    FusedLlamaMLPForQuantizedModel = None

logger = getLogger(__name__)


class LongLlamaGPTQForCausalLM(BaseGPTQForCausalLM):
    layer_type = "LongLlamaDecoderLayer"
    layers_block_name = "model.layers"
    outside_layer_modules = ["model.embed_tokens", "model.norm"]
    inside_layer_modules = [
        ["self_attn.k_proj", "self_attn.v_proj", "self_attn.q_proj"],
        ["self_attn.o_proj"],
        ["mlp.up_proj", "mlp.gate_proj"],
        ["mlp.down_proj"],
    ]

    fused_attn_module_type = FusedLlamaAttentionForQuantizedModel
    fused_mlp_module_type = FusedLlamaMLPForQuantizedModel


__all__ = ["LongLlamaGPTQForCausalLM"]

3rd_party/AutoGPTQ/auto_gptq/modeling/mistral.py (new file, mode 100644)

from ._base import BaseGPTQForCausalLM


class MistralGPTQForCausalLM(BaseGPTQForCausalLM):
    layer_type = "MistralDecoderLayer"
    layers_block_name = "model.layers"
    outside_layer_modules = ["model.embed_tokens", "model.norm"]
    inside_layer_modules = [
        ["self_attn.k_proj", "self_attn.v_proj", "self_attn.q_proj"],
        ["self_attn.o_proj"],
        ["mlp.up_proj", "mlp.gate_proj"],
        ["mlp.down_proj"],
    ]


__all__ = ["MistralGPTQForCausalLM"]

3rd_party/AutoGPTQ/auto_gptq/modeling/mixtral.py (new file, mode 100644)

from ._base import BaseGPTQForCausalLM


class MixtralGPTQForCausalLM(BaseGPTQForCausalLM):
    layer_type = "MixtralDecoderLayer"
    layers_block_name = "model.layers"
    outside_layer_modules = ["model.embed_tokens", "model.norm"]
    inside_layer_modules = [
        ["self_attn.k_proj", "self_attn.v_proj", "self_attn.q_proj"],
        ["self_attn.o_proj"],
        [
            "block_sparse_moe.experts.0.w1",
            "block_sparse_moe.experts.1.w1",
            "block_sparse_moe.experts.2.w1",
            "block_sparse_moe.experts.3.w1",
            "block_sparse_moe.experts.4.w1",
            "block_sparse_moe.experts.5.w1",
            "block_sparse_moe.experts.6.w1",
            "block_sparse_moe.experts.7.w1",
            "block_sparse_moe.experts.0.w3",
            "block_sparse_moe.experts.1.w3",
            "block_sparse_moe.experts.2.w3",
            "block_sparse_moe.experts.3.w3",
            "block_sparse_moe.experts.4.w3",
            "block_sparse_moe.experts.5.w3",
            "block_sparse_moe.experts.6.w3",
            "block_sparse_moe.experts.7.w3",
        ],
        [
            "block_sparse_moe.experts.0.w2",
            "block_sparse_moe.experts.1.w2",
            "block_sparse_moe.experts.2.w2",
            "block_sparse_moe.experts.3.w2",
            "block_sparse_moe.experts.4.w2",
            "block_sparse_moe.experts.5.w2",
            "block_sparse_moe.experts.6.w2",
            "block_sparse_moe.experts.7.w2",
        ],
    ]


__all__ = ["MixtralGPTQForCausalLM"]

3rd_party/AutoGPTQ/auto_gptq/modeling/moss.py (new file, mode 100644)

from ._base import BaseGPTQForCausalLM


class MOSSGPTQForCausalLM(BaseGPTQForCausalLM):
    layer_type = "MossBlock"
    layers_block_name = "transformer.h"
    outside_layer_modules = ["transformer.wte", "transformer.ln_f"]
    inside_layer_modules = [
        ["attn.qkv_proj"],
        ["attn.out_proj"],
        ["mlp.fc_in"],
        ["mlp.fc_out"],
    ]


__all__ = ["MOSSGPTQForCausalLM"]

3rd_party/AutoGPTQ/auto_gptq/modeling/mpt.py (new file, mode 100644)

from auto_gptq.modeling import BaseGPTQForCausalLM


class MPTGPTQForCausalLM(BaseGPTQForCausalLM):
    layer_type = "MPTBlock"
    layers_block_name = "transformer.blocks"
    outside_layer_modules = ["transformer.wte", "transformer.norm_f"]
    inside_layer_modules = [
        ["attn.Wqkv"],
        ["attn.out_proj"],
        ["ffn.up_proj"],
        ["ffn.down_proj"],
    ]


__all__ = ["MPTGPTQForCausalLM"]

3rd_party/AutoGPTQ/auto_gptq/modeling/opt.py (new file, mode 100644)

from ._base import BaseGPTQForCausalLM


class OPTGPTQForCausalLM(BaseGPTQForCausalLM):
    layer_type = "OPTDecoderLayer"
    layers_block_name = "model.decoder.layers"
    outside_layer_modules = [
        "model.decoder.embed_tokens",
        "model.decoder.embed_positions",
        "model.decoder.project_out",
        "model.decoder.project_in",
        "model.decoder.final_layer_norm",
    ]
    inside_layer_modules = [
        ["self_attn.k_proj", "self_attn.v_proj", "self_attn.q_proj"],
        ["self_attn.out_proj"],
        ["fc1"],
        ["fc2"],
    ]


__all__ = ["OPTGPTQForCausalLM"]

3rd_party/AutoGPTQ/auto_gptq/modeling/phi.py (new file, mode 100644)

from ._base import BaseGPTQForCausalLM


class PhiGPTQForCausalLM(BaseGPTQForCausalLM):
    layer_type = "PhiDecoderLayer"
    layers_block_name = "model.layers"
    outside_layer_modules = ["model.embed_tokens", "model.final_layernorm"]
    inside_layer_modules = [
        ["self_attn.q_proj"],
        ["self_attn.k_proj"],
        ["self_attn.v_proj"],
        ["self_attn.dense"],
        ["mlp.fc1"],
        ["mlp.fc2"],
    ]


__all__ = ["PhiGPTQForCausalLM"]

3rd_party/AutoGPTQ/auto_gptq/modeling/qwen.py (new file, mode 100644)

from ._base import BaseGPTQForCausalLM


class QwenGPTQForCausalLM(BaseGPTQForCausalLM):
    layer_type = "QWenBlock"
    layers_block_name = "transformer.h"
    outside_layer_modules = [
        "transformer.wte",
        "transformer.wpe",
        "transformer.ln_f",
        "transformer.visual",
    ]
    inside_layer_modules = [
        ["attn.c_attn"],
        ["attn.c_proj"],
        ["mlp.w1", "mlp.w2"],
        ["mlp.c_proj"],
    ]


__all__ = ["QwenGPTQForCausalLM"]

3rd_party/AutoGPTQ/auto_gptq/modeling/qwen2.py (new file, mode 100644)

from ._base import BaseGPTQForCausalLM


class Qwen2GPTQForCausalLM(BaseGPTQForCausalLM):
    layer_type = "Qwen2DecoderLayer"
    layers_block_name = "model.layers"
    outside_layer_modules = ["model.embed_tokens", "model.norm"]
    inside_layer_modules = [
        ["self_attn.k_proj", "self_attn.v_proj", "self_attn.q_proj"],
        ["self_attn.o_proj"],
        ["mlp.up_proj", "mlp.gate_proj"],
        ["mlp.down_proj"],
    ]


__all__ = ["Qwen2GPTQForCausalLM"]

3rd_party/AutoGPTQ/auto_gptq/modeling/rw.py (new file, mode 100644)

from ._base import BaseGPTQForCausalLM


class RWGPTQForCausalLM(BaseGPTQForCausalLM):
    layer_type = "DecoderLayer"
    layers_block_name = "transformer.h"
    outside_layer_modules = ["transformer.word_embeddings", "transformer.ln_f"]
    inside_layer_modules = [
        ["self_attention.query_key_value"],
        ["self_attention.dense"],
        ["mlp.dense_h_to_4h"],
        ["mlp.dense_4h_to_h"],
    ]


__all__ = ["RWGPTQForCausalLM"]

3rd_party/AutoGPTQ/auto_gptq/modeling/stablelmepoch.py (new file, mode 100644)

from logging import getLogger

from ..utils.import_utils import compare_transformers_version
from ._base import BaseGPTQForCausalLM

if compare_transformers_version("v4.28.0", op="ge"):
    from ..nn_modules.fused_llama_attn import FusedLlamaAttentionForQuantizedModel
    from ..nn_modules.fused_llama_mlp import FusedLlamaMLPForQuantizedModel
else:
    FusedLlamaAttentionForQuantizedModel = None
    FusedLlamaMLPForQuantizedModel = None

logger = getLogger(__name__)


class StableLMEpochGPTQForCausalLM(BaseGPTQForCausalLM):
    layer_type = "DecoderLayer"
    layers_block_name = "model.layers"
    outside_layer_modules = ["model.embed_tokens", "model.norm"]
    inside_layer_modules = [
        ["self_attn.k_proj", "self_attn.v_proj", "self_attn.q_proj"],
        ["self_attn.o_proj"],
        ["mlp.up_proj", "mlp.gate_proj"],
        ["mlp.down_proj"],
    ]

    fused_attn_module_type = FusedLlamaAttentionForQuantizedModel
    fused_mlp_module_type = FusedLlamaMLPForQuantizedModel


__all__ = ["StableLMEpochGPTQForCausalLM"]

3rd_party/AutoGPTQ/auto_gptq/modeling/starcoder2.py (new file, mode 100644)

from logging import getLogger

from ._base import BaseGPTQForCausalLM

logger = getLogger(__name__)


class Starcoder2GPTQForCausalLM(BaseGPTQForCausalLM):
    layer_type = "Starcoder2DecoderLayer"
    layers_block_name = "model.layers"
    outside_layer_modules = ["model.embed_tokens", "model.norm"]
    inside_layer_modules = [
        ["self_attn.k_proj", "self_attn.v_proj", "self_attn.q_proj"],
        ["self_attn.o_proj"],
        ["mlp.c_fc"],
        ["mlp.c_proj"],
    ]


__all__ = ["Starcoder2GPTQForCausalLM"]

3rd_party/AutoGPTQ/auto_gptq/modeling/xverse.py (new file, mode 100644)

from logging import getLogger

from ..utils.import_utils import compare_transformers_version
from ._base import BaseGPTQForCausalLM

if compare_transformers_version("v4.28.0", op="ge"):
    from ..nn_modules.fused_llama_attn import FusedLlamaAttentionForQuantizedModel
    from ..nn_modules.fused_llama_mlp import FusedLlamaMLPForQuantizedModel
else:
    FusedLlamaAttentionForQuantizedModel = None
    FusedLlamaMLPForQuantizedModel = None

logger = getLogger(__name__)


class XverseGPTQForCausalLM(BaseGPTQForCausalLM):
    layer_type = "XverseDecoderLayer"
    layers_block_name = "model.layers"
    outside_layer_modules = ["model.embed_tokens", "model.norm"]
    inside_layer_modules = [
        ["self_attn.k_proj", "self_attn.v_proj", "self_attn.q_proj"],
        ["self_attn.o_proj"],
        ["mlp.up_proj", "mlp.gate_proj"],
        ["mlp.down_proj"],
    ]

    fused_attn_module_type = FusedLlamaAttentionForQuantizedModel
    fused_mlp_module_type = FusedLlamaMLPForQuantizedModel


__all__ = ["XverseGPTQForCausalLM"]

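These per-architecture classes only take effect once the AutoGPTQ dispatcher knows about them. In upstream AutoGPTQ that wiring lives in auto_gptq/modeling/auto.py (a model_type-to-class map) and the re-exports in auto_gptq/modeling/__init__.py; those files are not on this page of the diff, so the sketch below is an assumption about how this vendored copy is expected to hook the new classes up, not something shown in the commit.

# Hedged sketch of the dispatcher wiring assumed to exist elsewhere in this tree
# (auto_gptq/modeling/auto.py in upstream AutoGPTQ). The map name and keys are
# illustrative and are not part of the files shown on this page.
from .gemma import GemmaGPTQForCausalLM
from .qwen2 import Qwen2GPTQForCausalLM
from .starcoder2 import Starcoder2GPTQForCausalLM

GPTQ_CAUSAL_LM_MODEL_MAP = {
    "gemma": GemmaGPTQForCausalLM,          # keys match config.model_type
    "qwen2": Qwen2GPTQForCausalLM,
    "starcoder2": Starcoder2GPTQForCausalLM,
    # ... one entry per supported architecture
}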