Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ox696c
ktransformers
Commits
3dca28d2
Commit
3dca28d2
authored
Feb 06, 2025
by
liam
Browse files
⚡
fix moe.cpp int overflow problem
parent
ee24a270
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
3 additions
and
3 deletions
+3
-3
ktransformers/ktransformers_ext/operators/llamafile/moe.cpp
ktransformers/ktransformers_ext/operators/llamafile/moe.cpp
+2
-2
ktransformers/local_chat.py
ktransformers/local_chat.py
+1
-1
No files found.
ktransformers/ktransformers_ext/operators/llamafile/moe.cpp
View file @
3dca28d2
...
@@ -224,7 +224,7 @@ void MOE::forward_many(int qlen, int k, const uint64_t* expert_ids, const float*
...
@@ -224,7 +224,7 @@ void MOE::forward_many(int qlen, int k, const uint64_t* expert_ids, const float*
int
stride
=
QK_K
;
int
stride
=
QK_K
;
int
nth
=
config_
.
intermediate_size
/
stride
;
int
nth
=
config_
.
intermediate_size
/
stride
;
backend
->
do_work_stealing_job
(
nth
*
config_
.
expert_num
,
nullptr
,
[
&
](
int
task_id
)
{
backend
->
do_work_stealing_job
(
nth
*
config_
.
expert_num
,
nullptr
,
[
&
](
int
task_id
)
{
int
expert_idx
=
task_id
/
nth
;
u
int
64_t
expert_idx
=
task_id
/
nth
;
int
ith
=
task_id
%
nth
;
int
ith
=
task_id
%
nth
;
void
*
gate_input_ptr
=
m_local_gate_input_ptr_
[
expert_idx
];
void
*
gate_input_ptr
=
m_local_gate_input_ptr_
[
expert_idx
];
void
*
gate_proj_ptr
=
(
uint8_t
*
)
gate_proj_
+
(
expert_idx
*
config_
.
intermediate_size
+
ith
*
stride
)
*
config_
.
hidden_size
*
ggml_type_size
(
config_
.
gate_type
)
/
ggml_blck_size
(
config_
.
gate_type
);
void
*
gate_proj_ptr
=
(
uint8_t
*
)
gate_proj_
+
(
expert_idx
*
config_
.
intermediate_size
+
ith
*
stride
)
*
config_
.
hidden_size
*
ggml_type_size
(
config_
.
gate_type
)
/
ggml_blck_size
(
config_
.
gate_type
);
...
@@ -246,7 +246,7 @@ void MOE::forward_many(int qlen, int k, const uint64_t* expert_ids, const float*
...
@@ -246,7 +246,7 @@ void MOE::forward_many(int qlen, int k, const uint64_t* expert_ids, const float*
stride
=
QK_K
;
stride
=
QK_K
;
nth
=
config_
.
hidden_size
/
stride
;
nth
=
config_
.
hidden_size
/
stride
;
backend
->
do_work_stealing_job
(
nth
*
config_
.
expert_num
,
nullptr
,
[
&
](
int
task_id
)
{
backend
->
do_work_stealing_job
(
nth
*
config_
.
expert_num
,
nullptr
,
[
&
](
int
task_id
)
{
int
expert_idx
=
task_id
/
nth
;
u
int
64_t
expert_idx
=
task_id
/
nth
;
int
ith
=
task_id
%
nth
;
int
ith
=
task_id
%
nth
;
void
*
down_input_ptr
=
m_local_down_input_ptr_
[
expert_idx
];
void
*
down_input_ptr
=
m_local_down_input_ptr_
[
expert_idx
];
void
*
down_proj_ptr
=
(
uint8_t
*
)
down_proj_
+
(
expert_idx
*
config_
.
hidden_size
+
ith
*
stride
)
*
config_
.
intermediate_size
*
ggml_type_size
(
config_
.
down_type
)
/
ggml_blck_size
(
config_
.
down_type
);
void
*
down_proj_ptr
=
(
uint8_t
*
)
down_proj_
+
(
expert_idx
*
config_
.
hidden_size
+
ith
*
stride
)
*
config_
.
intermediate_size
*
ggml_type_size
(
config_
.
down_type
)
/
ggml_blck_size
(
config_
.
down_type
);
...
...
ktransformers/local_chat.py
View file @
3dca28d2
...
@@ -32,7 +32,7 @@ custom_models = {
...
@@ -32,7 +32,7 @@ custom_models = {
ktransformer_rules_dir
=
os
.
path
.
dirname
(
os
.
path
.
abspath
(
__file__
))
+
"/optimize/optimize_rules/"
ktransformer_rules_dir
=
os
.
path
.
dirname
(
os
.
path
.
abspath
(
__file__
))
+
"/optimize/optimize_rules/"
default_optimize_rules
=
{
default_optimize_rules
=
{
"DeepseekV2ForCausalLM"
:
ktransformer_rules_dir
+
"DeepSeek-V2-Chat.yaml"
,
"DeepseekV2ForCausalLM"
:
ktransformer_rules_dir
+
"DeepSeek-V2-Chat.yaml"
,
"DeepseekV3ForCausalLM"
:
ktransformer_rules_dir
+
"DeepSeek-V3-Chat.yaml"
,
"DeepseekV3ForCausalLM"
:
ktransformer_rules_dir
+
"DeepSeek-V3-Chat
-multi-gpu
.yaml"
,
"Qwen2MoeForCausalLM"
:
ktransformer_rules_dir
+
"Qwen2-57B-A14B-Instruct.yaml"
,
"Qwen2MoeForCausalLM"
:
ktransformer_rules_dir
+
"Qwen2-57B-A14B-Instruct.yaml"
,
"LlamaForCausalLM"
:
ktransformer_rules_dir
+
"Internlm2_5-7b-Chat-1m.yaml"
,
"LlamaForCausalLM"
:
ktransformer_rules_dir
+
"Internlm2_5-7b-Chat-1m.yaml"
,
"MixtralForCausalLM"
:
ktransformer_rules_dir
+
"Mixtral.yaml"
,
"MixtralForCausalLM"
:
ktransformer_rules_dir
+
"Mixtral.yaml"
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment