Unverified Commit 167506b7 authored by Atream's avatar Atream Committed by GitHub
Browse files

Update DeepSeek-V3-Chat-multi-gpu-marlin.yaml

parent c9a0c442
...@@ -66,7 +66,7 @@ ...@@ -66,7 +66,7 @@
name: "^model\\.layers\\.(0|[1-9]|[12][0-9])\\.mlp\\.gate$" name: "^model\\.layers\\.(0|[1-9]|[12][0-9])\\.mlp\\.gate$"
class: ktransformers.models.modeling_deepseek_v3.MoEGate class: ktransformers.models.modeling_deepseek_v3.MoEGate
replace: replace:
class: ktransformers.operators.gate.KMoEGate class: ktransformers.operators.gate.KMoEGateDeepSeekV3
kwargs: kwargs:
generate_device: "cuda:0" generate_device: "cuda:0"
prefill_device: "cuda:0" prefill_device: "cuda:0"
...@@ -74,7 +74,7 @@ ...@@ -74,7 +74,7 @@
name: "^model\\.layers\\.([3456][0-9])\\.mlp\\.gate$" name: "^model\\.layers\\.([3456][0-9])\\.mlp\\.gate$"
class: ktransformers.models.modeling_deepseek_v3.MoEGate class: ktransformers.models.modeling_deepseek_v3.MoEGate
replace: replace:
class: ktransformers.operators.gate.KMoEGate # mlp module with custom forward function class: ktransformers.operators.gate.KMoEGateDeepSeekV3 # mlp module with custom forward function
kwargs: kwargs:
generate_device: "cuda:1" generate_device: "cuda:1"
prefill_device: "cuda:1" prefill_device: "cuda:1"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment