Unverified commit f30c6482, authored by Azure and committed by GitHub
Browse files

Merge pull request #151 from kvcache-ai/update-yaml

[update] Update marlin expert yaml example to fully use gpu.
parents a2fc2a86 0564ac84
......@@ -79,6 +79,24 @@
generate_device: "cuda:1"
prefill_device: "cuda:1"
- match:
name: "^model\\.layers\\.(0|[1-4])\\.mlp\\.experts$" # inject experts in layer 0~4 as marlin expert
replace:
class: ktransformers.operators.experts.KTransformersExperts
kwargs:
generate_device: "cuda:0" # run in cuda:0
generate_op: "KExpertsMarlin"
recursive: False
- match:
name: "^model\\.layers\\.([3][0])\\.mlp\\.experts$" # inject experts in layer 30 as marlin expert
replace:
class: ktransformers.operators.experts.KTransformersExperts
kwargs:
generate_device: "cuda:1"
generate_op: "KExpertsMarlin"
recursive: False
- match:
name: "^model\\.layers\\.(0|[1-9]|[12][0-9])\\.mlp\\.experts$"
replace:
......@@ -139,5 +157,5 @@
replace:
class: "default"
kwargs:
generate_device: "cuda:1"
prefill_device: "cuda:1"
generate_device: "cuda:0"
prefill_device: "cuda:0"
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment